FFmpeg
fft_template.c
Go to the documentation of this file.
1 /*
2  * FFT/IFFT transforms
3  * Copyright (c) 2008 Loren Merritt
4  * Copyright (c) 2002 Fabrice Bellard
5  * Partly based on libdjbfft by D. J. Bernstein
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 /**
25  * @file
26  * FFT/IFFT transforms.
27  */
28 
29 #include <stdlib.h>
30 #include <string.h>
31 #include "libavutil/mathematics.h"
32 #include "libavutil/thread.h"
33 #include "fft.h"
34 #include "fft-internal.h"
35 
36 #if FFT_FIXED_32
37 #include "fft_table.h"
38 #else /* FFT_FIXED_32 */
39 
40 /* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */
41 #if !CONFIG_HARDCODED_TABLES
42 COSTABLE(16);
43 COSTABLE(32);
44 COSTABLE(64);
45 COSTABLE(128);
46 COSTABLE(256);
47 COSTABLE(512);
48 COSTABLE(1024);
49 COSTABLE(2048);
50 COSTABLE(4096);
51 COSTABLE(8192);
52 COSTABLE(16384);
53 COSTABLE(32768);
54 COSTABLE(65536);
55 COSTABLE(131072);
56 
57 static av_cold void init_ff_cos_tabs(int index)
58 {
59  int i;
60  int m = 1<<index;
61  double freq = 2*M_PI/m;
62  FFTSample *tab = FFT_NAME(ff_cos_tabs)[index];
63  for(i=0; i<=m/4; i++)
64  tab[i] = FIX15(cos(i*freq));
65  for(i=1; i<m/4; i++)
66  tab[m/2-i] = tab[i];
67 }
68 
69 typedef struct CosTabsInitOnce {
70  void (*func)(void);
73 
74 #define INIT_FF_COS_TABS_FUNC(index, size) \
75 static av_cold void init_ff_cos_tabs_ ## size (void)\
76 { \
77  init_ff_cos_tabs(index); \
78 }
79 
86 INIT_FF_COS_TABS_FUNC(10, 1024)
87 INIT_FF_COS_TABS_FUNC(11, 2048)
88 INIT_FF_COS_TABS_FUNC(12, 4096)
89 INIT_FF_COS_TABS_FUNC(13, 8192)
90 INIT_FF_COS_TABS_FUNC(14, 16384)
91 INIT_FF_COS_TABS_FUNC(15, 32768)
92 INIT_FF_COS_TABS_FUNC(16, 65536)
93 INIT_FF_COS_TABS_FUNC(17, 131072)
94 
96  { NULL },
97  { NULL },
98  { NULL },
99  { NULL },
100  { init_ff_cos_tabs_16, AV_ONCE_INIT },
101  { init_ff_cos_tabs_32, AV_ONCE_INIT },
102  { init_ff_cos_tabs_64, AV_ONCE_INIT },
103  { init_ff_cos_tabs_128, AV_ONCE_INIT },
104  { init_ff_cos_tabs_256, AV_ONCE_INIT },
105  { init_ff_cos_tabs_512, AV_ONCE_INIT },
106  { init_ff_cos_tabs_1024, AV_ONCE_INIT },
107  { init_ff_cos_tabs_2048, AV_ONCE_INIT },
108  { init_ff_cos_tabs_4096, AV_ONCE_INIT },
109  { init_ff_cos_tabs_8192, AV_ONCE_INIT },
110  { init_ff_cos_tabs_16384, AV_ONCE_INIT },
111  { init_ff_cos_tabs_32768, AV_ONCE_INIT },
112  { init_ff_cos_tabs_65536, AV_ONCE_INIT },
113  { init_ff_cos_tabs_131072, AV_ONCE_INIT },
114 };
115 
116 #endif
117 COSTABLE_CONST FFTSample * const FFT_NAME(ff_cos_tabs)[] = {
118  NULL, NULL, NULL, NULL,
119  FFT_NAME(ff_cos_16),
120  FFT_NAME(ff_cos_32),
121  FFT_NAME(ff_cos_64),
122  FFT_NAME(ff_cos_128),
123  FFT_NAME(ff_cos_256),
124  FFT_NAME(ff_cos_512),
125  FFT_NAME(ff_cos_1024),
126  FFT_NAME(ff_cos_2048),
127  FFT_NAME(ff_cos_4096),
128  FFT_NAME(ff_cos_8192),
129  FFT_NAME(ff_cos_16384),
130  FFT_NAME(ff_cos_32768),
131  FFT_NAME(ff_cos_65536),
132  FFT_NAME(ff_cos_131072),
133 };
134 
135 #endif /* FFT_FIXED_32 */
136 
137 static void fft_permute_c(FFTContext *s, FFTComplex *z);
138 static void fft_calc_c(FFTContext *s, FFTComplex *z);
139 
140 static int split_radix_permutation(int i, int n, int inverse)
141 {
142  int m;
143  if(n <= 2) return i&1;
144  m = n >> 1;
145  if(!(i&m)) return split_radix_permutation(i, m, inverse)*2;
146  m >>= 1;
147  if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1;
148  else return split_radix_permutation(i, m, inverse)*4 - 1;
149 }
150 
152 {
153 #if (!CONFIG_HARDCODED_TABLES) && (!FFT_FIXED_32)
155 #endif
156 }
157 
158 static const int avx_tab[] = {
159  0, 4, 1, 5, 8, 12, 9, 13, 2, 6, 3, 7, 10, 14, 11, 15
160 };
161 
162 static int is_second_half_of_fft32(int i, int n)
163 {
164  if (n <= 32)
165  return i >= 16;
166  else if (i < n/2)
167  return is_second_half_of_fft32(i, n/2);
168  else if (i < 3*n/4)
169  return is_second_half_of_fft32(i - n/2, n/4);
170  else
171  return is_second_half_of_fft32(i - 3*n/4, n/4);
172 }
173 
175 {
176  int i;
177  int n = 1 << s->nbits;
178 
179  for (i = 0; i < n; i += 16) {
180  int k;
181  if (is_second_half_of_fft32(i, n)) {
182  for (k = 0; k < 16; k++)
183  s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] =
184  i + avx_tab[k];
185 
186  } else {
187  for (k = 0; k < 16; k++) {
188  int j = i + k;
189  j = (j & ~7) | ((j >> 1) & 3) | ((j << 2) & 4);
190  s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] = j;
191  }
192  }
193  }
194 }
195 
196 av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
197 {
198  int i, j, n;
199 
200  s->revtab = NULL;
201  s->revtab32 = NULL;
202 
203  if (nbits < 2 || nbits > 17)
204  goto fail;
205  s->nbits = nbits;
206  n = 1 << nbits;
207 
208  if (nbits <= 16) {
209  s->revtab = av_malloc(n * sizeof(uint16_t));
210  if (!s->revtab)
211  goto fail;
212  } else {
213  s->revtab32 = av_malloc(n * sizeof(uint32_t));
214  if (!s->revtab32)
215  goto fail;
216  }
217  s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
218  if (!s->tmp_buf)
219  goto fail;
220  s->inverse = inverse;
221  s->fft_permutation = FF_FFT_PERM_DEFAULT;
222 
223  s->fft_permute = fft_permute_c;
224  s->fft_calc = fft_calc_c;
225 #if CONFIG_MDCT
226  s->imdct_calc = ff_imdct_calc_c;
227  s->imdct_half = ff_imdct_half_c;
228  s->mdct_calc = ff_mdct_calc_c;
229 #endif
230 
231 #if FFT_FIXED_32
232  ff_fft_lut_init();
233 #else /* FFT_FIXED_32 */
234 #if FFT_FLOAT
235  if (ARCH_AARCH64) ff_fft_init_aarch64(s);
236  if (ARCH_ARM) ff_fft_init_arm(s);
237  if (ARCH_PPC) ff_fft_init_ppc(s);
238  if (ARCH_X86) ff_fft_init_x86(s);
239  if (CONFIG_MDCT) s->mdct_calcw = s->mdct_calc;
240  if (HAVE_MIPSFPU) ff_fft_init_mips(s);
241 #else
242  if (CONFIG_MDCT) s->mdct_calcw = ff_mdct_calcw_c;
243  if (ARCH_ARM) ff_fft_fixed_init_arm(s);
244 #endif
245  for(j=4; j<=nbits; j++) {
247  }
248 #endif /* FFT_FIXED_32 */
249 
250 
251  if (s->fft_permutation == FF_FFT_PERM_AVX) {
252  fft_perm_avx(s);
253  } else {
254 #define PROCESS_FFT_PERM_SWAP_LSBS(num) do {\
255  for(i = 0; i < n; i++) {\
256  int k;\
257  j = i;\
258  j = (j & ~3) | ((j >> 1) & 1) | ((j << 1) & 2);\
259  k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\
260  s->revtab##num[k] = j;\
261  } \
262 } while(0);
263 
264 #define PROCESS_FFT_PERM_DEFAULT(num) do {\
265  for(i = 0; i < n; i++) {\
266  int k;\
267  j = i;\
268  k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\
269  s->revtab##num[k] = j;\
270  } \
271 } while(0);
272 
273 #define SPLIT_RADIX_PERMUTATION(num) do { \
274  if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS) {\
275  PROCESS_FFT_PERM_SWAP_LSBS(num) \
276  } else {\
277  PROCESS_FFT_PERM_DEFAULT(num) \
278  }\
279 } while(0);
280 
281  if (s->revtab)
283  if (s->revtab32)
285 
286 #undef PROCESS_FFT_PERM_DEFAULT
287 #undef PROCESS_FFT_PERM_SWAP_LSBS
288 #undef SPLIT_RADIX_PERMUTATION
289  }
290 
291  return 0;
292  fail:
293  av_freep(&s->revtab);
294  av_freep(&s->revtab32);
295  av_freep(&s->tmp_buf);
296  return -1;
297 }
298 
300 {
301  int j, np;
302  const uint16_t *revtab = s->revtab;
303  const uint32_t *revtab32 = s->revtab32;
304  np = 1 << s->nbits;
305  /* TODO: handle split-radix permute in a more optimal way, probably in-place */
306  if (revtab) {
307  for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
308  } else
309  for(j=0;j<np;j++) s->tmp_buf[revtab32[j]] = z[j];
310 
311  memcpy(z, s->tmp_buf, np * sizeof(FFTComplex));
312 }
313 
315 {
316  av_freep(&s->revtab);
317  av_freep(&s->revtab32);
318  av_freep(&s->tmp_buf);
319 }
320 
321 #if FFT_FIXED_32
322 
323 static void fft_calc_c(FFTContext *s, FFTComplex *z) {
324 
325  int nbits, i, n, num_transforms, offset, step;
326  int n4, n2, n34;
327  unsigned tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
328  FFTComplex *tmpz;
329  const int fft_size = (1 << s->nbits);
330  int64_t accu;
331 
332  num_transforms = (0x2aab >> (16 - s->nbits)) | 1;
333 
334  for (n=0; n<num_transforms; n++){
335  offset = ff_fft_offsets_lut[n] << 2;
336  tmpz = z + offset;
337 
338  tmp1 = tmpz[0].re + (unsigned)tmpz[1].re;
339  tmp5 = tmpz[2].re + (unsigned)tmpz[3].re;
340  tmp2 = tmpz[0].im + (unsigned)tmpz[1].im;
341  tmp6 = tmpz[2].im + (unsigned)tmpz[3].im;
342  tmp3 = tmpz[0].re - (unsigned)tmpz[1].re;
343  tmp8 = tmpz[2].im - (unsigned)tmpz[3].im;
344  tmp4 = tmpz[0].im - (unsigned)tmpz[1].im;
345  tmp7 = tmpz[2].re - (unsigned)tmpz[3].re;
346 
347  tmpz[0].re = tmp1 + tmp5;
348  tmpz[2].re = tmp1 - tmp5;
349  tmpz[0].im = tmp2 + tmp6;
350  tmpz[2].im = tmp2 - tmp6;
351  tmpz[1].re = tmp3 + tmp8;
352  tmpz[3].re = tmp3 - tmp8;
353  tmpz[1].im = tmp4 - tmp7;
354  tmpz[3].im = tmp4 + tmp7;
355  }
356 
357  if (fft_size < 8)
358  return;
359 
360  num_transforms = (num_transforms >> 1) | 1;
361 
362  for (n=0; n<num_transforms; n++){
363  offset = ff_fft_offsets_lut[n] << 3;
364  tmpz = z + offset;
365 
366  tmp1 = tmpz[4].re + (unsigned)tmpz[5].re;
367  tmp3 = tmpz[6].re + (unsigned)tmpz[7].re;
368  tmp2 = tmpz[4].im + (unsigned)tmpz[5].im;
369  tmp4 = tmpz[6].im + (unsigned)tmpz[7].im;
370  tmp5 = tmp1 + tmp3;
371  tmp7 = tmp1 - tmp3;
372  tmp6 = tmp2 + tmp4;
373  tmp8 = tmp2 - tmp4;
374 
375  tmp1 = tmpz[4].re - (unsigned)tmpz[5].re;
376  tmp2 = tmpz[4].im - (unsigned)tmpz[5].im;
377  tmp3 = tmpz[6].re - (unsigned)tmpz[7].re;
378  tmp4 = tmpz[6].im - (unsigned)tmpz[7].im;
379 
380  tmpz[4].re = tmpz[0].re - tmp5;
381  tmpz[0].re = tmpz[0].re + tmp5;
382  tmpz[4].im = tmpz[0].im - tmp6;
383  tmpz[0].im = tmpz[0].im + tmp6;
384  tmpz[6].re = tmpz[2].re - tmp8;
385  tmpz[2].re = tmpz[2].re + tmp8;
386  tmpz[6].im = tmpz[2].im + tmp7;
387  tmpz[2].im = tmpz[2].im - tmp7;
388 
389  accu = (int64_t)Q31(M_SQRT1_2)*(int)(tmp1 + tmp2);
390  tmp5 = (int32_t)((accu + 0x40000000) >> 31);
391  accu = (int64_t)Q31(M_SQRT1_2)*(int)(tmp3 - tmp4);
392  tmp7 = (int32_t)((accu + 0x40000000) >> 31);
393  accu = (int64_t)Q31(M_SQRT1_2)*(int)(tmp2 - tmp1);
394  tmp6 = (int32_t)((accu + 0x40000000) >> 31);
395  accu = (int64_t)Q31(M_SQRT1_2)*(int)(tmp3 + tmp4);
396  tmp8 = (int32_t)((accu + 0x40000000) >> 31);
397  tmp1 = tmp5 + tmp7;
398  tmp3 = tmp5 - tmp7;
399  tmp2 = tmp6 + tmp8;
400  tmp4 = tmp6 - tmp8;
401 
402  tmpz[5].re = tmpz[1].re - tmp1;
403  tmpz[1].re = tmpz[1].re + tmp1;
404  tmpz[5].im = tmpz[1].im - tmp2;
405  tmpz[1].im = tmpz[1].im + tmp2;
406  tmpz[7].re = tmpz[3].re - tmp4;
407  tmpz[3].re = tmpz[3].re + tmp4;
408  tmpz[7].im = tmpz[3].im + tmp3;
409  tmpz[3].im = tmpz[3].im - tmp3;
410  }
411 
412  step = 1 << ((MAX_LOG2_NFFT-4) - 4);
413  n4 = 4;
414 
415  for (nbits=4; nbits<=s->nbits; nbits++){
416  n2 = 2*n4;
417  n34 = 3*n4;
418  num_transforms = (num_transforms >> 1) | 1;
419 
420  for (n=0; n<num_transforms; n++){
421  const FFTSample *w_re_ptr = ff_w_tab_sr + step;
422  const FFTSample *w_im_ptr = ff_w_tab_sr + MAX_FFT_SIZE/(4*16) - step;
423  offset = ff_fft_offsets_lut[n] << nbits;
424  tmpz = z + offset;
425 
426  tmp5 = tmpz[ n2].re + (unsigned)tmpz[n34].re;
427  tmp1 = tmpz[ n2].re - (unsigned)tmpz[n34].re;
428  tmp6 = tmpz[ n2].im + (unsigned)tmpz[n34].im;
429  tmp2 = tmpz[ n2].im - (unsigned)tmpz[n34].im;
430 
431  tmpz[ n2].re = tmpz[ 0].re - tmp5;
432  tmpz[ 0].re = tmpz[ 0].re + tmp5;
433  tmpz[ n2].im = tmpz[ 0].im - tmp6;
434  tmpz[ 0].im = tmpz[ 0].im + tmp6;
435  tmpz[n34].re = tmpz[n4].re - tmp2;
436  tmpz[ n4].re = tmpz[n4].re + tmp2;
437  tmpz[n34].im = tmpz[n4].im + tmp1;
438  tmpz[ n4].im = tmpz[n4].im - tmp1;
439 
440  for (i=1; i<n4; i++){
441  FFTSample w_re = w_re_ptr[0];
442  FFTSample w_im = w_im_ptr[0];
443  accu = (int64_t)w_re*tmpz[ n2+i].re;
444  accu += (int64_t)w_im*tmpz[ n2+i].im;
445  tmp1 = (int32_t)((accu + 0x40000000) >> 31);
446  accu = (int64_t)w_re*tmpz[ n2+i].im;
447  accu -= (int64_t)w_im*tmpz[ n2+i].re;
448  tmp2 = (int32_t)((accu + 0x40000000) >> 31);
449  accu = (int64_t)w_re*tmpz[n34+i].re;
450  accu -= (int64_t)w_im*tmpz[n34+i].im;
451  tmp3 = (int32_t)((accu + 0x40000000) >> 31);
452  accu = (int64_t)w_re*tmpz[n34+i].im;
453  accu += (int64_t)w_im*tmpz[n34+i].re;
454  tmp4 = (int32_t)((accu + 0x40000000) >> 31);
455 
456  tmp5 = tmp1 + tmp3;
457  tmp1 = tmp1 - tmp3;
458  tmp6 = tmp2 + tmp4;
459  tmp2 = tmp2 - tmp4;
460 
461  tmpz[ n2+i].re = tmpz[ i].re - tmp5;
462  tmpz[ i].re = tmpz[ i].re + tmp5;
463  tmpz[ n2+i].im = tmpz[ i].im - tmp6;
464  tmpz[ i].im = tmpz[ i].im + tmp6;
465  tmpz[n34+i].re = tmpz[n4+i].re - tmp2;
466  tmpz[ n4+i].re = tmpz[n4+i].re + tmp2;
467  tmpz[n34+i].im = tmpz[n4+i].im + tmp1;
468  tmpz[ n4+i].im = tmpz[n4+i].im - tmp1;
469 
470  w_re_ptr += step;
471  w_im_ptr -= step;
472  }
473  }
474  step >>= 1;
475  n4 <<= 1;
476  }
477 }
478 
479 #else /* FFT_FIXED_32 */
480 
481 #define BUTTERFLIES(a0,a1,a2,a3) {\
482  BF(t3, t5, t5, t1);\
483  BF(a2.re, a0.re, a0.re, t5);\
484  BF(a3.im, a1.im, a1.im, t3);\
485  BF(t4, t6, t2, t6);\
486  BF(a3.re, a1.re, a1.re, t4);\
487  BF(a2.im, a0.im, a0.im, t6);\
488 }
489 
490 // force loading all the inputs before storing any.
491 // this is slightly slower for small data, but avoids store->load aliasing
492 // for addresses separated by large powers of 2.
493 #define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
494  FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
495  BF(t3, t5, t5, t1);\
496  BF(a2.re, a0.re, r0, t5);\
497  BF(a3.im, a1.im, i1, t3);\
498  BF(t4, t6, t2, t6);\
499  BF(a3.re, a1.re, r1, t4);\
500  BF(a2.im, a0.im, i0, t6);\
501 }
502 
503 #define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
504  CMUL(t1, t2, a2.re, a2.im, wre, -wim);\
505  CMUL(t5, t6, a3.re, a3.im, wre, wim);\
506  BUTTERFLIES(a0,a1,a2,a3)\
507 }
508 
509 #define TRANSFORM_ZERO(a0,a1,a2,a3) {\
510  t1 = a2.re;\
511  t2 = a2.im;\
512  t5 = a3.re;\
513  t6 = a3.im;\
514  BUTTERFLIES(a0,a1,a2,a3)\
515 }
516 
517 /* z[0...8n-1], w[1...2n-1] */
518 #define PASS(name)\
519 static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
520 {\
521  FFTDouble t1, t2, t3, t4, t5, t6;\
522  int o1 = 2*n;\
523  int o2 = 4*n;\
524  int o3 = 6*n;\
525  const FFTSample *wim = wre+o1;\
526  n--;\
527 \
528  TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
529  TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
530  do {\
531  z += 2;\
532  wre += 2;\
533  wim -= 2;\
534  TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
535  TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
536  } while(--n);\
537 }
538 
539 PASS(pass)
540 #if !CONFIG_SMALL
541 #undef BUTTERFLIES
542 #define BUTTERFLIES BUTTERFLIES_BIG
543 PASS(pass_big)
544 #endif
545 
546 #define DECL_FFT(n,n2,n4)\
547 static void fft##n(FFTComplex *z)\
548 {\
549  fft##n2(z);\
550  fft##n4(z+n4*2);\
551  fft##n4(z+n4*3);\
552  pass(z,FFT_NAME(ff_cos_##n),n4/2);\
553 }
554 
555 static void fft4(FFTComplex *z)
556 {
557  FFTDouble t1, t2, t3, t4, t5, t6, t7, t8;
558 
559  BF(t3, t1, z[0].re, z[1].re);
560  BF(t8, t6, z[3].re, z[2].re);
561  BF(z[2].re, z[0].re, t1, t6);
562  BF(t4, t2, z[0].im, z[1].im);
563  BF(t7, t5, z[2].im, z[3].im);
564  BF(z[3].im, z[1].im, t4, t8);
565  BF(z[3].re, z[1].re, t3, t7);
566  BF(z[2].im, z[0].im, t2, t5);
567 }
568 
569 static void fft8(FFTComplex *z)
570 {
571  FFTDouble t1, t2, t3, t4, t5, t6;
572 
573  fft4(z);
574 
575  BF(t1, z[5].re, z[4].re, -z[5].re);
576  BF(t2, z[5].im, z[4].im, -z[5].im);
577  BF(t5, z[7].re, z[6].re, -z[7].re);
578  BF(t6, z[7].im, z[6].im, -z[7].im);
579 
580  BUTTERFLIES(z[0],z[2],z[4],z[6]);
581  TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf);
582 }
583 
584 #if !CONFIG_SMALL
585 static void fft16(FFTComplex *z)
586 {
587  FFTDouble t1, t2, t3, t4, t5, t6;
588  FFTSample cos_16_1 = FFT_NAME(ff_cos_16)[1];
589  FFTSample cos_16_3 = FFT_NAME(ff_cos_16)[3];
590 
591  fft8(z);
592  fft4(z+8);
593  fft4(z+12);
594 
595  TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
596  TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf);
597  TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3);
598  TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1);
599 }
600 #else
601 DECL_FFT(16,8,4)
602 #endif
603 DECL_FFT(32,16,8)
604 DECL_FFT(64,32,16)
605 DECL_FFT(128,64,32)
606 DECL_FFT(256,128,64)
607 DECL_FFT(512,256,128)
608 #if !CONFIG_SMALL
609 #define pass pass_big
610 #endif
611 DECL_FFT(1024,512,256)
612 DECL_FFT(2048,1024,512)
613 DECL_FFT(4096,2048,1024)
614 DECL_FFT(8192,4096,2048)
615 DECL_FFT(16384,8192,4096)
616 DECL_FFT(32768,16384,8192)
617 DECL_FFT(65536,32768,16384)
618 DECL_FFT(131072,65536,32768)
619 
620 static void (* const fft_dispatch[])(FFTComplex*) = {
621  fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
622  fft2048, fft4096, fft8192, fft16384, fft32768, fft65536, fft131072
623 };
624 
626 {
627  fft_dispatch[s->nbits-2](z);
628 }
629 #endif /* FFT_FIXED_32 */
fft16
static void fft16(FFTComplex *z)
Definition: fft_template.c:585
func
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:67
ff_fft_init_ppc
void ff_fft_init_ppc(FFTContext *s)
Definition: fft_init.c:151
ff_fft_lut_init
void ff_fft_lut_init(void)
Definition: fft_init_table.c:339
BUTTERFLIES
#define BUTTERFLIES(a0, a1, a2, a3)
Definition: fft_template.c:542
thread.h
split_radix_permutation
static int split_radix_permutation(int i, int n, int inverse)
Definition: fft_template.c:140
im
float im
Definition: fft.c:82
step
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about which is also called distortion Distortion can be quantified by almost any quality measurement one chooses the sum of squared differences is used but more complex methods that consider psychovisual effects can be used as well It makes no difference in this discussion First step
Definition: rate_distortion.txt:58
t1
#define t1
Definition: regdef.h:29
mathematics.h
CosTabsInitOnce::func
void(* func)(void)
Definition: fft_template.c:70
MAX_FFT_SIZE
#define MAX_FFT_SIZE
Definition: fft_table.h:60
SPLIT_RADIX_PERMUTATION
#define SPLIT_RADIX_PERMUTATION(num)
DECL_FFT
#define DECL_FFT(n, n2, n4)
Definition: fft_template.c:546
ff_fft_init_aarch64
av_cold void ff_fft_init_aarch64(FFTContext *s)
Definition: fft_init_aarch64.c:36
FF_FFT_PERM_DEFAULT
@ FF_FFT_PERM_DEFAULT
Definition: fft.h:78
cos_tabs_init_once
static CosTabsInitOnce cos_tabs_init_once[]
Definition: fft_template.c:95
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:31
COSTABLE
COSTABLE(16)
fail
#define fail()
Definition: checkasm.h:123
tab
static const struct twinvq_data tab
Definition: twinvq_data.h:11135
CosTabsInitOnce::control
AVOnce control
Definition: fft_template.c:71
INIT_FF_COS_TABS_FUNC
#define INIT_FF_COS_TABS_FUNC(index, size)
Definition: fft_template.c:74
fft_dispatch
static void(*const fft_dispatch[])(FFTComplex *)
Definition: fft_template.c:620
ff_thread_once
static int ff_thread_once(char *control, void(*routine)(void))
Definition: thread.h:175
av_cold
#define av_cold
Definition: attributes.h:90
ff_mdct_calc_c
#define ff_mdct_calc_c
Definition: fft-internal.h:88
s
#define s(width, name)
Definition: cbs_vp9.c:257
ff_fft_init_x86
void ff_fft_init_x86(FFTContext *s)
Definition: fft_init.c:27
ff_fft_fixed_init_arm
av_cold void ff_fft_fixed_init_arm(FFTContext *s)
Definition: fft_fixed_init_arm.c:32
ff_fft_init
av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
Set up a complex FFT.
Definition: fft_template.c:196
t7
#define t7
Definition: regdef.h:35
ff_mdct_calcw_c
void ff_mdct_calcw_c(FFTContext *s, FFTDouble *output, const FFTSample *input)
Definition: mdct_fixed.c:24
pass
#define pass
Definition: fft_template.c:609
int32_t
int32_t
Definition: audio_convert.c:194
AV_ONCE_INIT
#define AV_ONCE_INIT
Definition: thread.h:173
ff_fft_init_arm
av_cold void ff_fft_init_arm(FFTContext *s)
Definition: fft_init_arm.c:38
NULL
#define NULL
Definition: coverity.c:32
COSTABLE_CONST
#define COSTABLE_CONST
Definition: fft.h:119
t5
#define t5
Definition: regdef.h:33
fft-internal.h
t6
#define t6
Definition: regdef.h:34
MAX_LOG2_NFFT
#define MAX_LOG2_NFFT
Specifies maximum allowed fft size.
Definition: fft_table.h:59
FFTSample
float FFTSample
Definition: avfft.h:35
fft_perm_avx
static av_cold void fft_perm_avx(FFTContext *s)
Definition: fft_template.c:174
AVOnce
#define AVOnce
Definition: thread.h:172
fft_table.h
index
int index
Definition: gxfenc.c:89
is_second_half_of_fft32
static int is_second_half_of_fft32(int i, int n)
Definition: fft_template.c:162
CosTabsInitOnce
Definition: fft_template.c:69
TRANSFORM
#define TRANSFORM(a0, a1, a2, a3, wre, wim)
Definition: fft_template.c:503
FFTComplex::im
FFTSample im
Definition: avfft.h:38
ff_fft_init_mips
void ff_fft_init_mips(FFTContext *s)
FFT transform.
Definition: fft_mips.c:501
FFTDouble
float FFTDouble
Definition: fft.h:43
FFTComplex::re
FFTSample re
Definition: avfft.h:38
t8
#define t8
Definition: regdef.h:53
BF
#define BF(a, b, c, s)
Definition: dct32_template.c:90
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
M_PI
#define M_PI
Definition: mathematics.h:52
FFTContext
Definition: fft.h:88
PASS
#define PASS(name)
Definition: fft_template.c:518
ff_fft_offsets_lut
uint16_t ff_fft_offsets_lut[21845]
Definition: fft_init_table.c:317
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
t4
#define t4
Definition: regdef.h:32
t3
#define t3
Definition: regdef.h:31
ff_imdct_half_c
#define ff_imdct_half_c
Definition: fft-internal.h:87
ff_imdct_calc_c
#define ff_imdct_calc_c
Definition: fft-internal.h:86
M_SQRT1_2
#define M_SQRT1_2
Definition: mathematics.h:58
fft.h
t2
#define t2
Definition: regdef.h:30
fft4
static void fft4(FFTComplex *z)
Definition: fft_template.c:555
fft_permute_c
static void fft_permute_c(FFTContext *s, FFTComplex *z)
Definition: fft_template.c:299
Q31
#define Q31(x)
Definition: aac_defines.h:96
FFT_NAME
COSTABLE_CONST FFTSample *const FFT_NAME(ff_cos_tabs)[]
ff_fft_end
av_cold void ff_fft_end(FFTContext *s)
Definition: fft_template.c:314
init_ff_cos_tabs
static av_cold void init_ff_cos_tabs(int index)
Definition: fft_template.c:57
ff_init_ff_cos_tabs
av_cold void ff_init_ff_cos_tabs(int index)
Initialize the cosine table in ff_cos_tabs[index].
Definition: fft_template.c:151
sqrthalf
#define sqrthalf
Definition: fft-internal.h:64
fft_calc_c
static void fft_calc_c(FFTContext *s, FFTComplex *z)
Definition: fft_template.c:625
ff_w_tab_sr
const int32_t ff_w_tab_sr[MAX_FFT_SIZE/(4 *16)]
Definition: fft_init_table.c:58
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
TRANSFORM_ZERO
#define TRANSFORM_ZERO(a0, a1, a2, a3)
Definition: fft_template.c:509
inverse
static uint32_t inverse(uint32_t v)
find multiplicative inverse modulo 2 ^ 32
Definition: asfcrypt.c:35
FIX15
#define FIX15(a)
Definition: fft-internal.h:62
fft8
static void fft8(FFTComplex *z)
Definition: fft_template.c:569
int
int
Definition: ffmpeg_filter.c:192
FF_FFT_PERM_AVX
@ FF_FFT_PERM_AVX
Definition: fft.h:80
avx_tab
static const int avx_tab[]
Definition: fft_template.c:158
FFTComplex
Definition: avfft.h:37
re
float re
Definition: fft.c:82