FFmpeg
fft_template.c
Go to the documentation of this file.
1 /*
2  * FFT/IFFT transforms
3  * Copyright (c) 2008 Loren Merritt
4  * Copyright (c) 2002 Fabrice Bellard
5  * Partly based on libdjbfft by D. J. Bernstein
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 /**
25  * @file
26  * FFT/IFFT transforms.
27  */
28 
29 #include <stdlib.h>
30 #include <string.h>
31 #include "libavutil/mathematics.h"
32 #include "libavutil/thread.h"
33 #include "fft.h"
34 #include "fft-internal.h"
35 
36 #if FFT_FIXED_32
37 #include "fft_table.h"
38 #else /* FFT_FIXED_32 */
39 
40 /* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */
41 #if !CONFIG_HARDCODED_TABLES
42 COSTABLE(16);
43 COSTABLE(32);
44 COSTABLE(64);
45 COSTABLE(128);
46 COSTABLE(256);
47 COSTABLE(512);
48 COSTABLE(1024);
49 COSTABLE(2048);
50 COSTABLE(4096);
51 COSTABLE(8192);
52 COSTABLE(16384);
53 COSTABLE(32768);
54 COSTABLE(65536);
55 COSTABLE(131072);
56 
57 static av_cold void init_ff_cos_tabs(int index)
58 {
59  int i;
60  int m = 1<<index;
61  double freq = 2*M_PI/m;
62  FFTSample *tab = FFT_NAME(ff_cos_tabs)[index];
63  for(i=0; i<=m/4; i++)
64  tab[i] = FIX15(cos(i*freq));
65  for(i=1; i<m/4; i++)
66  tab[m/2-i] = tab[i];
67 }
68 
69 typedef struct CosTabsInitOnce {
70  void (*func)(void);
73 
74 #define INIT_FF_COS_TABS_FUNC(index, size) \
75 static av_cold void init_ff_cos_tabs_ ## size (void)\
76 { \
77  init_ff_cos_tabs(index); \
78 }
79 
86 INIT_FF_COS_TABS_FUNC(10, 1024)
87 INIT_FF_COS_TABS_FUNC(11, 2048)
88 INIT_FF_COS_TABS_FUNC(12, 4096)
89 INIT_FF_COS_TABS_FUNC(13, 8192)
90 INIT_FF_COS_TABS_FUNC(14, 16384)
91 INIT_FF_COS_TABS_FUNC(15, 32768)
92 INIT_FF_COS_TABS_FUNC(16, 65536)
93 INIT_FF_COS_TABS_FUNC(17, 131072)
94 
96  { NULL },
97  { NULL },
98  { NULL },
99  { NULL },
100  { init_ff_cos_tabs_16, AV_ONCE_INIT },
101  { init_ff_cos_tabs_32, AV_ONCE_INIT },
102  { init_ff_cos_tabs_64, AV_ONCE_INIT },
103  { init_ff_cos_tabs_128, AV_ONCE_INIT },
104  { init_ff_cos_tabs_256, AV_ONCE_INIT },
105  { init_ff_cos_tabs_512, AV_ONCE_INIT },
106  { init_ff_cos_tabs_1024, AV_ONCE_INIT },
107  { init_ff_cos_tabs_2048, AV_ONCE_INIT },
108  { init_ff_cos_tabs_4096, AV_ONCE_INIT },
109  { init_ff_cos_tabs_8192, AV_ONCE_INIT },
110  { init_ff_cos_tabs_16384, AV_ONCE_INIT },
111  { init_ff_cos_tabs_32768, AV_ONCE_INIT },
112  { init_ff_cos_tabs_65536, AV_ONCE_INIT },
113  { init_ff_cos_tabs_131072, AV_ONCE_INIT },
114 };
115 
117 {
119 }
120 #endif
121 COSTABLE_CONST FFTSample * const FFT_NAME(ff_cos_tabs)[] = {
122  NULL, NULL, NULL, NULL,
123  FFT_NAME(ff_cos_16),
124  FFT_NAME(ff_cos_32),
125  FFT_NAME(ff_cos_64),
126  FFT_NAME(ff_cos_128),
127  FFT_NAME(ff_cos_256),
128  FFT_NAME(ff_cos_512),
129  FFT_NAME(ff_cos_1024),
130  FFT_NAME(ff_cos_2048),
131  FFT_NAME(ff_cos_4096),
132  FFT_NAME(ff_cos_8192),
133  FFT_NAME(ff_cos_16384),
134  FFT_NAME(ff_cos_32768),
135  FFT_NAME(ff_cos_65536),
136  FFT_NAME(ff_cos_131072),
137 };
138 
139 #endif /* FFT_FIXED_32 */
140 
141 static void fft_permute_c(FFTContext *s, FFTComplex *z);
142 static void fft_calc_c(FFTContext *s, FFTComplex *z);
143 
144 static int split_radix_permutation(int i, int n, int inverse)
145 {
146  int m;
147  if(n <= 2) return i&1;
148  m = n >> 1;
149  if(!(i&m)) return split_radix_permutation(i, m, inverse)*2;
150  m >>= 1;
151  if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1;
152  else return split_radix_permutation(i, m, inverse)*4 - 1;
153 }
154 
155 
156 static const int avx_tab[] = {
157  0, 4, 1, 5, 8, 12, 9, 13, 2, 6, 3, 7, 10, 14, 11, 15
158 };
159 
160 static int is_second_half_of_fft32(int i, int n)
161 {
162  if (n <= 32)
163  return i >= 16;
164  else if (i < n/2)
165  return is_second_half_of_fft32(i, n/2);
166  else if (i < 3*n/4)
167  return is_second_half_of_fft32(i - n/2, n/4);
168  else
169  return is_second_half_of_fft32(i - 3*n/4, n/4);
170 }
171 
173 {
174  int i;
175  int n = 1 << s->nbits;
176 
177  for (i = 0; i < n; i += 16) {
178  int k;
179  if (is_second_half_of_fft32(i, n)) {
180  for (k = 0; k < 16; k++)
181  s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] =
182  i + avx_tab[k];
183 
184  } else {
185  for (k = 0; k < 16; k++) {
186  int j = i + k;
187  j = (j & ~7) | ((j >> 1) & 3) | ((j << 2) & 4);
188  s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] = j;
189  }
190  }
191  }
192 }
193 
194 av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
195 {
196  int i, j, n;
197 
198  s->revtab = NULL;
199  s->revtab32 = NULL;
200 
201  if (nbits < 2 || nbits > 17)
202  goto fail;
203  s->nbits = nbits;
204  n = 1 << nbits;
205 
206  if (nbits <= 16) {
207  s->revtab = av_malloc(n * sizeof(uint16_t));
208  if (!s->revtab)
209  goto fail;
210  } else {
211  s->revtab32 = av_malloc(n * sizeof(uint32_t));
212  if (!s->revtab32)
213  goto fail;
214  }
215  s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
216  if (!s->tmp_buf)
217  goto fail;
218  s->inverse = inverse;
220 
222  s->fft_calc = fft_calc_c;
223 #if CONFIG_MDCT
227 #endif
228 
229 #if FFT_FIXED_32
230  ff_fft_lut_init();
231 #else /* FFT_FIXED_32 */
232 #if FFT_FLOAT
233  if (ARCH_AARCH64) ff_fft_init_aarch64(s);
234  if (ARCH_ARM) ff_fft_init_arm(s);
235  if (ARCH_PPC) ff_fft_init_ppc(s);
236  if (ARCH_X86) ff_fft_init_x86(s);
237  if (HAVE_MIPSFPU) ff_fft_init_mips(s);
238 #endif
239  for(j=4; j<=nbits; j++) {
241  }
242 #endif /* FFT_FIXED_32 */
243 
244 
245  if (ARCH_X86 && FFT_FLOAT && s->fft_permutation == FF_FFT_PERM_AVX) {
246  fft_perm_avx(s);
247  } else {
248 #define PROCESS_FFT_PERM_SWAP_LSBS(num) do {\
249  for(i = 0; i < n; i++) {\
250  int k;\
251  j = i;\
252  j = (j & ~3) | ((j >> 1) & 1) | ((j << 1) & 2);\
253  k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\
254  s->revtab##num[k] = j;\
255  } \
256 } while(0);
257 
258 #define PROCESS_FFT_PERM_DEFAULT(num) do {\
259  for(i = 0; i < n; i++) {\
260  int k;\
261  j = i;\
262  k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\
263  s->revtab##num[k] = j;\
264  } \
265 } while(0);
266 
267 #define SPLIT_RADIX_PERMUTATION(num) do { \
268  if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS) {\
269  PROCESS_FFT_PERM_SWAP_LSBS(num) \
270  } else {\
271  PROCESS_FFT_PERM_DEFAULT(num) \
272  }\
273 } while(0);
274 
275  if (s->revtab)
277  if (s->revtab32)
279 
280 #undef PROCESS_FFT_PERM_DEFAULT
281 #undef PROCESS_FFT_PERM_SWAP_LSBS
282 #undef SPLIT_RADIX_PERMUTATION
283  }
284 
285  return 0;
286  fail:
287  av_freep(&s->revtab);
288  av_freep(&s->revtab32);
289  av_freep(&s->tmp_buf);
290  return -1;
291 }
292 
294 {
295  int j, np;
296  const uint16_t *revtab = s->revtab;
297  const uint32_t *revtab32 = s->revtab32;
298  np = 1 << s->nbits;
299  /* TODO: handle split-radix permute in a more optimal way, probably in-place */
300  if (revtab) {
301  for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
302  } else
303  for(j=0;j<np;j++) s->tmp_buf[revtab32[j]] = z[j];
304 
305  memcpy(z, s->tmp_buf, np * sizeof(FFTComplex));
306 }
307 
309 {
310  av_freep(&s->revtab);
311  av_freep(&s->revtab32);
312  av_freep(&s->tmp_buf);
313 }
314 
315 #if FFT_FIXED_32
316 
317 static void fft_calc_c(FFTContext *s, FFTComplex *z) {
318 
319  int nbits, i, n, num_transforms, offset, step;
320  int n4, n2, n34;
321  unsigned tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
322  FFTComplex *tmpz;
323  const int fft_size = (1 << s->nbits);
324  int64_t accu;
325 
326  num_transforms = (0x2aab >> (16 - s->nbits)) | 1;
327 
328  for (n=0; n<num_transforms; n++){
329  offset = ff_fft_offsets_lut[n] << 2;
330  tmpz = z + offset;
331 
332  tmp1 = tmpz[0].re + (unsigned)tmpz[1].re;
333  tmp5 = tmpz[2].re + (unsigned)tmpz[3].re;
334  tmp2 = tmpz[0].im + (unsigned)tmpz[1].im;
335  tmp6 = tmpz[2].im + (unsigned)tmpz[3].im;
336  tmp3 = tmpz[0].re - (unsigned)tmpz[1].re;
337  tmp8 = tmpz[2].im - (unsigned)tmpz[3].im;
338  tmp4 = tmpz[0].im - (unsigned)tmpz[1].im;
339  tmp7 = tmpz[2].re - (unsigned)tmpz[3].re;
340 
341  tmpz[0].re = tmp1 + tmp5;
342  tmpz[2].re = tmp1 - tmp5;
343  tmpz[0].im = tmp2 + tmp6;
344  tmpz[2].im = tmp2 - tmp6;
345  tmpz[1].re = tmp3 + tmp8;
346  tmpz[3].re = tmp3 - tmp8;
347  tmpz[1].im = tmp4 - tmp7;
348  tmpz[3].im = tmp4 + tmp7;
349  }
350 
351  if (fft_size < 8)
352  return;
353 
354  num_transforms = (num_transforms >> 1) | 1;
355 
356  for (n=0; n<num_transforms; n++){
357  offset = ff_fft_offsets_lut[n] << 3;
358  tmpz = z + offset;
359 
360  tmp1 = tmpz[4].re + (unsigned)tmpz[5].re;
361  tmp3 = tmpz[6].re + (unsigned)tmpz[7].re;
362  tmp2 = tmpz[4].im + (unsigned)tmpz[5].im;
363  tmp4 = tmpz[6].im + (unsigned)tmpz[7].im;
364  tmp5 = tmp1 + tmp3;
365  tmp7 = tmp1 - tmp3;
366  tmp6 = tmp2 + tmp4;
367  tmp8 = tmp2 - tmp4;
368 
369  tmp1 = tmpz[4].re - (unsigned)tmpz[5].re;
370  tmp2 = tmpz[4].im - (unsigned)tmpz[5].im;
371  tmp3 = tmpz[6].re - (unsigned)tmpz[7].re;
372  tmp4 = tmpz[6].im - (unsigned)tmpz[7].im;
373 
374  tmpz[4].re = tmpz[0].re - tmp5;
375  tmpz[0].re = tmpz[0].re + tmp5;
376  tmpz[4].im = tmpz[0].im - tmp6;
377  tmpz[0].im = tmpz[0].im + tmp6;
378  tmpz[6].re = tmpz[2].re - tmp8;
379  tmpz[2].re = tmpz[2].re + tmp8;
380  tmpz[6].im = tmpz[2].im + tmp7;
381  tmpz[2].im = tmpz[2].im - tmp7;
382 
383  accu = (int64_t)Q31(M_SQRT1_2)*(int)(tmp1 + tmp2);
384  tmp5 = (int32_t)((accu + 0x40000000) >> 31);
385  accu = (int64_t)Q31(M_SQRT1_2)*(int)(tmp3 - tmp4);
386  tmp7 = (int32_t)((accu + 0x40000000) >> 31);
387  accu = (int64_t)Q31(M_SQRT1_2)*(int)(tmp2 - tmp1);
388  tmp6 = (int32_t)((accu + 0x40000000) >> 31);
389  accu = (int64_t)Q31(M_SQRT1_2)*(int)(tmp3 + tmp4);
390  tmp8 = (int32_t)((accu + 0x40000000) >> 31);
391  tmp1 = tmp5 + tmp7;
392  tmp3 = tmp5 - tmp7;
393  tmp2 = tmp6 + tmp8;
394  tmp4 = tmp6 - tmp8;
395 
396  tmpz[5].re = tmpz[1].re - tmp1;
397  tmpz[1].re = tmpz[1].re + tmp1;
398  tmpz[5].im = tmpz[1].im - tmp2;
399  tmpz[1].im = tmpz[1].im + tmp2;
400  tmpz[7].re = tmpz[3].re - tmp4;
401  tmpz[3].re = tmpz[3].re + tmp4;
402  tmpz[7].im = tmpz[3].im + tmp3;
403  tmpz[3].im = tmpz[3].im - tmp3;
404  }
405 
406  step = 1 << ((MAX_LOG2_NFFT-4) - 4);
407  n4 = 4;
408 
409  for (nbits=4; nbits<=s->nbits; nbits++){
410  n2 = 2*n4;
411  n34 = 3*n4;
412  num_transforms = (num_transforms >> 1) | 1;
413 
414  for (n=0; n<num_transforms; n++){
415  const FFTSample *w_re_ptr = ff_w_tab_sr + step;
416  const FFTSample *w_im_ptr = ff_w_tab_sr + MAX_FFT_SIZE/(4*16) - step;
417  offset = ff_fft_offsets_lut[n] << nbits;
418  tmpz = z + offset;
419 
420  tmp5 = tmpz[ n2].re + (unsigned)tmpz[n34].re;
421  tmp1 = tmpz[ n2].re - (unsigned)tmpz[n34].re;
422  tmp6 = tmpz[ n2].im + (unsigned)tmpz[n34].im;
423  tmp2 = tmpz[ n2].im - (unsigned)tmpz[n34].im;
424 
425  tmpz[ n2].re = tmpz[ 0].re - tmp5;
426  tmpz[ 0].re = tmpz[ 0].re + tmp5;
427  tmpz[ n2].im = tmpz[ 0].im - tmp6;
428  tmpz[ 0].im = tmpz[ 0].im + tmp6;
429  tmpz[n34].re = tmpz[n4].re - tmp2;
430  tmpz[ n4].re = tmpz[n4].re + tmp2;
431  tmpz[n34].im = tmpz[n4].im + tmp1;
432  tmpz[ n4].im = tmpz[n4].im - tmp1;
433 
434  for (i=1; i<n4; i++){
435  FFTSample w_re = w_re_ptr[0];
436  FFTSample w_im = w_im_ptr[0];
437  accu = (int64_t)w_re*tmpz[ n2+i].re;
438  accu += (int64_t)w_im*tmpz[ n2+i].im;
439  tmp1 = (int32_t)((accu + 0x40000000) >> 31);
440  accu = (int64_t)w_re*tmpz[ n2+i].im;
441  accu -= (int64_t)w_im*tmpz[ n2+i].re;
442  tmp2 = (int32_t)((accu + 0x40000000) >> 31);
443  accu = (int64_t)w_re*tmpz[n34+i].re;
444  accu -= (int64_t)w_im*tmpz[n34+i].im;
445  tmp3 = (int32_t)((accu + 0x40000000) >> 31);
446  accu = (int64_t)w_re*tmpz[n34+i].im;
447  accu += (int64_t)w_im*tmpz[n34+i].re;
448  tmp4 = (int32_t)((accu + 0x40000000) >> 31);
449 
450  tmp5 = tmp1 + tmp3;
451  tmp1 = tmp1 - tmp3;
452  tmp6 = tmp2 + tmp4;
453  tmp2 = tmp2 - tmp4;
454 
455  tmpz[ n2+i].re = tmpz[ i].re - tmp5;
456  tmpz[ i].re = tmpz[ i].re + tmp5;
457  tmpz[ n2+i].im = tmpz[ i].im - tmp6;
458  tmpz[ i].im = tmpz[ i].im + tmp6;
459  tmpz[n34+i].re = tmpz[n4+i].re - tmp2;
460  tmpz[ n4+i].re = tmpz[n4+i].re + tmp2;
461  tmpz[n34+i].im = tmpz[n4+i].im + tmp1;
462  tmpz[ n4+i].im = tmpz[n4+i].im - tmp1;
463 
464  w_re_ptr += step;
465  w_im_ptr -= step;
466  }
467  }
468  step >>= 1;
469  n4 <<= 1;
470  }
471 }
472 
473 #else /* FFT_FIXED_32 */
474 
475 #define BUTTERFLIES(a0,a1,a2,a3) {\
476  BF(t3, t5, t5, t1);\
477  BF(a2.re, a0.re, a0.re, t5);\
478  BF(a3.im, a1.im, a1.im, t3);\
479  BF(t4, t6, t2, t6);\
480  BF(a3.re, a1.re, a1.re, t4);\
481  BF(a2.im, a0.im, a0.im, t6);\
482 }
483 
484 // force loading all the inputs before storing any.
485 // this is slightly slower for small data, but avoids store->load aliasing
486 // for addresses separated by large powers of 2.
487 #define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
488  FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
489  BF(t3, t5, t5, t1);\
490  BF(a2.re, a0.re, r0, t5);\
491  BF(a3.im, a1.im, i1, t3);\
492  BF(t4, t6, t2, t6);\
493  BF(a3.re, a1.re, r1, t4);\
494  BF(a2.im, a0.im, i0, t6);\
495 }
496 
497 #define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
498  CMUL(t1, t2, a2.re, a2.im, wre, -wim);\
499  CMUL(t5, t6, a3.re, a3.im, wre, wim);\
500  BUTTERFLIES(a0,a1,a2,a3)\
501 }
502 
503 #define TRANSFORM_ZERO(a0,a1,a2,a3) {\
504  t1 = a2.re;\
505  t2 = a2.im;\
506  t5 = a3.re;\
507  t6 = a3.im;\
508  BUTTERFLIES(a0,a1,a2,a3)\
509 }
510 
511 /* z[0...8n-1], w[1...2n-1] */
512 #define PASS(name)\
513 static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
514 {\
515  FFTDouble t1, t2, t3, t4, t5, t6;\
516  int o1 = 2*n;\
517  int o2 = 4*n;\
518  int o3 = 6*n;\
519  const FFTSample *wim = wre+o1;\
520  n--;\
521 \
522  TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
523  TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
524  do {\
525  z += 2;\
526  wre += 2;\
527  wim -= 2;\
528  TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
529  TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
530  } while(--n);\
531 }
532 
533 PASS(pass)
534 #if !CONFIG_SMALL
535 #undef BUTTERFLIES
536 #define BUTTERFLIES BUTTERFLIES_BIG
537 PASS(pass_big)
538 #endif
539 
540 #define DECL_FFT(n,n2,n4)\
541 static void fft##n(FFTComplex *z)\
542 {\
543  fft##n2(z);\
544  fft##n4(z+n4*2);\
545  fft##n4(z+n4*3);\
546  pass(z,FFT_NAME(ff_cos_##n),n4/2);\
547 }
548 
549 static void fft4(FFTComplex *z)
550 {
551  FFTDouble t1, t2, t3, t4, t5, t6, t7, t8;
552 
553  BF(t3, t1, z[0].re, z[1].re);
554  BF(t8, t6, z[3].re, z[2].re);
555  BF(z[2].re, z[0].re, t1, t6);
556  BF(t4, t2, z[0].im, z[1].im);
557  BF(t7, t5, z[2].im, z[3].im);
558  BF(z[3].im, z[1].im, t4, t8);
559  BF(z[3].re, z[1].re, t3, t7);
560  BF(z[2].im, z[0].im, t2, t5);
561 }
562 
563 static void fft8(FFTComplex *z)
564 {
565  FFTDouble t1, t2, t3, t4, t5, t6;
566 
567  fft4(z);
568 
569  BF(t1, z[5].re, z[4].re, -z[5].re);
570  BF(t2, z[5].im, z[4].im, -z[5].im);
571  BF(t5, z[7].re, z[6].re, -z[7].re);
572  BF(t6, z[7].im, z[6].im, -z[7].im);
573 
574  BUTTERFLIES(z[0],z[2],z[4],z[6]);
575  TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf);
576 }
577 
578 #if !CONFIG_SMALL
579 static void fft16(FFTComplex *z)
580 {
581  FFTDouble t1, t2, t3, t4, t5, t6;
582  FFTSample cos_16_1 = FFT_NAME(ff_cos_16)[1];
583  FFTSample cos_16_3 = FFT_NAME(ff_cos_16)[3];
584 
585  fft8(z);
586  fft4(z+8);
587  fft4(z+12);
588 
589  TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
590  TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf);
591  TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3);
592  TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1);
593 }
594 #else
595 DECL_FFT(16,8,4)
596 #endif
597 DECL_FFT(32,16,8)
598 DECL_FFT(64,32,16)
599 DECL_FFT(128,64,32)
600 DECL_FFT(256,128,64)
601 DECL_FFT(512,256,128)
602 #if !CONFIG_SMALL
603 #define pass pass_big
604 #endif
605 DECL_FFT(1024,512,256)
606 DECL_FFT(2048,1024,512)
607 DECL_FFT(4096,2048,1024)
608 DECL_FFT(8192,4096,2048)
609 DECL_FFT(16384,8192,4096)
610 DECL_FFT(32768,16384,8192)
611 DECL_FFT(65536,32768,16384)
612 DECL_FFT(131072,65536,32768)
613 
614 static void (* const fft_dispatch[])(FFTComplex*) = {
615  fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
616  fft2048, fft4096, fft8192, fft16384, fft32768, fft65536, fft131072
617 };
618 
619 static void fft_calc_c(FFTContext *s, FFTComplex *z)
620 {
621  fft_dispatch[s->nbits-2](z);
622 }
623 #endif /* FFT_FIXED_32 */
static void fft_permute_c(FFTContext *s, FFTComplex *z)
Definition: fft_template.c:293
#define NULL
Definition: coverity.c:32
#define MAX_FFT_SIZE
Definition: fft_table.h:60
#define BUTTERFLIES(a0, a1, a2, a3)
Definition: fft_template.c:536
float FFTDouble
Definition: fft.h:44
static CosTabsInitOnce cos_tabs_init_once[]
Definition: fft_template.c:95
void ff_fft_lut_init(void)
float re
Definition: fft.c:82
#define M_SQRT1_2
Definition: mathematics.h:58
static const int avx_tab[]
Definition: fft_template.c:156
#define SPLIT_RADIX_PERMUTATION(num)
FFTSample re
Definition: avfft.h:38
#define t8
Definition: regdef.h:53
static int split_radix_permutation(int i, int n, int inverse)
Definition: fft_template.c:144
fg index
#define MAX_LOG2_NFFT
Specifies maximum allowed fft size.
Definition: fft_table.h:59
#define t7
Definition: regdef.h:35
#define av_cold
Definition: attributes.h:88
#define av_malloc(s)
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
COSTABLE(16)
av_cold void ff_fft_init_arm(FFTContext *s)
Definition: fft_init_arm.c:38
void ff_fft_init_ppc(FFTContext *s)
Definition: fft_init.c:151
const int32_t ff_w_tab_sr[MAX_FFT_SIZE/(4 *16)]
#define DECL_FFT(n, n2, n4)
Definition: fft_template.c:540
#define AVOnce
Definition: thread.h:172
#define INIT_FF_COS_TABS_FUNC(index, size)
Definition: fft_template.c:74
static void(*const fft_dispatch[])(FFTComplex *)
Definition: fft_template.c:614
void(* fft_permute)(struct FFTContext *s, FFTComplex *z)
Do the permutation needed BEFORE calling fft_calc().
Definition: fft.h:96
#define t1
Definition: regdef.h:29
void(* mdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input)
Definition: fft.h:104
void(* imdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input)
Definition: fft.h:102
#define t3
Definition: regdef.h:31
float FFTSample
Definition: avfft.h:35
#define fail()
Definition: checkasm.h:133
static av_cold void fft_perm_avx(FFTContext *s)
Definition: fft_template.c:172
#define pass
Definition: fft_template.c:603
#define Q31(x)
Definition: aac_defines.h:98
Definition: fft.h:83
av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
Set up a complex FFT.
Definition: fft_template.c:194
uint32_t * revtab32
Definition: fft.h:107
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
#define PASS(name)
Definition: fft_template.c:512
int nbits
Definition: fft.h:84
int inverse
Definition: fft.h:85
int32_t
#define s(width, name)
Definition: cbs_vp9.c:257
enum fft_permutation_type fft_permutation
Definition: fft.h:105
#define ff_imdct_half_c
Definition: fft-internal.h:60
#define ff_imdct_calc_c
Definition: fft-internal.h:59
void(* func)(void)
Definition: fft_template.c:70
static int is_second_half_of_fft32(int i, int n)
Definition: fft_template.c:160
#define AV_ONCE_INIT
Definition: thread.h:173
float im
Definition: fft.c:82
#define FFT_FLOAT
Definition: aac_defines.h:82
uint16_t ff_fft_offsets_lut[21845]
#define t5
Definition: regdef.h:33
static av_cold void init_ff_cos_tabs(int index)
Definition: fft_template.c:57
void(* imdct_half)(struct FFTContext *s, FFTSample *output, const FFTSample *input)
Definition: fft.h:103
#define TRANSFORM(a0, a1, a2, a3, wre, wim)
Definition: fft_template.c:497
#define TRANSFORM_ZERO(a0, a1, a2, a3)
Definition: fft_template.c:503
static void fft4(FFTComplex *z)
Definition: fft_template.c:549
int
av_cold void ff_fft_end(FFTContext *s)
Definition: fft_template.c:308
FFTSample im
Definition: avfft.h:38
#define t6
Definition: regdef.h:34
#define COSTABLE_CONST
Definition: fft.h:114
av_cold void ff_fft_init_aarch64(FFTContext *s)
void(* fft_calc)(struct FFTContext *s, FFTComplex *z)
Do a complex FFT with the parameters defined in ff_fft_init().
Definition: fft.h:101
#define t4
Definition: regdef.h:32
void ff_fft_init_mips(FFTContext *s)
FFT transform.
Definition: fft_mips.c:501
#define BF(a, b, c, s)
#define ff_mdct_calc_c
Definition: fft-internal.h:61
static int ff_thread_once(char *control, void(*routine)(void))
Definition: thread.h:175
static void fft8(FFTComplex *z)
Definition: fft_template.c:563
COSTABLE_CONST FFTSample *const FFT_NAME(ff_cos_tabs)[]
static const struct twinvq_data tab
#define av_freep(p)
uint16_t * revtab
Definition: fft.h:86
#define M_PI
Definition: mathematics.h:52
static uint32_t inverse(uint32_t v)
find multiplicative inverse modulo 2 ^ 32
Definition: asfcrypt.c:35
av_cold void ff_init_ff_cos_tabs(int index)
Initialize the cosine table in ff_cos_tabs[index].
Definition: fft_template.c:116
static void fft_calc_c(FFTContext *s, FFTComplex *z)
Definition: fft_template.c:619
static void fft16(FFTComplex *z)
Definition: fft_template.c:579
#define t2
Definition: regdef.h:30
int i
Definition: input.c:407
definitions and tables for FFT
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about which is also called distortion Distortion can be quantified by almost any quality measurement one chooses the sum of squared differences is used but more complex methods that consider psychovisual effects can be used as well It makes no difference in this discussion First step
void ff_fft_init_x86(FFTContext *s)
Definition: fft_init.c:27
FFTComplex * tmp_buf
Definition: fft.h:87