FFmpeg
fft_template.c
Go to the documentation of this file.
1 /*
2  * FFT/IFFT transforms
3  * Copyright (c) 2008 Loren Merritt
4  * Copyright (c) 2002 Fabrice Bellard
5  * Partly based on libdjbfft by D. J. Bernstein
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 /**
25  * @file
26  * FFT/IFFT transforms.
27  */
28 
29 #include <stdlib.h>
30 #include <string.h>
31 #include "libavutil/mathematics.h"
32 #include "libavutil/thread.h"
33 #include "fft.h"
34 #include "fft-internal.h"
35 
36 #if FFT_FIXED_32
37 #include "fft_table.h"
38 
39 static void av_cold fft_lut_init(void)
40 {
41  int n = 0;
42  ff_fft_lut_init(ff_fft_offsets_lut, 0, 1 << 17, &n);
43 }
44 
45 #else /* FFT_FIXED_32 */
46 
47 /* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */
48 #if !CONFIG_HARDCODED_TABLES
49 COSTABLE(16);
50 COSTABLE(32);
51 COSTABLE(64);
52 COSTABLE(128);
53 COSTABLE(256);
54 COSTABLE(512);
55 COSTABLE(1024);
56 COSTABLE(2048);
57 COSTABLE(4096);
58 COSTABLE(8192);
59 COSTABLE(16384);
60 COSTABLE(32768);
61 COSTABLE(65536);
62 COSTABLE(131072);
63 
64 static av_cold void init_ff_cos_tabs(int index)
65 {
66  int i;
67  int m = 1<<index;
68  double freq = 2*M_PI/m;
69  FFTSample *tab = FFT_NAME(ff_cos_tabs)[index];
70  for(i=0; i<=m/4; i++)
71  tab[i] = FIX15(cos(i*freq));
72  for(i=1; i<m/4; i++)
73  tab[m/2-i] = tab[i];
74 }
75 
76 typedef struct CosTabsInitOnce {
77  void (*func)(void);
80 
81 #define INIT_FF_COS_TABS_FUNC(index, size) \
82 static av_cold void init_ff_cos_tabs_ ## size (void)\
83 { \
84  init_ff_cos_tabs(index); \
85 }
86 
93 INIT_FF_COS_TABS_FUNC(10, 1024)
94 INIT_FF_COS_TABS_FUNC(11, 2048)
95 INIT_FF_COS_TABS_FUNC(12, 4096)
96 INIT_FF_COS_TABS_FUNC(13, 8192)
97 INIT_FF_COS_TABS_FUNC(14, 16384)
98 INIT_FF_COS_TABS_FUNC(15, 32768)
99 INIT_FF_COS_TABS_FUNC(16, 65536)
100 INIT_FF_COS_TABS_FUNC(17, 131072)
101 
103  { NULL },
104  { NULL },
105  { NULL },
106  { NULL },
107  { init_ff_cos_tabs_16, AV_ONCE_INIT },
108  { init_ff_cos_tabs_32, AV_ONCE_INIT },
109  { init_ff_cos_tabs_64, AV_ONCE_INIT },
110  { init_ff_cos_tabs_128, AV_ONCE_INIT },
111  { init_ff_cos_tabs_256, AV_ONCE_INIT },
112  { init_ff_cos_tabs_512, AV_ONCE_INIT },
113  { init_ff_cos_tabs_1024, AV_ONCE_INIT },
114  { init_ff_cos_tabs_2048, AV_ONCE_INIT },
115  { init_ff_cos_tabs_4096, AV_ONCE_INIT },
116  { init_ff_cos_tabs_8192, AV_ONCE_INIT },
117  { init_ff_cos_tabs_16384, AV_ONCE_INIT },
118  { init_ff_cos_tabs_32768, AV_ONCE_INIT },
119  { init_ff_cos_tabs_65536, AV_ONCE_INIT },
120  { init_ff_cos_tabs_131072, AV_ONCE_INIT },
121 };
122 
123 #endif
124 COSTABLE_CONST FFTSample * const FFT_NAME(ff_cos_tabs)[] = {
125  NULL, NULL, NULL, NULL,
126  FFT_NAME(ff_cos_16),
127  FFT_NAME(ff_cos_32),
128  FFT_NAME(ff_cos_64),
129  FFT_NAME(ff_cos_128),
130  FFT_NAME(ff_cos_256),
131  FFT_NAME(ff_cos_512),
132  FFT_NAME(ff_cos_1024),
133  FFT_NAME(ff_cos_2048),
134  FFT_NAME(ff_cos_4096),
135  FFT_NAME(ff_cos_8192),
136  FFT_NAME(ff_cos_16384),
137  FFT_NAME(ff_cos_32768),
138  FFT_NAME(ff_cos_65536),
139  FFT_NAME(ff_cos_131072),
140 };
141 
142 #endif /* FFT_FIXED_32 */
143 
144 static void fft_permute_c(FFTContext *s, FFTComplex *z);
145 static void fft_calc_c(FFTContext *s, FFTComplex *z);
146 
147 static int split_radix_permutation(int i, int n, int inverse)
148 {
149  int m;
150  if(n <= 2) return i&1;
151  m = n >> 1;
152  if(!(i&m)) return split_radix_permutation(i, m, inverse)*2;
153  m >>= 1;
154  if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1;
155  else return split_radix_permutation(i, m, inverse)*4 - 1;
156 }
157 
159 {
160 #if (!CONFIG_HARDCODED_TABLES) && (!FFT_FIXED_32)
162 #endif
163 }
164 
165 static const int avx_tab[] = {
166  0, 4, 1, 5, 8, 12, 9, 13, 2, 6, 3, 7, 10, 14, 11, 15
167 };
168 
169 static int is_second_half_of_fft32(int i, int n)
170 {
171  if (n <= 32)
172  return i >= 16;
173  else if (i < n/2)
174  return is_second_half_of_fft32(i, n/2);
175  else if (i < 3*n/4)
176  return is_second_half_of_fft32(i - n/2, n/4);
177  else
178  return is_second_half_of_fft32(i - 3*n/4, n/4);
179 }
180 
182 {
183  int i;
184  int n = 1 << s->nbits;
185 
186  for (i = 0; i < n; i += 16) {
187  int k;
188  if (is_second_half_of_fft32(i, n)) {
189  for (k = 0; k < 16; k++)
190  s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] =
191  i + avx_tab[k];
192 
193  } else {
194  for (k = 0; k < 16; k++) {
195  int j = i + k;
196  j = (j & ~7) | ((j >> 1) & 3) | ((j << 2) & 4);
197  s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] = j;
198  }
199  }
200  }
201 }
202 
203 av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
204 {
205  int i, j, n;
206 
207  s->revtab = NULL;
208  s->revtab32 = NULL;
209 
210  if (nbits < 2 || nbits > 17)
211  goto fail;
212  s->nbits = nbits;
213  n = 1 << nbits;
214 
215  if (nbits <= 16) {
216  s->revtab = av_malloc(n * sizeof(uint16_t));
217  if (!s->revtab)
218  goto fail;
219  } else {
220  s->revtab32 = av_malloc(n * sizeof(uint32_t));
221  if (!s->revtab32)
222  goto fail;
223  }
224  s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
225  if (!s->tmp_buf)
226  goto fail;
227  s->inverse = inverse;
229 
231  s->fft_calc = fft_calc_c;
232 #if CONFIG_MDCT
236 #endif
237 
238 #if FFT_FIXED_32
239  {
240  static AVOnce control = AV_ONCE_INIT;
241  ff_thread_once(&control, fft_lut_init);
242  }
243 #else /* FFT_FIXED_32 */
244 #if FFT_FLOAT
245  if (ARCH_AARCH64) ff_fft_init_aarch64(s);
246  if (ARCH_ARM) ff_fft_init_arm(s);
247  if (ARCH_PPC) ff_fft_init_ppc(s);
248  if (ARCH_X86) ff_fft_init_x86(s);
249  if (CONFIG_MDCT) s->mdct_calcw = s->mdct_calc;
250  if (HAVE_MIPSFPU) ff_fft_init_mips(s);
251 #else
252  if (CONFIG_MDCT) s->mdct_calcw = ff_mdct_calcw_c;
253  if (ARCH_ARM) ff_fft_fixed_init_arm(s);
254 #endif
255  for(j=4; j<=nbits; j++) {
257  }
258 #endif /* FFT_FIXED_32 */
259 
260 
261  if (s->fft_permutation == FF_FFT_PERM_AVX) {
262  fft_perm_avx(s);
263  } else {
264 #define PROCESS_FFT_PERM_SWAP_LSBS(num) do {\
265  for(i = 0; i < n; i++) {\
266  int k;\
267  j = i;\
268  j = (j & ~3) | ((j >> 1) & 1) | ((j << 1) & 2);\
269  k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\
270  s->revtab##num[k] = j;\
271  } \
272 } while(0);
273 
274 #define PROCESS_FFT_PERM_DEFAULT(num) do {\
275  for(i = 0; i < n; i++) {\
276  int k;\
277  j = i;\
278  k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\
279  s->revtab##num[k] = j;\
280  } \
281 } while(0);
282 
283 #define SPLIT_RADIX_PERMUTATION(num) do { \
284  if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS) {\
285  PROCESS_FFT_PERM_SWAP_LSBS(num) \
286  } else {\
287  PROCESS_FFT_PERM_DEFAULT(num) \
288  }\
289 } while(0);
290 
291  if (s->revtab)
293  if (s->revtab32)
295 
296 #undef PROCESS_FFT_PERM_DEFAULT
297 #undef PROCESS_FFT_PERM_SWAP_LSBS
298 #undef SPLIT_RADIX_PERMUTATION
299  }
300 
301  return 0;
302  fail:
303  av_freep(&s->revtab);
304  av_freep(&s->revtab32);
305  av_freep(&s->tmp_buf);
306  return -1;
307 }
308 
310 {
311  int j, np;
312  const uint16_t *revtab = s->revtab;
313  const uint32_t *revtab32 = s->revtab32;
314  np = 1 << s->nbits;
315  /* TODO: handle split-radix permute in a more optimal way, probably in-place */
316  if (revtab) {
317  for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
318  } else
319  for(j=0;j<np;j++) s->tmp_buf[revtab32[j]] = z[j];
320 
321  memcpy(z, s->tmp_buf, np * sizeof(FFTComplex));
322 }
323 
325 {
326  av_freep(&s->revtab);
327  av_freep(&s->revtab32);
328  av_freep(&s->tmp_buf);
329 }
330 
331 #if FFT_FIXED_32
332 
333 static void fft_calc_c(FFTContext *s, FFTComplex *z) {
334 
335  int nbits, i, n, num_transforms, offset, step;
336  int n4, n2, n34;
337  unsigned tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
338  FFTComplex *tmpz;
339  const int fft_size = (1 << s->nbits);
340  int64_t accu;
341 
342  num_transforms = (0x2aab >> (16 - s->nbits)) | 1;
343 
344  for (n=0; n<num_transforms; n++){
345  offset = ff_fft_offsets_lut[n] << 2;
346  tmpz = z + offset;
347 
348  tmp1 = tmpz[0].re + (unsigned)tmpz[1].re;
349  tmp5 = tmpz[2].re + (unsigned)tmpz[3].re;
350  tmp2 = tmpz[0].im + (unsigned)tmpz[1].im;
351  tmp6 = tmpz[2].im + (unsigned)tmpz[3].im;
352  tmp3 = tmpz[0].re - (unsigned)tmpz[1].re;
353  tmp8 = tmpz[2].im - (unsigned)tmpz[3].im;
354  tmp4 = tmpz[0].im - (unsigned)tmpz[1].im;
355  tmp7 = tmpz[2].re - (unsigned)tmpz[3].re;
356 
357  tmpz[0].re = tmp1 + tmp5;
358  tmpz[2].re = tmp1 - tmp5;
359  tmpz[0].im = tmp2 + tmp6;
360  tmpz[2].im = tmp2 - tmp6;
361  tmpz[1].re = tmp3 + tmp8;
362  tmpz[3].re = tmp3 - tmp8;
363  tmpz[1].im = tmp4 - tmp7;
364  tmpz[3].im = tmp4 + tmp7;
365  }
366 
367  if (fft_size < 8)
368  return;
369 
370  num_transforms = (num_transforms >> 1) | 1;
371 
372  for (n=0; n<num_transforms; n++){
373  offset = ff_fft_offsets_lut[n] << 3;
374  tmpz = z + offset;
375 
376  tmp1 = tmpz[4].re + (unsigned)tmpz[5].re;
377  tmp3 = tmpz[6].re + (unsigned)tmpz[7].re;
378  tmp2 = tmpz[4].im + (unsigned)tmpz[5].im;
379  tmp4 = tmpz[6].im + (unsigned)tmpz[7].im;
380  tmp5 = tmp1 + tmp3;
381  tmp7 = tmp1 - tmp3;
382  tmp6 = tmp2 + tmp4;
383  tmp8 = tmp2 - tmp4;
384 
385  tmp1 = tmpz[4].re - (unsigned)tmpz[5].re;
386  tmp2 = tmpz[4].im - (unsigned)tmpz[5].im;
387  tmp3 = tmpz[6].re - (unsigned)tmpz[7].re;
388  tmp4 = tmpz[6].im - (unsigned)tmpz[7].im;
389 
390  tmpz[4].re = tmpz[0].re - tmp5;
391  tmpz[0].re = tmpz[0].re + tmp5;
392  tmpz[4].im = tmpz[0].im - tmp6;
393  tmpz[0].im = tmpz[0].im + tmp6;
394  tmpz[6].re = tmpz[2].re - tmp8;
395  tmpz[2].re = tmpz[2].re + tmp8;
396  tmpz[6].im = tmpz[2].im + tmp7;
397  tmpz[2].im = tmpz[2].im - tmp7;
398 
399  accu = (int64_t)Q31(M_SQRT1_2)*(int)(tmp1 + tmp2);
400  tmp5 = (int32_t)((accu + 0x40000000) >> 31);
401  accu = (int64_t)Q31(M_SQRT1_2)*(int)(tmp3 - tmp4);
402  tmp7 = (int32_t)((accu + 0x40000000) >> 31);
403  accu = (int64_t)Q31(M_SQRT1_2)*(int)(tmp2 - tmp1);
404  tmp6 = (int32_t)((accu + 0x40000000) >> 31);
405  accu = (int64_t)Q31(M_SQRT1_2)*(int)(tmp3 + tmp4);
406  tmp8 = (int32_t)((accu + 0x40000000) >> 31);
407  tmp1 = tmp5 + tmp7;
408  tmp3 = tmp5 - tmp7;
409  tmp2 = tmp6 + tmp8;
410  tmp4 = tmp6 - tmp8;
411 
412  tmpz[5].re = tmpz[1].re - tmp1;
413  tmpz[1].re = tmpz[1].re + tmp1;
414  tmpz[5].im = tmpz[1].im - tmp2;
415  tmpz[1].im = tmpz[1].im + tmp2;
416  tmpz[7].re = tmpz[3].re - tmp4;
417  tmpz[3].re = tmpz[3].re + tmp4;
418  tmpz[7].im = tmpz[3].im + tmp3;
419  tmpz[3].im = tmpz[3].im - tmp3;
420  }
421 
422  step = 1 << ((MAX_LOG2_NFFT-4) - 4);
423  n4 = 4;
424 
425  for (nbits=4; nbits<=s->nbits; nbits++){
426  n2 = 2*n4;
427  n34 = 3*n4;
428  num_transforms = (num_transforms >> 1) | 1;
429 
430  for (n=0; n<num_transforms; n++){
431  const FFTSample *w_re_ptr = ff_w_tab_sr + step;
432  const FFTSample *w_im_ptr = ff_w_tab_sr + MAX_FFT_SIZE/(4*16) - step;
433  offset = ff_fft_offsets_lut[n] << nbits;
434  tmpz = z + offset;
435 
436  tmp5 = tmpz[ n2].re + (unsigned)tmpz[n34].re;
437  tmp1 = tmpz[ n2].re - (unsigned)tmpz[n34].re;
438  tmp6 = tmpz[ n2].im + (unsigned)tmpz[n34].im;
439  tmp2 = tmpz[ n2].im - (unsigned)tmpz[n34].im;
440 
441  tmpz[ n2].re = tmpz[ 0].re - tmp5;
442  tmpz[ 0].re = tmpz[ 0].re + tmp5;
443  tmpz[ n2].im = tmpz[ 0].im - tmp6;
444  tmpz[ 0].im = tmpz[ 0].im + tmp6;
445  tmpz[n34].re = tmpz[n4].re - tmp2;
446  tmpz[ n4].re = tmpz[n4].re + tmp2;
447  tmpz[n34].im = tmpz[n4].im + tmp1;
448  tmpz[ n4].im = tmpz[n4].im - tmp1;
449 
450  for (i=1; i<n4; i++){
451  FFTSample w_re = w_re_ptr[0];
452  FFTSample w_im = w_im_ptr[0];
453  accu = (int64_t)w_re*tmpz[ n2+i].re;
454  accu += (int64_t)w_im*tmpz[ n2+i].im;
455  tmp1 = (int32_t)((accu + 0x40000000) >> 31);
456  accu = (int64_t)w_re*tmpz[ n2+i].im;
457  accu -= (int64_t)w_im*tmpz[ n2+i].re;
458  tmp2 = (int32_t)((accu + 0x40000000) >> 31);
459  accu = (int64_t)w_re*tmpz[n34+i].re;
460  accu -= (int64_t)w_im*tmpz[n34+i].im;
461  tmp3 = (int32_t)((accu + 0x40000000) >> 31);
462  accu = (int64_t)w_re*tmpz[n34+i].im;
463  accu += (int64_t)w_im*tmpz[n34+i].re;
464  tmp4 = (int32_t)((accu + 0x40000000) >> 31);
465 
466  tmp5 = tmp1 + tmp3;
467  tmp1 = tmp1 - tmp3;
468  tmp6 = tmp2 + tmp4;
469  tmp2 = tmp2 - tmp4;
470 
471  tmpz[ n2+i].re = tmpz[ i].re - tmp5;
472  tmpz[ i].re = tmpz[ i].re + tmp5;
473  tmpz[ n2+i].im = tmpz[ i].im - tmp6;
474  tmpz[ i].im = tmpz[ i].im + tmp6;
475  tmpz[n34+i].re = tmpz[n4+i].re - tmp2;
476  tmpz[ n4+i].re = tmpz[n4+i].re + tmp2;
477  tmpz[n34+i].im = tmpz[n4+i].im + tmp1;
478  tmpz[ n4+i].im = tmpz[n4+i].im - tmp1;
479 
480  w_re_ptr += step;
481  w_im_ptr -= step;
482  }
483  }
484  step >>= 1;
485  n4 <<= 1;
486  }
487 }
488 
489 #else /* FFT_FIXED_32 */
490 
491 #define BUTTERFLIES(a0,a1,a2,a3) {\
492  BF(t3, t5, t5, t1);\
493  BF(a2.re, a0.re, a0.re, t5);\
494  BF(a3.im, a1.im, a1.im, t3);\
495  BF(t4, t6, t2, t6);\
496  BF(a3.re, a1.re, a1.re, t4);\
497  BF(a2.im, a0.im, a0.im, t6);\
498 }
499 
500 // force loading all the inputs before storing any.
501 // this is slightly slower for small data, but avoids store->load aliasing
502 // for addresses separated by large powers of 2.
503 #define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
504  FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
505  BF(t3, t5, t5, t1);\
506  BF(a2.re, a0.re, r0, t5);\
507  BF(a3.im, a1.im, i1, t3);\
508  BF(t4, t6, t2, t6);\
509  BF(a3.re, a1.re, r1, t4);\
510  BF(a2.im, a0.im, i0, t6);\
511 }
512 
513 #define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
514  CMUL(t1, t2, a2.re, a2.im, wre, -wim);\
515  CMUL(t5, t6, a3.re, a3.im, wre, wim);\
516  BUTTERFLIES(a0,a1,a2,a3)\
517 }
518 
519 #define TRANSFORM_ZERO(a0,a1,a2,a3) {\
520  t1 = a2.re;\
521  t2 = a2.im;\
522  t5 = a3.re;\
523  t6 = a3.im;\
524  BUTTERFLIES(a0,a1,a2,a3)\
525 }
526 
527 /* z[0...8n-1], w[1...2n-1] */
528 #define PASS(name)\
529 static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
530 {\
531  FFTDouble t1, t2, t3, t4, t5, t6;\
532  int o1 = 2*n;\
533  int o2 = 4*n;\
534  int o3 = 6*n;\
535  const FFTSample *wim = wre+o1;\
536  n--;\
537 \
538  TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
539  TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
540  do {\
541  z += 2;\
542  wre += 2;\
543  wim -= 2;\
544  TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
545  TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
546  } while(--n);\
547 }
548 
549 PASS(pass)
550 #if !CONFIG_SMALL
551 #undef BUTTERFLIES
552 #define BUTTERFLIES BUTTERFLIES_BIG
553 PASS(pass_big)
554 #endif
555 
556 #define DECL_FFT(n,n2,n4)\
557 static void fft##n(FFTComplex *z)\
558 {\
559  fft##n2(z);\
560  fft##n4(z+n4*2);\
561  fft##n4(z+n4*3);\
562  pass(z,FFT_NAME(ff_cos_##n),n4/2);\
563 }
564 
565 static void fft4(FFTComplex *z)
566 {
567  FFTDouble t1, t2, t3, t4, t5, t6, t7, t8;
568 
569  BF(t3, t1, z[0].re, z[1].re);
570  BF(t8, t6, z[3].re, z[2].re);
571  BF(z[2].re, z[0].re, t1, t6);
572  BF(t4, t2, z[0].im, z[1].im);
573  BF(t7, t5, z[2].im, z[3].im);
574  BF(z[3].im, z[1].im, t4, t8);
575  BF(z[3].re, z[1].re, t3, t7);
576  BF(z[2].im, z[0].im, t2, t5);
577 }
578 
579 static void fft8(FFTComplex *z)
580 {
581  FFTDouble t1, t2, t3, t4, t5, t6;
582 
583  fft4(z);
584 
585  BF(t1, z[5].re, z[4].re, -z[5].re);
586  BF(t2, z[5].im, z[4].im, -z[5].im);
587  BF(t5, z[7].re, z[6].re, -z[7].re);
588  BF(t6, z[7].im, z[6].im, -z[7].im);
589 
590  BUTTERFLIES(z[0],z[2],z[4],z[6]);
591  TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf);
592 }
593 
594 #if !CONFIG_SMALL
595 static void fft16(FFTComplex *z)
596 {
597  FFTDouble t1, t2, t3, t4, t5, t6;
598  FFTSample cos_16_1 = FFT_NAME(ff_cos_16)[1];
599  FFTSample cos_16_3 = FFT_NAME(ff_cos_16)[3];
600 
601  fft8(z);
602  fft4(z+8);
603  fft4(z+12);
604 
605  TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
606  TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf);
607  TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3);
608  TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1);
609 }
610 #else
611 DECL_FFT(16,8,4)
612 #endif
613 DECL_FFT(32,16,8)
614 DECL_FFT(64,32,16)
615 DECL_FFT(128,64,32)
616 DECL_FFT(256,128,64)
617 DECL_FFT(512,256,128)
618 #if !CONFIG_SMALL
619 #define pass pass_big
620 #endif
621 DECL_FFT(1024,512,256)
622 DECL_FFT(2048,1024,512)
623 DECL_FFT(4096,2048,1024)
624 DECL_FFT(8192,4096,2048)
625 DECL_FFT(16384,8192,4096)
626 DECL_FFT(32768,16384,8192)
627 DECL_FFT(65536,32768,16384)
628 DECL_FFT(131072,65536,32768)
629 
630 static void (* const fft_dispatch[])(FFTComplex*) = {
631  fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
632  fft2048, fft4096, fft8192, fft16384, fft32768, fft65536, fft131072
633 };
634 
635 static void fft_calc_c(FFTContext *s, FFTComplex *z)
636 {
637  fft_dispatch[s->nbits-2](z);
638 }
639 #endif /* FFT_FIXED_32 */
static void fft_permute_c(FFTContext *s, FFTComplex *z)
Definition: fft_template.c:309
#define NULL
Definition: coverity.c:32
#define MAX_FFT_SIZE
Definition: fft_table.h:60
#define BUTTERFLIES(a0, a1, a2, a3)
Definition: fft_template.c:552
float FFTDouble
Definition: fft.h:43
static CosTabsInitOnce cos_tabs_init_once[]
Definition: fft_template.c:102
float re
Definition: fft.c:82
#define M_SQRT1_2
Definition: mathematics.h:58
static const int avx_tab[]
Definition: fft_template.c:165
#define SPLIT_RADIX_PERMUTATION(num)
FFTSample re
Definition: avfft.h:38
#define t8
Definition: regdef.h:53
static int split_radix_permutation(int i, int n, int inverse)
Definition: fft_template.c:147
#define MAX_LOG2_NFFT
Specifies maximum allowed fft size.
Definition: fft_table.h:59
#define sqrthalf
Definition: fft-internal.h:64
#define t7
Definition: regdef.h:35
void ff_fft_lut_init(uint16_t *table, int off, int size, int *index)
#define av_cold
Definition: attributes.h:82
#define av_malloc(s)
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
COSTABLE(16)
av_cold void ff_fft_init_arm(FFTContext *s)
Definition: fft_init_arm.c:38
void ff_fft_init_ppc(FFTContext *s)
Definition: fft_init.c:151
const int32_t ff_w_tab_sr[MAX_FFT_SIZE/(4 *16)]
#define DECL_FFT(n, n2, n4)
Definition: fft_template.c:556
#define AVOnce
Definition: thread.h:159
#define INIT_FF_COS_TABS_FUNC(index, size)
Definition: fft_template.c:81
static void(*const fft_dispatch[])(FFTComplex *)
Definition: fft_template.c:630
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
#define FIX15(a)
Definition: fft-internal.h:62
void(* fft_permute)(struct FFTContext *s, FFTComplex *z)
Do the permutation needed BEFORE calling fft_calc().
Definition: fft.h:101
void(* mdct_calcw)(struct FFTContext *s, FFTDouble *output, const FFTSample *input)
Definition: fft.h:110
#define t1
Definition: regdef.h:29
void(* mdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input)
Definition: fft.h:109
void(* imdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input)
Definition: fft.h:107
#define t3
Definition: regdef.h:31
av_cold void ff_fft_fixed_init_arm(FFTContext *s)
float FFTSample
Definition: avfft.h:35
#define fail()
Definition: checkasm.h:120
static av_cold void fft_perm_avx(FFTContext *s)
Definition: fft_template.c:181
#define pass
Definition: fft_template.c:619
#define Q31(x)
Definition: aac_defines.h:96
Definition: fft.h:88
av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
Set up a complex FFT.
Definition: fft_template.c:203
uint32_t * revtab32
Definition: fft.h:113
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
#define PASS(name)
Definition: fft_template.c:528
int nbits
Definition: fft.h:89
int inverse
Definition: fft.h:90
int32_t
#define s(width, name)
Definition: cbs_vp9.c:257
int n
Definition: avisynth_c.h:760
enum fft_permutation_type fft_permutation
Definition: fft.h:111
#define ff_imdct_half_c
Definition: fft-internal.h:87
#define ff_imdct_calc_c
Definition: fft-internal.h:86
void(* func)(void)
Definition: fft_template.c:77
static int is_second_half_of_fft32(int i, int n)
Definition: fft_template.c:169
#define AV_ONCE_INIT
Definition: thread.h:160
int index
Definition: gxfenc.c:89
float im
Definition: fft.c:82
uint16_t ff_fft_offsets_lut[21845]
#define t5
Definition: regdef.h:33
static av_cold void init_ff_cos_tabs(int index)
Definition: fft_template.c:64
void(* imdct_half)(struct FFTContext *s, FFTSample *output, const FFTSample *input)
Definition: fft.h:108
#define TRANSFORM(a0, a1, a2, a3, wre, wim)
Definition: fft_template.c:513
#define TRANSFORM_ZERO(a0, a1, a2, a3)
Definition: fft_template.c:519
static void fft4(FFTComplex *z)
Definition: fft_template.c:565
int
av_cold void ff_fft_end(FFTContext *s)
Definition: fft_template.c:324
FFTSample im
Definition: avfft.h:38
#define t6
Definition: regdef.h:34
void ff_mdct_calcw_c(FFTContext *s, FFTDouble *output, const FFTSample *input)
Definition: mdct_fixed.c:24
#define COSTABLE_CONST
Definition: fft.h:119
av_cold void ff_fft_init_aarch64(FFTContext *s)
void(* fft_calc)(struct FFTContext *s, FFTComplex *z)
Do a complex FFT with the parameters defined in ff_fft_init().
Definition: fft.h:106
#define t4
Definition: regdef.h:32
void ff_fft_init_mips(FFTContext *s)
FFT transform.
Definition: fft_mips.c:501
#define BF(a, b, c, s)
#define ff_mdct_calc_c
Definition: fft-internal.h:88
static int ff_thread_once(char *control, void(*routine)(void))
Definition: thread.h:162
static void fft8(FFTComplex *z)
Definition: fft_template.c:579
COSTABLE_CONST FFTSample *const FFT_NAME(ff_cos_tabs)[]
static const struct twinvq_data tab
#define av_freep(p)
uint16_t * revtab
Definition: fft.h:91
#define M_PI
Definition: mathematics.h:52
static uint32_t inverse(uint32_t v)
find multiplicative inverse modulo 2 ^ 32
Definition: asfcrypt.c:35
av_cold void ff_init_ff_cos_tabs(int index)
Initialize the cosine table in ff_cos_tabs[index].
Definition: fft_template.c:158
static void fft_calc_c(FFTContext *s, FFTComplex *z)
Definition: fft_template.c:635
static void fft16(FFTComplex *z)
Definition: fft_template.c:595
#define t2
Definition: regdef.h:30
definitions and tables for FFT
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about which is also called distortion Distortion can be quantified by almost any quality measurement one chooses the sum of squared differences is used but more complex methods that consider psychovisual effects can be used as well It makes no difference in this discussion First step
void ff_fft_init_x86(FFTContext *s)
Definition: fft_init.c:27
FFTComplex * tmp_buf
Definition: fft.h:92