FFmpeg
tx.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019 Lynne <dev@lynne.ee>
3  * Power of two FFT:
4  * Copyright (c) 2008 Loren Merritt
5  * Copyright (c) 2002 Fabrice Bellard
6  * Partly based on libdjbfft by D. J. Bernstein
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 #include <stddef.h>
26 #include "tx.h"
27 #include "thread.h"
28 #include "mem.h"
29 #include "avassert.h"
30 
31 typedef float FFTSample;
33 
34 struct AVTXContext {
35  int n; /* Nptwo part */
36  int m; /* Ptwo part */
37 
38  FFTComplex *exptab; /* MDCT exptab */
39  FFTComplex *tmp; /* Temporary buffer needed for all compound transforms */
40  int *pfatab; /* Input/Output mapping for compound transforms */
41  int *revtab; /* Input mapping for power of two transforms */
42 };
43 
44 #define FFT_NAME(x) x
45 
46 #define COSTABLE(size) \
47  static DECLARE_ALIGNED(32, FFTSample, FFT_NAME(ff_cos_##size))[size/2]
48 
49 static FFTSample * const FFT_NAME(ff_cos_tabs)[18];
50 
51 COSTABLE(16);
52 COSTABLE(32);
53 COSTABLE(64);
54 COSTABLE(128);
55 COSTABLE(256);
56 COSTABLE(512);
57 COSTABLE(1024);
58 COSTABLE(2048);
59 COSTABLE(4096);
60 COSTABLE(8192);
61 COSTABLE(16384);
62 COSTABLE(32768);
63 COSTABLE(65536);
64 COSTABLE(131072);
65 
66 static av_cold void init_ff_cos_tabs(int index)
67 {
68  int m = 1 << index;
69  double freq = 2*M_PI/m;
70  FFTSample *tab = FFT_NAME(ff_cos_tabs)[index];
71  for(int i = 0; i <= m/4; i++)
72  tab[i] = cos(i*freq);
73  for(int i = 1; i < m/4; i++)
74  tab[m/2 - i] = tab[i];
75 }
76 
77 typedef struct CosTabsInitOnce {
78  void (*func)(void);
79  AVOnce control;
81 
82 #define INIT_FF_COS_TABS_FUNC(index, size) \
83 static av_cold void init_ff_cos_tabs_ ## size (void) \
84 { \
85  init_ff_cos_tabs(index); \
86 }
87 
94 INIT_FF_COS_TABS_FUNC(10, 1024)
95 INIT_FF_COS_TABS_FUNC(11, 2048)
96 INIT_FF_COS_TABS_FUNC(12, 4096)
97 INIT_FF_COS_TABS_FUNC(13, 8192)
98 INIT_FF_COS_TABS_FUNC(14, 16384)
99 INIT_FF_COS_TABS_FUNC(15, 32768)
100 INIT_FF_COS_TABS_FUNC(16, 65536)
101 INIT_FF_COS_TABS_FUNC(17, 131072)
102 
104  { NULL },
105  { NULL },
106  { NULL },
107  { NULL },
108  { init_ff_cos_tabs_16, AV_ONCE_INIT },
109  { init_ff_cos_tabs_32, AV_ONCE_INIT },
110  { init_ff_cos_tabs_64, AV_ONCE_INIT },
111  { init_ff_cos_tabs_128, AV_ONCE_INIT },
112  { init_ff_cos_tabs_256, AV_ONCE_INIT },
113  { init_ff_cos_tabs_512, AV_ONCE_INIT },
114  { init_ff_cos_tabs_1024, AV_ONCE_INIT },
115  { init_ff_cos_tabs_2048, AV_ONCE_INIT },
116  { init_ff_cos_tabs_4096, AV_ONCE_INIT },
117  { init_ff_cos_tabs_8192, AV_ONCE_INIT },
118  { init_ff_cos_tabs_16384, AV_ONCE_INIT },
119  { init_ff_cos_tabs_32768, AV_ONCE_INIT },
120  { init_ff_cos_tabs_65536, AV_ONCE_INIT },
121  { init_ff_cos_tabs_131072, AV_ONCE_INIT },
122 };
123 
124 static FFTSample * const FFT_NAME(ff_cos_tabs)[] = {
125  NULL, NULL, NULL, NULL,
126  FFT_NAME(ff_cos_16),
127  FFT_NAME(ff_cos_32),
128  FFT_NAME(ff_cos_64),
129  FFT_NAME(ff_cos_128),
130  FFT_NAME(ff_cos_256),
131  FFT_NAME(ff_cos_512),
132  FFT_NAME(ff_cos_1024),
133  FFT_NAME(ff_cos_2048),
134  FFT_NAME(ff_cos_4096),
135  FFT_NAME(ff_cos_8192),
136  FFT_NAME(ff_cos_16384),
137  FFT_NAME(ff_cos_32768),
138  FFT_NAME(ff_cos_65536),
139  FFT_NAME(ff_cos_131072),
140 };
141 
143 {
144  ff_thread_once(&cos_tabs_init_once[index].control,
145  cos_tabs_init_once[index].func);
146 }
147 
150 
151 static av_cold void ff_init_53_tabs(void)
152 {
153  ff_53_tabs[0] = (FFTComplex){ cos(2 * M_PI / 12), cos(2 * M_PI / 12) };
154  ff_53_tabs[1] = (FFTComplex){ 0.5, 0.5 };
155  ff_53_tabs[2] = (FFTComplex){ cos(2 * M_PI / 5), sin(2 * M_PI / 5) };
156  ff_53_tabs[3] = (FFTComplex){ cos(2 * M_PI / 10), sin(2 * M_PI / 10) };
157 }
158 
159 #define BF(x, y, a, b) do { \
160  x = (a) - (b); \
161  y = (a) + (b); \
162  } while (0)
163 
164 #define CMUL(dre, dim, are, aim, bre, bim) do { \
165  (dre) = (are) * (bre) - (aim) * (bim); \
166  (dim) = (are) * (bim) + (aim) * (bre); \
167  } while (0)
168 
169 #define CMUL3(c, a, b) CMUL((c).re, (c).im, (a).re, (a).im, (b).re, (b).im)
170 
172  ptrdiff_t stride)
173 {
174  FFTComplex tmp[2];
175 
176  tmp[0].re = in[1].im - in[2].im;
177  tmp[0].im = in[1].re - in[2].re;
178  tmp[1].re = in[1].re + in[2].re;
179  tmp[1].im = in[1].im + in[2].im;
180 
181  out[0*stride].re = in[0].re + tmp[1].re;
182  out[0*stride].im = in[0].im + tmp[1].im;
183 
184  tmp[0].re *= ff_53_tabs[0].re;
185  tmp[0].im *= ff_53_tabs[0].im;
186  tmp[1].re *= ff_53_tabs[1].re;
187  tmp[1].im *= ff_53_tabs[1].re;
188 
189  out[1*stride].re = in[0].re - tmp[1].re + tmp[0].re;
190  out[1*stride].im = in[0].im - tmp[1].im - tmp[0].im;
191  out[2*stride].re = in[0].re - tmp[1].re - tmp[0].re;
192  out[2*stride].im = in[0].im - tmp[1].im + tmp[0].im;
193 }
194 
195 #define DECL_FFT5(NAME, D0, D1, D2, D3, D4) \
196 static av_always_inline void NAME(FFTComplex *out, FFTComplex *in, \
197  ptrdiff_t stride) \
198 { \
199  FFTComplex z0[4], t[6]; \
200  \
201  t[0].re = in[1].re + in[4].re; \
202  t[0].im = in[1].im + in[4].im; \
203  t[1].im = in[1].re - in[4].re; \
204  t[1].re = in[1].im - in[4].im; \
205  t[2].re = in[2].re + in[3].re; \
206  t[2].im = in[2].im + in[3].im; \
207  t[3].im = in[2].re - in[3].re; \
208  t[3].re = in[2].im - in[3].im; \
209  \
210  out[D0*stride].re = in[0].re + in[1].re + in[2].re + \
211  in[3].re + in[4].re; \
212  out[D0*stride].im = in[0].im + in[1].im + in[2].im + \
213  in[3].im + in[4].im; \
214  \
215  t[4].re = ff_53_tabs[2].re * t[2].re - ff_53_tabs[3].re * t[0].re; \
216  t[4].im = ff_53_tabs[2].re * t[2].im - ff_53_tabs[3].re * t[0].im; \
217  t[0].re = ff_53_tabs[2].re * t[0].re - ff_53_tabs[3].re * t[2].re; \
218  t[0].im = ff_53_tabs[2].re * t[0].im - ff_53_tabs[3].re * t[2].im; \
219  t[5].re = ff_53_tabs[2].im * t[3].re - ff_53_tabs[3].im * t[1].re; \
220  t[5].im = ff_53_tabs[2].im * t[3].im - ff_53_tabs[3].im * t[1].im; \
221  t[1].re = ff_53_tabs[2].im * t[1].re + ff_53_tabs[3].im * t[3].re; \
222  t[1].im = ff_53_tabs[2].im * t[1].im + ff_53_tabs[3].im * t[3].im; \
223  \
224  z0[0].re = t[0].re - t[1].re; \
225  z0[0].im = t[0].im - t[1].im; \
226  z0[1].re = t[4].re + t[5].re; \
227  z0[1].im = t[4].im + t[5].im; \
228  \
229  z0[2].re = t[4].re - t[5].re; \
230  z0[2].im = t[4].im - t[5].im; \
231  z0[3].re = t[0].re + t[1].re; \
232  z0[3].im = t[0].im + t[1].im; \
233  \
234  out[D1*stride].re = in[0].re + z0[3].re; \
235  out[D1*stride].im = in[0].im + z0[0].im; \
236  out[D2*stride].re = in[0].re + z0[2].re; \
237  out[D2*stride].im = in[0].im + z0[1].im; \
238  out[D3*stride].re = in[0].re + z0[1].re; \
239  out[D3*stride].im = in[0].im + z0[2].im; \
240  out[D4*stride].re = in[0].re + z0[0].re; \
241  out[D4*stride].im = in[0].im + z0[3].im; \
242 }
243 
244 DECL_FFT5(fft5, 0, 1, 2, 3, 4)
245 DECL_FFT5(fft5_m1, 0, 6, 12, 3, 9)
246 DECL_FFT5(fft5_m2, 10, 1, 7, 13, 4)
247 DECL_FFT5(fft5_m3, 5, 11, 2, 8, 14)
248 
250  ptrdiff_t stride)
251 {
252  FFTComplex tmp[15];
253 
254  for (int i = 0; i < 5; i++)
255  fft3(tmp + i, in + i*3, 5);
256 
257  fft5_m1(out, tmp + 0, stride);
258  fft5_m2(out, tmp + 5, stride);
259  fft5_m3(out, tmp + 10, stride);
260 }
261 
262 #define BUTTERFLIES(a0,a1,a2,a3) {\
263  BF(t3, t5, t5, t1);\
264  BF(a2.re, a0.re, a0.re, t5);\
265  BF(a3.im, a1.im, a1.im, t3);\
266  BF(t4, t6, t2, t6);\
267  BF(a3.re, a1.re, a1.re, t4);\
268  BF(a2.im, a0.im, a0.im, t6);\
269 }
270 
271 // force loading all the inputs before storing any.
272 // this is slightly slower for small data, but avoids store->load aliasing
273 // for addresses separated by large powers of 2.
274 #define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
275  FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
276  BF(t3, t5, t5, t1);\
277  BF(a2.re, a0.re, r0, t5);\
278  BF(a3.im, a1.im, i1, t3);\
279  BF(t4, t6, t2, t6);\
280  BF(a3.re, a1.re, r1, t4);\
281  BF(a2.im, a0.im, i0, t6);\
282 }
283 
284 #define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
285  CMUL(t1, t2, a2.re, a2.im, wre, -wim);\
286  CMUL(t5, t6, a3.re, a3.im, wre, wim);\
287  BUTTERFLIES(a0,a1,a2,a3)\
288 }
289 
290 #define TRANSFORM_ZERO(a0,a1,a2,a3) {\
291  t1 = a2.re;\
292  t2 = a2.im;\
293  t5 = a3.re;\
294  t6 = a3.im;\
295  BUTTERFLIES(a0,a1,a2,a3)\
296 }
297 
298 /* z[0...8n-1], w[1...2n-1] */
299 #define PASS(name)\
300 static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
301 {\
302  FFTSample t1, t2, t3, t4, t5, t6;\
303  int o1 = 2*n;\
304  int o2 = 4*n;\
305  int o3 = 6*n;\
306  const FFTSample *wim = wre+o1;\
307  n--;\
308 \
309  TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
310  TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
311  do {\
312  z += 2;\
313  wre += 2;\
314  wim -= 2;\
315  TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
316  TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
317  } while(--n);\
318 }
319 
320 PASS(pass)
321 #undef BUTTERFLIES
322 #define BUTTERFLIES BUTTERFLIES_BIG
323 PASS(pass_big)
324 
325 #define DECL_FFT(n,n2,n4)\
326 static void fft##n(FFTComplex *z)\
327 {\
328  fft##n2(z);\
329  fft##n4(z+n4*2);\
330  fft##n4(z+n4*3);\
331  pass(z,FFT_NAME(ff_cos_##n),n4/2);\
332 }
333 
334 static void fft4(FFTComplex *z)
335 {
336  FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
337 
338  BF(t3, t1, z[0].re, z[1].re);
339  BF(t8, t6, z[3].re, z[2].re);
340  BF(z[2].re, z[0].re, t1, t6);
341  BF(t4, t2, z[0].im, z[1].im);
342  BF(t7, t5, z[2].im, z[3].im);
343  BF(z[3].im, z[1].im, t4, t8);
344  BF(z[3].re, z[1].re, t3, t7);
345  BF(z[2].im, z[0].im, t2, t5);
346 }
347 
348 static void fft8(FFTComplex *z)
349 {
350  FFTSample t1, t2, t3, t4, t5, t6;
351 
352  fft4(z);
353 
354  BF(t1, z[5].re, z[4].re, -z[5].re);
355  BF(t2, z[5].im, z[4].im, -z[5].im);
356  BF(t5, z[7].re, z[6].re, -z[7].re);
357  BF(t6, z[7].im, z[6].im, -z[7].im);
358 
359  BUTTERFLIES(z[0],z[2],z[4],z[6]);
360  TRANSFORM(z[1],z[3],z[5],z[7],M_SQRT1_2,M_SQRT1_2);
361 }
362 
363 static void fft16(FFTComplex *z)
364 {
365  FFTSample t1, t2, t3, t4, t5, t6;
366  FFTSample cos_16_1 = FFT_NAME(ff_cos_16)[1];
367  FFTSample cos_16_3 = FFT_NAME(ff_cos_16)[3];
368 
369  fft8(z);
370  fft4(z+8);
371  fft4(z+12);
372 
373  TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
374  TRANSFORM(z[2],z[6],z[10],z[14],M_SQRT1_2,M_SQRT1_2);
375  TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3);
376  TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1);
377 }
378 
379 DECL_FFT(32,16,8)
380 DECL_FFT(64,32,16)
381 DECL_FFT(128,64,32)
382 DECL_FFT(256,128,64)
383 DECL_FFT(512,256,128)
384 #define pass pass_big
385 DECL_FFT(1024,512,256)
386 DECL_FFT(2048,1024,512)
387 DECL_FFT(4096,2048,1024)
388 DECL_FFT(8192,4096,2048)
389 DECL_FFT(16384,8192,4096)
390 DECL_FFT(32768,16384,8192)
391 DECL_FFT(65536,32768,16384)
392 DECL_FFT(131072,65536,32768)
393 
394 static void (* const fft_dispatch[])(FFTComplex*) = {
395  fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
396  fft2048, fft4096, fft8192, fft16384, fft32768, fft65536, fft131072
397 };
398 
399 #define DECL_COMP_FFT(N) \
400 static void compound_fft_##N##xM(AVTXContext *s, void *_out, \
401  void *_in, ptrdiff_t stride) \
402 { \
403  const int m = s->m, *in_map = s->pfatab, *out_map = in_map + N*m; \
404  FFTComplex *in = _in; \
405  FFTComplex *out = _out; \
406  FFTComplex fft##N##in[N]; \
407  void (*fftp)(FFTComplex *z) = fft_dispatch[av_log2(m) - 2]; \
408  \
409  for (int i = 0; i < m; i++) { \
410  for (int j = 0; j < N; j++) \
411  fft##N##in[j] = in[in_map[i*N + j]]; \
412  fft##N(s->tmp + s->revtab[i], fft##N##in, m); \
413  } \
414  \
415  for (int i = 0; i < N; i++) \
416  fftp(s->tmp + m*i); \
417  \
418  for (int i = 0; i < N*m; i++) \
419  out[i] = s->tmp[out_map[i]]; \
420 }
421 
422 DECL_COMP_FFT(3)
423 DECL_COMP_FFT(5)
424 DECL_COMP_FFT(15)
425 
426 static void monolithic_fft(AVTXContext *s, void *_out, void *_in,
427  ptrdiff_t stride)
428 {
429  FFTComplex *in = _in;
430  FFTComplex *out = _out;
431  int m = s->m, mb = av_log2(m) - 2;
432  for (int i = 0; i < m; i++)
433  out[s->revtab[i]] = in[i];
434  fft_dispatch[mb](out);
435 }
436 
437 #define DECL_COMP_IMDCT(N) \
438 static void compound_imdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
439  ptrdiff_t stride) \
440 { \
441  FFTComplex fft##N##in[N]; \
442  FFTComplex *z = _dst, *exp = s->exptab; \
443  const int m = s->m, len8 = N*m >> 1; \
444  const int *in_map = s->pfatab, *out_map = in_map + N*m; \
445  const float *src = _src, *in1, *in2; \
446  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m) - 2]; \
447  \
448  stride /= sizeof(*src); /* To convert it from bytes */ \
449  in1 = src; \
450  in2 = src + ((N*m*2) - 1) * stride; \
451  \
452  for (int i = 0; i < m; i++) { \
453  for (int j = 0; j < N; j++) { \
454  const int k = in_map[i*N + j]; \
455  FFTComplex tmp = { in2[-k*stride], in1[k*stride] }; \
456  CMUL3(fft##N##in[j], tmp, exp[k >> 1]); \
457  } \
458  fft##N(s->tmp + s->revtab[i], fft##N##in, m); \
459  } \
460  \
461  for (int i = 0; i < N; i++) \
462  fftp(s->tmp + m*i); \
463  \
464  for (int i = 0; i < len8; i++) { \
465  const int i0 = len8 + i, i1 = len8 - i - 1; \
466  const int s0 = out_map[i0], s1 = out_map[i1]; \
467  FFTComplex src1 = { s->tmp[s1].im, s->tmp[s1].re }; \
468  FFTComplex src0 = { s->tmp[s0].im, s->tmp[s0].re }; \
469  \
470  CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re); \
471  CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re); \
472  } \
473 }
474 
477 DECL_COMP_IMDCT(15)
478 
479 #define DECL_COMP_MDCT(N) \
480 static void compound_mdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
481  ptrdiff_t stride) \
482 { \
483  float *src = _src, *dst = _dst; \
484  FFTComplex *exp = s->exptab, tmp, fft##N##in[N]; \
485  const int m = s->m, len4 = N*m, len3 = len4 * 3, len8 = len4 >> 1; \
486  const int *in_map = s->pfatab, *out_map = in_map + N*m; \
487  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m) - 2]; \
488  \
489  stride /= sizeof(*dst); \
490  \
491  for (int i = 0; i < m; i++) { /* Folding and pre-reindexing */ \
492  for (int j = 0; j < N; j++) { \
493  const int k = in_map[i*N + j]; \
494  if (k < len4) { \
495  tmp.re = -src[ len4 + k] + src[1*len4 - 1 - k]; \
496  tmp.im = -src[ len3 + k] - src[1*len3 - 1 - k]; \
497  } else { \
498  tmp.re = -src[ len4 + k] - src[5*len4 - 1 - k]; \
499  tmp.im = src[-len4 + k] - src[1*len3 - 1 - k]; \
500  } \
501  CMUL(fft##N##in[j].im, fft##N##in[j].re, tmp.re, tmp.im, \
502  exp[k >> 1].re, exp[k >> 1].im); \
503  } \
504  fft##N(s->tmp + s->revtab[i], fft##N##in, m); \
505  } \
506  \
507  for (int i = 0; i < N; i++) \
508  fftp(s->tmp + m*i); \
509  \
510  for (int i = 0; i < len8; i++) { \
511  const int i0 = len8 + i, i1 = len8 - i - 1; \
512  const int s0 = out_map[i0], s1 = out_map[i1]; \
513  FFTComplex src1 = { s->tmp[s1].re, s->tmp[s1].im }; \
514  FFTComplex src0 = { s->tmp[s0].re, s->tmp[s0].im }; \
515  \
516  CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im, \
517  exp[i0].im, exp[i0].re); \
518  CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im, \
519  exp[i1].im, exp[i1].re); \
520  } \
521 }
522 
525 DECL_COMP_MDCT(15)
526 
527 static void monolithic_imdct(AVTXContext *s, void *_dst, void *_src,
528  ptrdiff_t stride)
529 {
530  FFTComplex *z = _dst, *exp = s->exptab;
531  const int m = s->m, len8 = m >> 1;
532  const float *src = _src, *in1, *in2;
533  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m) - 2];
534 
535  stride /= sizeof(*src);
536  in1 = src;
537  in2 = src + ((m*2) - 1) * stride;
538 
539  for (int i = 0; i < m; i++) {
540  FFTComplex tmp = { in2[-2*i*stride], in1[2*i*stride] };
541  CMUL3(z[s->revtab[i]], tmp, exp[i]);
542  }
543 
544  fftp(z);
545 
546  for (int i = 0; i < len8; i++) {
547  const int i0 = len8 + i, i1 = len8 - i - 1;
548  FFTComplex src1 = { z[i1].im, z[i1].re };
549  FFTComplex src0 = { z[i0].im, z[i0].re };
550 
551  CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re);
552  CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re);
553  }
554 }
555 
556 static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src,
557  ptrdiff_t stride)
558 {
559  float *src = _src, *dst = _dst;
560  FFTComplex *exp = s->exptab, tmp, *z = _dst;
561  const int m = s->m, len4 = m, len3 = len4 * 3, len8 = len4 >> 1;
562  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m) - 2];
563 
564  stride /= sizeof(*dst);
565 
566  for (int i = 0; i < m; i++) { /* Folding and pre-reindexing */
567  const int k = 2*i;
568  if (k < len4) {
569  tmp.re = -src[ len4 + k] + src[1*len4 - 1 - k];
570  tmp.im = -src[ len3 + k] - src[1*len3 - 1 - k];
571  } else {
572  tmp.re = -src[ len4 + k] - src[5*len4 - 1 - k];
573  tmp.im = src[-len4 + k] - src[1*len3 - 1 - k];
574  }
575  CMUL(z[s->revtab[i]].im, z[s->revtab[i]].re, tmp.re, tmp.im,
576  exp[i].re, exp[i].im);
577  }
578 
579  fftp(z);
580 
581  for (int i = 0; i < len8; i++) {
582  const int i0 = len8 + i, i1 = len8 - i - 1;
583  FFTComplex src1 = { z[i1].re, z[i1].im };
584  FFTComplex src0 = { z[i0].re, z[i0].im };
585 
586  CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im,
587  exp[i0].im, exp[i0].re);
588  CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im,
589  exp[i1].im, exp[i1].re);
590  }
591 }
592 
593 /* Calculates the modular multiplicative inverse, not fast, replace */
594 static int mulinv(int n, int m)
595 {
596  n = n % m;
597  for (int x = 1; x < m; x++)
598  if (((n * x) % m) == 1)
599  return x;
600  av_assert0(0); /* Never reached */
601 }
602 
603 /* Guaranteed to work for any n, m where gcd(n, m) == 1 */
604 static int gen_compound_mapping(AVTXContext *s, int n, int m, int inv,
605  enum AVTXType type)
606 {
607  int *in_map, *out_map;
608  const int len = n*m;
609  const int m_inv = mulinv(m, n);
610  const int n_inv = mulinv(n, m);
611  const int mdct = type == AV_TX_FLOAT_MDCT;
612 
613  if (!(s->pfatab = av_malloc(2*len*sizeof(*s->pfatab))))
614  return AVERROR(ENOMEM);
615 
616  in_map = s->pfatab;
617  out_map = s->pfatab + n*m;
618 
619  /* Ruritanian map for input, CRT map for output, can be swapped */
620  for (int j = 0; j < m; j++) {
621  for (int i = 0; i < n; i++) {
622  /* Shifted by 1 to simplify forward MDCTs */
623  in_map[j*n + i] = ((i*m + j*n) % len) << mdct;
624  out_map[(i*m*m_inv + j*n*n_inv) % len] = i*m + j;
625  }
626  }
627 
628  /* Change transform direction by reversing all ACs */
629  if (inv) {
630  for (int i = 0; i < m; i++) {
631  int *in = &in_map[i*n + 1]; /* Skip the DC */
632  for (int j = 0; j < ((n - 1) >> 1); j++)
633  FFSWAP(int, in[j], in[n - j - 2]);
634  }
635  }
636 
637  /* Our 15-point transform is also a compound one, so embed its input map */
638  if (n == 15) {
639  for (int k = 0; k < m; k++) {
640  int tmp[15];
641  memcpy(tmp, &in_map[k*15], 15*sizeof(*tmp));
642  for (int i = 0; i < 5; i++) {
643  for (int j = 0; j < 3; j++)
644  in_map[k*15 + i*3 + j] = tmp[(i*3 + j*5) % 15];
645  }
646  }
647  }
648 
649  return 0;
650 }
651 
652 static int split_radix_permutation(int i, int n, int inverse)
653 {
654  int m;
655  if (n <= 2)
656  return i & 1;
657  m = n >> 1;
658  if (!(i & m))
659  return split_radix_permutation(i, m, inverse)*2;
660  m >>= 1;
661  if (inverse == !(i & m))
662  return split_radix_permutation(i, m, inverse)*4 + 1;
663  else
664  return split_radix_permutation(i, m, inverse)*4 - 1;
665 }
666 
667 static int get_ptwo_revtab(AVTXContext *s, int m, int inv)
668 {
669  if (!(s->revtab = av_malloc(m*sizeof(*s->revtab))))
670  return AVERROR(ENOMEM);
671 
672  /* Default */
673  for (int i = 0; i < m; i++) {
674  int k = -split_radix_permutation(i, m, inv) & (m - 1);
675  s->revtab[k] = i;
676  }
677 
678  return 0;
679 }
680 
681 static int gen_mdct_exptab(AVTXContext *s, int len4, double scale)
682 {
683  const double theta = (scale < 0 ? len4 : 0) + 1.0/8.0;
684 
685  if (!(s->exptab = av_malloc_array(len4, sizeof(*s->exptab))))
686  return AVERROR(ENOMEM);
687 
688  scale = sqrt(fabs(scale));
689  for (int i = 0; i < len4; i++) {
690  const double alpha = M_PI_2 * (i + theta) / len4;
691  s->exptab[i].re = cos(alpha) * scale;
692  s->exptab[i].im = sin(alpha) * scale;
693  }
694 
695  return 0;
696 }
697 
699 {
700  if (!(*ctx))
701  return;
702 
703  av_free((*ctx)->pfatab);
704  av_free((*ctx)->exptab);
705  av_free((*ctx)->revtab);
706  av_free((*ctx)->tmp);
707 
708  av_freep(ctx);
709 }
710 
712  int inv, int len, const void *scale, uint64_t flags)
713 {
714  int err, n = 1, m = 1, max_ptwo = 1 << (FF_ARRAY_ELEMS(fft_dispatch) + 1);
715 
716  if (type == AV_TX_FLOAT_MDCT)
717  len >>= 1;
718 
719 #define CHECK_FACTOR(DST, FACTOR, SRC) \
720  if (DST == 1 && !(SRC % FACTOR)) { \
721  DST = FACTOR; \
722  SRC /= FACTOR; \
723  }
724  CHECK_FACTOR(n, 15, len)
725  CHECK_FACTOR(n, 5, len)
726  CHECK_FACTOR(n, 3, len)
727 #undef CHECK_NPTWO_FACTOR
728 
729  /* len must be a power of two now */
730  if (!(len & (len - 1)) && len >= 4 && len <= max_ptwo) {
731  m = len;
732  len = 1;
733  }
734 
735  /* Filter out direct 3, 5 and 15 transforms, too niche */
736  if (len > 1 || m == 1) {
737  av_log(NULL, AV_LOG_ERROR, "Unsupported transform size: n = %i, "
738  "m = %i, residual = %i!\n", n, m, len);
739  return AVERROR(EINVAL);
740  } else if (n > 1 && m > 1) { /* 2D transform case */
741  if ((err = gen_compound_mapping(s, n, m, inv, type)))
742  return err;
743  if (!(s->tmp = av_malloc(n*m*sizeof(*s->tmp))))
744  return AVERROR(ENOMEM);
745  *tx = n == 3 ? compound_fft_3xM :
746  n == 5 ? compound_fft_5xM :
747  compound_fft_15xM;
748  if (type == AV_TX_FLOAT_MDCT)
749  *tx = n == 3 ? inv ? compound_imdct_3xM : compound_mdct_3xM :
750  n == 5 ? inv ? compound_imdct_5xM : compound_mdct_5xM :
751  inv ? compound_imdct_15xM : compound_mdct_15xM;
752  } else { /* Direct transform case */
753  *tx = monolithic_fft;
754  if (type == AV_TX_FLOAT_MDCT)
755  *tx = inv ? monolithic_imdct : monolithic_mdct;
756  }
757 
758  if (n != 1)
760  if (m != 1) {
761  get_ptwo_revtab(s, m, inv);
762  for (int i = 4; i <= av_log2(m); i++)
764  }
765 
766  if (type == AV_TX_FLOAT_MDCT)
767  if ((err = gen_mdct_exptab(s, n*m, *((float *)scale))))
768  return err;
769 
770  s->n = n;
771  s->m = m;
772 
773  return 0;
774 }
775 
777  int inv, int len, const void *scale, uint64_t flags)
778 {
779  int err;
780  AVTXContext *s = av_mallocz(sizeof(*s));
781  if (!s)
782  return AVERROR(ENOMEM);
783 
784  switch (type) {
785  case AV_TX_FLOAT_FFT:
786  case AV_TX_FLOAT_MDCT:
787  if ((err = init_mdct_fft(s, tx, type, inv, len, scale, flags)))
788  goto fail;
789  break;
790  default:
791  err = AVERROR(EINVAL);
792  goto fail;
793  }
794 
795  *ctx = s;
796 
797  return 0;
798 
799 fail:
800  av_tx_uninit(&s);
801  *tx = NULL;
802  return err;
803 }
static av_always_inline void fft15(FFTComplex *out, FFTComplex *in, ptrdiff_t stride)
Definition: tx.c:249
av_cold void av_tx_uninit(AVTXContext **ctx)
Frees a context and sets ctx to NULL, does nothing when ctx == NULL.
Definition: tx.c:698
#define NULL
Definition: coverity.c:32
AVComplexFloat FFTComplex
Definition: tx.c:32
static void(*const fft_dispatch[])(FFTComplex *)
Definition: tx.c:394
static void monolithic_fft(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
Definition: tx.c:426
float re
Definition: fft.c:82
Memory handling functions.
static FFTComplex ff_53_tabs[4]
Definition: tx.c:149
#define M_SQRT1_2
Definition: mathematics.h:58
static av_cold void init_ff_cos_tabs(int index)
Definition: tx.c:66
static av_cold void ff_init_53_tabs(void)
Definition: tx.c:151
int av_log2(unsigned v)
Definition: intmath.c:26
#define BUTTERFLIES(a0, a1, a2, a3)
Definition: tx.c:322
GLint GLenum type
Definition: opengl_enc.c:104
FFTSample re
Definition: avfft.h:38
static void fft5(FFTComplex *out, FFTComplex *in, FFTComplex exptab[2])
Definition: mdct15.c:92
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:236
#define t8
Definition: regdef.h:53
#define src
Definition: vp8dsp.c:254
static av_cold void ff_init_ff_cos_tabs(int index)
Definition: tx.c:142
#define pass
Definition: tx.c:384
static CosTabsInitOnce cos_tabs_init_once[]
Definition: tx.c:103
#define t7
Definition: regdef.h:35
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
#define DECL_COMP_FFT(N)
Definition: tx.c:399
#define av_cold
Definition: attributes.h:82
#define av_malloc(s)
#define mb
#define CMUL3(c, a, b)
Definition: tx.c:169
int m
Definition: tx.c:36
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
Definition: mem.h:112
static int split_radix_permutation(int i, int n, int inverse)
Definition: tx.c:652
#define AVOnce
Definition: thread.h:159
#define PASS(name)
Definition: tx.c:299
#define av_log(a,...)
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:260
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
void(* av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride)
Function pointer to a function to perform the transform.
Definition: tx.h:56
#define DECL_FFT5(NAME, D0, D1, D2, D3, D4)
Definition: tx.c:195
float FFTSample
Definition: tx.c:31
#define t1
Definition: regdef.h:29
AVTXType
Definition: tx.h:31
static int mulinv(int n, int m)
Definition: tx.c:594
simple assert() macros that are a bit more flexible than ISO C assert().
#define t3
Definition: regdef.h:31
#define CMUL(dre, dim, are, aim, bre, bim)
Definition: tx.c:164
static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx.c:556
float FFTSample
Definition: avfft.h:35
static void fft8(FFTComplex *z)
Definition: tx.c:348
#define fail()
Definition: checkasm.h:120
int8_t exp
Definition: eval.c:72
static int gen_mdct_exptab(AVTXContext *s, int len4, double scale)
Definition: tx.c:681
Standard complex to complex FFT with sample data type AVComplexFloat.
Definition: tx.h:36
FFTComplex * exptab
Definition: tx.c:38
#define COSTABLE(size)
Definition: tx.c:46
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
#define M_PI_2
Definition: mathematics.h:55
int * revtab
Definition: tx.c:41
AVFormatContext * ctx
Definition: movenc.c:48
#define s(width, name)
Definition: cbs_vp9.c:257
#define TRANSFORM_ZERO(a0, a1, a2, a3)
Definition: tx.c:290
Definition: tx.c:34
#define CHECK_FACTOR(DST, FACTOR, SRC)
int * pfatab
Definition: tx.c:40
#define DECL_COMP_MDCT(N)
Definition: tx.c:479
FFTComplex * tmp
Definition: tx.c:39
#define FF_ARRAY_ELEMS(a)
#define DECL_COMP_IMDCT(N)
Definition: tx.c:437
#define src1
Definition: h264pred.c:139
#define AV_ONCE_INIT
Definition: thread.h:160
#define FFT_NAME(x)
Definition: tx.c:44
static const int16_t alpha[]
Definition: ilbcdata.h:55
static AVOnce tabs_53_once
Definition: tx.c:148
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31))))#define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac){}void ff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map){AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);return NULL;}return ac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;}int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){int use_generic=1;int len=in->nb_samples;int p;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
#define TRANSFORM(a0, a1, a2, a3, wre, wim)
Definition: tx.c:284
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:67
int index
Definition: gxfenc.c:89
static void monolithic_imdct(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx.c:527
float im
Definition: fft.c:82
static int gen_compound_mapping(AVTXContext *s, int n, int m, int inv, enum AVTXType type)
Definition: tx.c:604
#define src0
Definition: h264pred.c:138
#define t5
Definition: regdef.h:33
av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)
Initialize a transform context with the given configuration Currently power of two lengths from 4 to ...
Definition: tx.c:776
#define INIT_FF_COS_TABS_FUNC(index, size)
Definition: tx.c:82
static av_always_inline void fft3(FFTComplex *out, FFTComplex *in, ptrdiff_t stride)
Definition: tx.c:171
#define flags(name, subs,...)
Definition: cbs_av1.c:561
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:104
FFTSample im
Definition: avfft.h:38
#define t6
Definition: regdef.h:34
static int init_mdct_fft(AVTXContext *s, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)
Definition: tx.c:711
#define t4
Definition: regdef.h:32
Standard MDCT with sample data type of float and a scale type of float.
Definition: tx.h:41
#define av_free(p)
static int get_ptwo_revtab(AVTXContext *s, int m, int inv)
Definition: tx.c:667
int len
static int ff_thread_once(char *control, void(*routine)(void))
Definition: thread.h:162
static const struct twinvq_data tab
#define DECL_FFT(n, n2, n4)
Definition: tx.c:325
FILE * out
Definition: movenc.c:54
#define av_freep(p)
#define BF(x, y, a, b)
Definition: tx.c:159
#define av_always_inline
Definition: attributes.h:39
#define M_PI
Definition: mathematics.h:52
static void fft16(FFTComplex *z)
Definition: tx.c:363
static uint32_t inverse(uint32_t v)
find multiplicative inverse modulo 2 ^ 32
Definition: asfcrypt.c:35
#define av_malloc_array(a, b)
#define FFSWAP(type, a, b)
Definition: common.h:99
static void fft4(FFTComplex *z)
Definition: tx.c:334
#define stride
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later.That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another.Frame references ownership and permissions
int n
Definition: tx.c:35
#define t2
Definition: regdef.h:30