FFmpeg
tx_template.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019 Lynne <dev@lynne.ee>
3  * Power of two FFT:
4  * Copyright (c) 2008 Loren Merritt
5  * Copyright (c) 2002 Fabrice Bellard
6  * Partly based on libdjbfft by D. J. Bernstein
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 /* All costabs for a type are defined here */
26 COSTABLE(16);
27 COSTABLE(32);
28 COSTABLE(64);
29 COSTABLE(128);
30 COSTABLE(256);
31 COSTABLE(512);
32 COSTABLE(1024);
33 COSTABLE(2048);
34 COSTABLE(4096);
35 COSTABLE(8192);
36 COSTABLE(16384);
37 COSTABLE(32768);
38 COSTABLE(65536);
39 COSTABLE(131072);
40 DECLARE_ALIGNED(32, FFTComplex, TX_NAME(ff_cos_53))[4];
41 
42 static FFTSample * const cos_tabs[18] = {
43  NULL,
44  NULL,
45  NULL,
46  NULL,
47  TX_NAME(ff_cos_16),
48  TX_NAME(ff_cos_32),
49  TX_NAME(ff_cos_64),
50  TX_NAME(ff_cos_128),
51  TX_NAME(ff_cos_256),
52  TX_NAME(ff_cos_512),
53  TX_NAME(ff_cos_1024),
54  TX_NAME(ff_cos_2048),
55  TX_NAME(ff_cos_4096),
56  TX_NAME(ff_cos_8192),
57  TX_NAME(ff_cos_16384),
58  TX_NAME(ff_cos_32768),
59  TX_NAME(ff_cos_65536),
60  TX_NAME(ff_cos_131072),
61 };
62 
64 {
65  int m = 1 << index;
66  double freq = 2*M_PI/m;
67  FFTSample *tab = cos_tabs[index];
68  for(int i = 0; i <= m/4; i++)
69  tab[i] = RESCALE(cos(i*freq));
70  for(int i = 1; i < m/4; i++)
71  tab[m/2 - i] = tab[i];
72 }
73 
74 #define INIT_FF_COS_TABS_FUNC(index, size) \
75 static av_cold void init_cos_tabs_ ## size (void) \
76 { \
77  init_cos_tabs_idx(index); \
78 }
79 
86 INIT_FF_COS_TABS_FUNC(10, 1024)
87 INIT_FF_COS_TABS_FUNC(11, 2048)
88 INIT_FF_COS_TABS_FUNC(12, 4096)
89 INIT_FF_COS_TABS_FUNC(13, 8192)
90 INIT_FF_COS_TABS_FUNC(14, 16384)
91 INIT_FF_COS_TABS_FUNC(15, 32768)
92 INIT_FF_COS_TABS_FUNC(16, 65536)
93 INIT_FF_COS_TABS_FUNC(17, 131072)
94 
95 static av_cold void ff_init_53_tabs(void)
96 {
97  TX_NAME(ff_cos_53)[0] = (FFTComplex){ RESCALE(cos(2 * M_PI / 12)), RESCALE(cos(2 * M_PI / 12)) };
98  TX_NAME(ff_cos_53)[1] = (FFTComplex){ RESCALE(cos(2 * M_PI / 6)), RESCALE(cos(2 * M_PI / 6)) };
99  TX_NAME(ff_cos_53)[2] = (FFTComplex){ RESCALE(cos(2 * M_PI / 5)), RESCALE(sin(2 * M_PI / 5)) };
100  TX_NAME(ff_cos_53)[3] = (FFTComplex){ RESCALE(cos(2 * M_PI / 10)), RESCALE(sin(2 * M_PI / 10)) };
101 }
102 
105  { NULL },
106  { NULL },
107  { NULL },
108  { init_cos_tabs_16, AV_ONCE_INIT },
109  { init_cos_tabs_32, AV_ONCE_INIT },
110  { init_cos_tabs_64, AV_ONCE_INIT },
111  { init_cos_tabs_128, AV_ONCE_INIT },
112  { init_cos_tabs_256, AV_ONCE_INIT },
113  { init_cos_tabs_512, AV_ONCE_INIT },
114  { init_cos_tabs_1024, AV_ONCE_INIT },
115  { init_cos_tabs_2048, AV_ONCE_INIT },
116  { init_cos_tabs_4096, AV_ONCE_INIT },
117  { init_cos_tabs_8192, AV_ONCE_INIT },
118  { init_cos_tabs_16384, AV_ONCE_INIT },
119  { init_cos_tabs_32768, AV_ONCE_INIT },
120  { init_cos_tabs_65536, AV_ONCE_INIT },
121  { init_cos_tabs_131072, AV_ONCE_INIT },
122 };
123 
124 static av_cold void init_cos_tabs(int index)
125 {
126  ff_thread_once(&cos_tabs_init_once[index].control,
127  cos_tabs_init_once[index].func);
128 }
129 
131  ptrdiff_t stride)
132 {
133  FFTComplex tmp[2];
134 #ifdef TX_INT32
135  int64_t mtmp[4];
136 #endif
137 
138  BF(tmp[0].re, tmp[1].im, in[1].im, in[2].im);
139  BF(tmp[0].im, tmp[1].re, in[1].re, in[2].re);
140 
141  out[0*stride].re = in[0].re + tmp[1].re;
142  out[0*stride].im = in[0].im + tmp[1].im;
143 
144 #ifdef TX_INT32
145  mtmp[0] = (int64_t)TX_NAME(ff_cos_53)[0].re * tmp[0].re;
146  mtmp[1] = (int64_t)TX_NAME(ff_cos_53)[0].im * tmp[0].im;
147  mtmp[2] = (int64_t)TX_NAME(ff_cos_53)[1].re * tmp[1].re;
148  mtmp[3] = (int64_t)TX_NAME(ff_cos_53)[1].re * tmp[1].im;
149  out[1*stride].re = in[0].re - (mtmp[2] + mtmp[0] + 0x40000000 >> 31);
150  out[1*stride].im = in[0].im - (mtmp[3] - mtmp[1] + 0x40000000 >> 31);
151  out[2*stride].re = in[0].re - (mtmp[2] - mtmp[0] + 0x40000000 >> 31);
152  out[2*stride].im = in[0].im - (mtmp[3] + mtmp[1] + 0x40000000 >> 31);
153 #else
154  tmp[0].re = TX_NAME(ff_cos_53)[0].re * tmp[0].re;
155  tmp[0].im = TX_NAME(ff_cos_53)[0].im * tmp[0].im;
156  tmp[1].re = TX_NAME(ff_cos_53)[1].re * tmp[1].re;
157  tmp[1].im = TX_NAME(ff_cos_53)[1].re * tmp[1].im;
158  out[1*stride].re = in[0].re - tmp[1].re + tmp[0].re;
159  out[1*stride].im = in[0].im - tmp[1].im - tmp[0].im;
160  out[2*stride].re = in[0].re - tmp[1].re - tmp[0].re;
161  out[2*stride].im = in[0].im - tmp[1].im + tmp[0].im;
162 #endif
163 }
164 
165 #define DECL_FFT5(NAME, D0, D1, D2, D3, D4) \
166 static av_always_inline void NAME(FFTComplex *out, FFTComplex *in, \
167  ptrdiff_t stride) \
168 { \
169  FFTComplex z0[4], t[6]; \
170  \
171  BF(t[1].im, t[0].re, in[1].re, in[4].re); \
172  BF(t[1].re, t[0].im, in[1].im, in[4].im); \
173  BF(t[3].im, t[2].re, in[2].re, in[3].re); \
174  BF(t[3].re, t[2].im, in[2].im, in[3].im); \
175  \
176  out[D0*stride].re = in[0].re + t[0].re + t[2].re; \
177  out[D0*stride].im = in[0].im + t[0].im + t[2].im; \
178  \
179  SMUL(t[4].re, t[0].re, TX_NAME(ff_cos_53)[2].re, TX_NAME(ff_cos_53)[3].re, t[2].re, t[0].re); \
180  SMUL(t[4].im, t[0].im, TX_NAME(ff_cos_53)[2].re, TX_NAME(ff_cos_53)[3].re, t[2].im, t[0].im); \
181  CMUL(t[5].re, t[1].re, TX_NAME(ff_cos_53)[2].im, TX_NAME(ff_cos_53)[3].im, t[3].re, t[1].re); \
182  CMUL(t[5].im, t[1].im, TX_NAME(ff_cos_53)[2].im, TX_NAME(ff_cos_53)[3].im, t[3].im, t[1].im); \
183  \
184  BF(z0[0].re, z0[3].re, t[0].re, t[1].re); \
185  BF(z0[0].im, z0[3].im, t[0].im, t[1].im); \
186  BF(z0[2].re, z0[1].re, t[4].re, t[5].re); \
187  BF(z0[2].im, z0[1].im, t[4].im, t[5].im); \
188  \
189  out[D1*stride].re = in[0].re + z0[3].re; \
190  out[D1*stride].im = in[0].im + z0[0].im; \
191  out[D2*stride].re = in[0].re + z0[2].re; \
192  out[D2*stride].im = in[0].im + z0[1].im; \
193  out[D3*stride].re = in[0].re + z0[1].re; \
194  out[D3*stride].im = in[0].im + z0[2].im; \
195  out[D4*stride].re = in[0].re + z0[0].re; \
196  out[D4*stride].im = in[0].im + z0[3].im; \
197 }
198 
199 DECL_FFT5(fft5, 0, 1, 2, 3, 4)
200 DECL_FFT5(fft5_m1, 0, 6, 12, 3, 9)
201 DECL_FFT5(fft5_m2, 10, 1, 7, 13, 4)
202 DECL_FFT5(fft5_m3, 5, 11, 2, 8, 14)
203 
205  ptrdiff_t stride)
206 {
207  FFTComplex tmp[15];
208 
209  for (int i = 0; i < 5; i++)
210  fft3(tmp + i, in + i*3, 5);
211 
212  fft5_m1(out, tmp + 0, stride);
213  fft5_m2(out, tmp + 5, stride);
214  fft5_m3(out, tmp + 10, stride);
215 }
216 
217 #define BUTTERFLIES(a0,a1,a2,a3) {\
218  BF(t3, t5, t5, t1);\
219  BF(a2.re, a0.re, a0.re, t5);\
220  BF(a3.im, a1.im, a1.im, t3);\
221  BF(t4, t6, t2, t6);\
222  BF(a3.re, a1.re, a1.re, t4);\
223  BF(a2.im, a0.im, a0.im, t6);\
224 }
225 
226 // force loading all the inputs before storing any.
227 // this is slightly slower for small data, but avoids store->load aliasing
228 // for addresses separated by large powers of 2.
229 #define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
230  FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
231  BF(t3, t5, t5, t1);\
232  BF(a2.re, a0.re, r0, t5);\
233  BF(a3.im, a1.im, i1, t3);\
234  BF(t4, t6, t2, t6);\
235  BF(a3.re, a1.re, r1, t4);\
236  BF(a2.im, a0.im, i0, t6);\
237 }
238 
239 #define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
240  CMUL(t1, t2, a2.re, a2.im, wre, -wim);\
241  CMUL(t5, t6, a3.re, a3.im, wre, wim);\
242  BUTTERFLIES(a0,a1,a2,a3)\
243 }
244 
245 #define TRANSFORM_ZERO(a0,a1,a2,a3) {\
246  t1 = a2.re;\
247  t2 = a2.im;\
248  t5 = a3.re;\
249  t6 = a3.im;\
250  BUTTERFLIES(a0,a1,a2,a3)\
251 }
252 
253 /* z[0...8n-1], w[1...2n-1] */
254 #define PASS(name)\
255 static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
256 {\
257  FFTSample t1, t2, t3, t4, t5, t6;\
258  int o1 = 2*n;\
259  int o2 = 4*n;\
260  int o3 = 6*n;\
261  const FFTSample *wim = wre+o1;\
262  n--;\
263 \
264  TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
265  TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
266  do {\
267  z += 2;\
268  wre += 2;\
269  wim -= 2;\
270  TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
271  TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
272  } while(--n);\
273 }
274 
275 PASS(pass)
276 #undef BUTTERFLIES
277 #define BUTTERFLIES BUTTERFLIES_BIG
278 PASS(pass_big)
279 
280 #define DECL_FFT(n,n2,n4)\
281 static void fft##n(FFTComplex *z)\
282 {\
283  fft##n2(z);\
284  fft##n4(z+n4*2);\
285  fft##n4(z+n4*3);\
286  pass(z,TX_NAME(ff_cos_##n),n4/2);\
287 }
288 
289 static void fft2(FFTComplex *z)
290 {
291  FFTComplex tmp;
292  BF(tmp.re, z[0].re, z[0].re, z[1].re);
293  BF(tmp.im, z[0].im, z[0].im, z[1].im);
294  z[1] = tmp;
295 }
296 
297 static void fft4(FFTComplex *z)
298 {
299  FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
300 
301  BF(t3, t1, z[0].re, z[1].re);
302  BF(t8, t6, z[3].re, z[2].re);
303  BF(z[2].re, z[0].re, t1, t6);
304  BF(t4, t2, z[0].im, z[1].im);
305  BF(t7, t5, z[2].im, z[3].im);
306  BF(z[3].im, z[1].im, t4, t8);
307  BF(z[3].re, z[1].re, t3, t7);
308  BF(z[2].im, z[0].im, t2, t5);
309 }
310 
311 static void fft8(FFTComplex *z)
312 {
313  FFTSample t1, t2, t3, t4, t5, t6;
314 
315  fft4(z);
316 
317  BF(t1, z[5].re, z[4].re, -z[5].re);
318  BF(t2, z[5].im, z[4].im, -z[5].im);
319  BF(t5, z[7].re, z[6].re, -z[7].re);
320  BF(t6, z[7].im, z[6].im, -z[7].im);
321 
322  BUTTERFLIES(z[0],z[2],z[4],z[6]);
323  TRANSFORM(z[1],z[3],z[5],z[7],RESCALE(M_SQRT1_2),RESCALE(M_SQRT1_2));
324 }
325 
326 static void fft16(FFTComplex *z)
327 {
328  FFTSample t1, t2, t3, t4, t5, t6;
329  FFTSample cos_16_1 = TX_NAME(ff_cos_16)[1];
330  FFTSample cos_16_3 = TX_NAME(ff_cos_16)[3];
331 
332  fft8(z);
333  fft4(z+8);
334  fft4(z+12);
335 
336  TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
337  TRANSFORM(z[2],z[6],z[10],z[14],RESCALE(M_SQRT1_2),RESCALE(M_SQRT1_2));
338  TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3);
339  TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1);
340 }
341 
342 DECL_FFT(32,16,8)
343 DECL_FFT(64,32,16)
344 DECL_FFT(128,64,32)
345 DECL_FFT(256,128,64)
346 DECL_FFT(512,256,128)
347 #define pass pass_big
348 DECL_FFT(1024,512,256)
349 DECL_FFT(2048,1024,512)
350 DECL_FFT(4096,2048,1024)
351 DECL_FFT(8192,4096,2048)
352 DECL_FFT(16384,8192,4096)
353 DECL_FFT(32768,16384,8192)
354 DECL_FFT(65536,32768,16384)
355 DECL_FFT(131072,65536,32768)
356 
357 static void (* const fft_dispatch[])(FFTComplex*) = {
358  NULL, fft2, fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512,
359  fft1024, fft2048, fft4096, fft8192, fft16384, fft32768, fft65536, fft131072
360 };
361 
362 #define DECL_COMP_FFT(N) \
363 static void compound_fft_##N##xM(AVTXContext *s, void *_out, \
364  void *_in, ptrdiff_t stride) \
365 { \
366  const int m = s->m, *in_map = s->pfatab, *out_map = in_map + N*m; \
367  FFTComplex *in = _in; \
368  FFTComplex *out = _out; \
369  FFTComplex fft##N##in[N]; \
370  void (*fftp)(FFTComplex *z) = fft_dispatch[av_log2(m)]; \
371  \
372  for (int i = 0; i < m; i++) { \
373  for (int j = 0; j < N; j++) \
374  fft##N##in[j] = in[in_map[i*N + j]]; \
375  fft##N(s->tmp + s->revtab[i], fft##N##in, m); \
376  } \
377  \
378  for (int i = 0; i < N; i++) \
379  fftp(s->tmp + m*i); \
380  \
381  for (int i = 0; i < N*m; i++) \
382  out[i] = s->tmp[out_map[i]]; \
383 }
384 
385 DECL_COMP_FFT(3)
386 DECL_COMP_FFT(5)
387 DECL_COMP_FFT(15)
388 
389 static void monolithic_fft(AVTXContext *s, void *_out, void *_in,
390  ptrdiff_t stride)
391 {
392  FFTComplex *in = _in;
393  FFTComplex *out = _out;
394  int m = s->m, mb = av_log2(m);
395  for (int i = 0; i < m; i++)
396  out[s->revtab[i]] = in[i];
397  fft_dispatch[mb](out);
398 }
399 
400 static void naive_fft(AVTXContext *s, void *_out, void *_in,
401  ptrdiff_t stride)
402 {
403  FFTComplex *in = _in;
404  FFTComplex *out = _out;
405  const int n = s->n;
406  double phase = s->inv ? 2.0*M_PI/n : -2.0*M_PI/n;
407 
408  for(int i = 0; i < n; i++) {
409  FFTComplex tmp = { 0 };
410  for(int j = 0; j < n; j++) {
411  const double factor = phase*i*j;
412  const FFTComplex mult = {
413  RESCALE(cos(factor)),
414  RESCALE(sin(factor)),
415  };
416  FFTComplex res;
417  CMUL3(res, in[j], mult);
418  tmp.re += res.re;
419  tmp.im += res.im;
420  }
421  out[i] = tmp;
422  }
423 }
424 
425 #define DECL_COMP_IMDCT(N) \
426 static void compound_imdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
427  ptrdiff_t stride) \
428 { \
429  FFTComplex fft##N##in[N]; \
430  FFTComplex *z = _dst, *exp = s->exptab; \
431  const int m = s->m, len8 = N*m >> 1; \
432  const int *in_map = s->pfatab, *out_map = in_map + N*m; \
433  const FFTSample *src = _src, *in1, *in2; \
434  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m)]; \
435  \
436  stride /= sizeof(*src); /* To convert it from bytes */ \
437  in1 = src; \
438  in2 = src + ((N*m*2) - 1) * stride; \
439  \
440  for (int i = 0; i < m; i++) { \
441  for (int j = 0; j < N; j++) { \
442  const int k = in_map[i*N + j]; \
443  FFTComplex tmp = { in2[-k*stride], in1[k*stride] }; \
444  CMUL3(fft##N##in[j], tmp, exp[k >> 1]); \
445  } \
446  fft##N(s->tmp + s->revtab[i], fft##N##in, m); \
447  } \
448  \
449  for (int i = 0; i < N; i++) \
450  fftp(s->tmp + m*i); \
451  \
452  for (int i = 0; i < len8; i++) { \
453  const int i0 = len8 + i, i1 = len8 - i - 1; \
454  const int s0 = out_map[i0], s1 = out_map[i1]; \
455  FFTComplex src1 = { s->tmp[s1].im, s->tmp[s1].re }; \
456  FFTComplex src0 = { s->tmp[s0].im, s->tmp[s0].re }; \
457  \
458  CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re); \
459  CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re); \
460  } \
461 }
462 
465 DECL_COMP_IMDCT(15)
466 
467 #define DECL_COMP_MDCT(N) \
468 static void compound_mdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
469  ptrdiff_t stride) \
470 { \
471  FFTSample *src = _src, *dst = _dst; \
472  FFTComplex *exp = s->exptab, tmp, fft##N##in[N]; \
473  const int m = s->m, len4 = N*m, len3 = len4 * 3, len8 = len4 >> 1; \
474  const int *in_map = s->pfatab, *out_map = in_map + N*m; \
475  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m)]; \
476  \
477  stride /= sizeof(*dst); \
478  \
479  for (int i = 0; i < m; i++) { /* Folding and pre-reindexing */ \
480  for (int j = 0; j < N; j++) { \
481  const int k = in_map[i*N + j]; \
482  if (k < len4) { \
483  tmp.re = FOLD(-src[ len4 + k], src[1*len4 - 1 - k]); \
484  tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]); \
485  } else { \
486  tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]); \
487  tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]); \
488  } \
489  CMUL(fft##N##in[j].im, fft##N##in[j].re, tmp.re, tmp.im, \
490  exp[k >> 1].re, exp[k >> 1].im); \
491  } \
492  fft##N(s->tmp + s->revtab[i], fft##N##in, m); \
493  } \
494  \
495  for (int i = 0; i < N; i++) \
496  fftp(s->tmp + m*i); \
497  \
498  for (int i = 0; i < len8; i++) { \
499  const int i0 = len8 + i, i1 = len8 - i - 1; \
500  const int s0 = out_map[i0], s1 = out_map[i1]; \
501  FFTComplex src1 = { s->tmp[s1].re, s->tmp[s1].im }; \
502  FFTComplex src0 = { s->tmp[s0].re, s->tmp[s0].im }; \
503  \
504  CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im, \
505  exp[i0].im, exp[i0].re); \
506  CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im, \
507  exp[i1].im, exp[i1].re); \
508  } \
509 }
510 
513 DECL_COMP_MDCT(15)
514 
515 static void monolithic_imdct(AVTXContext *s, void *_dst, void *_src,
516  ptrdiff_t stride)
517 {
518  FFTComplex *z = _dst, *exp = s->exptab;
519  const int m = s->m, len8 = m >> 1;
520  const FFTSample *src = _src, *in1, *in2;
521  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m)];
522 
523  stride /= sizeof(*src);
524  in1 = src;
525  in2 = src + ((m*2) - 1) * stride;
526 
527  for (int i = 0; i < m; i++) {
528  FFTComplex tmp = { in2[-2*i*stride], in1[2*i*stride] };
529  CMUL3(z[s->revtab[i]], tmp, exp[i]);
530  }
531 
532  fftp(z);
533 
534  for (int i = 0; i < len8; i++) {
535  const int i0 = len8 + i, i1 = len8 - i - 1;
536  FFTComplex src1 = { z[i1].im, z[i1].re };
537  FFTComplex src0 = { z[i0].im, z[i0].re };
538 
539  CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re);
540  CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re);
541  }
542 }
543 
544 static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src,
545  ptrdiff_t stride)
546 {
547  FFTSample *src = _src, *dst = _dst;
548  FFTComplex *exp = s->exptab, tmp, *z = _dst;
549  const int m = s->m, len4 = m, len3 = len4 * 3, len8 = len4 >> 1;
550  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m)];
551 
552  stride /= sizeof(*dst);
553 
554  for (int i = 0; i < m; i++) { /* Folding and pre-reindexing */
555  const int k = 2*i;
556  if (k < len4) {
557  tmp.re = FOLD(-src[ len4 + k], src[1*len4 - 1 - k]);
558  tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]);
559  } else {
560  tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]);
561  tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]);
562  }
563  CMUL(z[s->revtab[i]].im, z[s->revtab[i]].re, tmp.re, tmp.im,
564  exp[i].re, exp[i].im);
565  }
566 
567  fftp(z);
568 
569  for (int i = 0; i < len8; i++) {
570  const int i0 = len8 + i, i1 = len8 - i - 1;
571  FFTComplex src1 = { z[i1].re, z[i1].im };
572  FFTComplex src0 = { z[i0].re, z[i0].im };
573 
574  CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im,
575  exp[i0].im, exp[i0].re);
576  CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im,
577  exp[i1].im, exp[i1].re);
578  }
579 }
580 
581 static void naive_imdct(AVTXContext *s, void *_dst, void *_src,
582  ptrdiff_t stride)
583 {
584  int len = s->n;
585  int len2 = len*2;
586  FFTSample *src = _src;
587  FFTSample *dst = _dst;
588  double scale = s->scale;
589  const double phase = M_PI/(4.0*len2);
590 
591  stride /= sizeof(*src);
592 
593  for (int i = 0; i < len; i++) {
594  double sum_d = 0.0;
595  double sum_u = 0.0;
596  double i_d = phase * (4*len - 2*i - 1);
597  double i_u = phase * (3*len2 + 2*i + 1);
598  for (int j = 0; j < len2; j++) {
599  double a = (2 * j + 1);
600  double a_d = cos(a * i_d);
601  double a_u = cos(a * i_u);
602  double val = UNSCALE(src[j*stride]);
603  sum_d += a_d * val;
604  sum_u += a_u * val;
605  }
606  dst[i + 0] = RESCALE( sum_d*scale);
607  dst[i + len] = RESCALE(-sum_u*scale);
608  }
609 }
610 
611 static void naive_mdct(AVTXContext *s, void *_dst, void *_src,
612  ptrdiff_t stride)
613 {
614  int len = s->n*2;
615  FFTSample *src = _src;
616  FFTSample *dst = _dst;
617  double scale = s->scale;
618  const double phase = M_PI/(4.0*len);
619 
620  stride /= sizeof(*dst);
621 
622  for (int i = 0; i < len; i++) {
623  double sum = 0.0;
624  for (int j = 0; j < len*2; j++) {
625  int a = (2*j + 1 + len) * (2*i + 1);
626  sum += UNSCALE(src[j]) * cos(a * phase);
627  }
628  dst[i*stride] = RESCALE(sum*scale);
629  }
630 }
631 
632 static int gen_mdct_exptab(AVTXContext *s, int len4, double scale)
633 {
634  const double theta = (scale < 0 ? len4 : 0) + 1.0/8.0;
635 
636  if (!(s->exptab = av_malloc_array(len4, sizeof(*s->exptab))))
637  return AVERROR(ENOMEM);
638 
639  scale = sqrt(fabs(scale));
640  for (int i = 0; i < len4; i++) {
641  const double alpha = M_PI_2 * (i + theta) / len4;
642  s->exptab[i].re = RESCALE(cos(alpha) * scale);
643  s->exptab[i].im = RESCALE(sin(alpha) * scale);
644  }
645 
646  return 0;
647 }
648 
650  enum AVTXType type, int inv, int len,
651  const void *scale, uint64_t flags)
652 {
653  const int is_mdct = ff_tx_type_is_mdct(type);
654  int err, l, n = 1, m = 1, max_ptwo = 1 << (FF_ARRAY_ELEMS(fft_dispatch) - 1);
655 
656  if (is_mdct)
657  len >>= 1;
658 
659  l = len;
660 
661 #define CHECK_FACTOR(DST, FACTOR, SRC) \
662  if (DST == 1 && !(SRC % FACTOR)) { \
663  DST = FACTOR; \
664  SRC /= FACTOR; \
665  }
666  CHECK_FACTOR(n, 15, len)
667  CHECK_FACTOR(n, 5, len)
668  CHECK_FACTOR(n, 3, len)
669 #undef CHECK_FACTOR
670 
671  /* len must be a power of two now */
672  if (!(len & (len - 1)) && len >= 2 && len <= max_ptwo) {
673  m = len;
674  len = 1;
675  }
676 
677  s->n = n;
678  s->m = m;
679  s->inv = inv;
680  s->type = type;
681 
682  /* If we weren't able to split the length into factors we can handle,
683  * resort to using the naive and slow FT. This also filters out
684  * direct 3, 5 and 15 transforms as they're too niche. */
685  if (len > 1 || m == 1) {
686  if (is_mdct && (l & 1)) /* Odd (i)MDCTs are not supported yet */
687  return AVERROR(ENOSYS);
688  s->n = l;
689  s->m = 1;
690  *tx = naive_fft;
691  if (is_mdct) {
692  s->scale = *((SCALE_TYPE *)scale);
693  *tx = inv ? naive_imdct : naive_mdct;
694  }
695  return 0;
696  }
697 
698  if (n > 1 && m > 1) { /* 2D transform case */
699  if ((err = ff_tx_gen_compound_mapping(s)))
700  return err;
701  if (!(s->tmp = av_malloc(n*m*sizeof(*s->tmp))))
702  return AVERROR(ENOMEM);
703  *tx = n == 3 ? compound_fft_3xM :
704  n == 5 ? compound_fft_5xM :
705  compound_fft_15xM;
706  if (is_mdct)
707  *tx = n == 3 ? inv ? compound_imdct_3xM : compound_mdct_3xM :
708  n == 5 ? inv ? compound_imdct_5xM : compound_mdct_5xM :
709  inv ? compound_imdct_15xM : compound_mdct_15xM;
710  } else { /* Direct transform case */
711  *tx = monolithic_fft;
712  if (is_mdct)
713  *tx = inv ? monolithic_imdct : monolithic_mdct;
714  }
715 
716  if (n != 1)
717  init_cos_tabs(0);
718  if (m != 1) {
720  for (int i = 4; i <= av_log2(m); i++)
721  init_cos_tabs(i);
722  }
723 
724  if (is_mdct)
725  return gen_mdct_exptab(s, n*m, *((SCALE_TYPE *)scale));
726 
727  return 0;
728 }
#define NULL
Definition: coverity.c:32
#define DECL_FFT(n, n2, n4)
Definition: tx_template.c:280
int ff_tx_gen_compound_mapping(AVTXContext *s)
Definition: tx.c:44
static av_cold void ff_init_53_tabs(void)
Definition: tx_template.c:95
float re
Definition: fft.c:82
static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:544
static av_cold void init_cos_tabs(int index)
Definition: tx_template.c:124
static FFTSample *const cos_tabs[18]
Definition: tx_template.c:42
static av_always_inline void fft15(FFTComplex *out, FFTComplex *in, ptrdiff_t stride)
Definition: tx_template.c:204
#define M_SQRT1_2
Definition: mathematics.h:58
#define BUTTERFLIES(a0, a1, a2, a3)
Definition: tx_template.c:277
int av_log2(unsigned v)
Definition: intmath.c:26
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:36
static void sum_d(const int *input, int *output, int len)
Definition: dcadct.c:51
GLint GLenum type
Definition: opengl_enc.c:104
FFTSample re
Definition: avfft.h:38
static void monolithic_fft(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
Definition: tx_template.c:389
#define INIT_FF_COS_TABS_FUNC(index, size)
Definition: tx_template.c:74
static void fft5(FFTComplex *out, FFTComplex *in, FFTComplex exptab[2])
Definition: mdct15.c:92
#define t8
Definition: regdef.h:53
static av_always_inline void init_cos_tabs_idx(int index)
Definition: tx_template.c:63
int ff_tx_type_is_mdct(enum AVTXType type)
Definition: tx.c:21
#define DECL_COMP_MDCT(N)
Definition: tx_template.c:467
void FFTComplex
Definition: tx_priv.h:46
#define t7
Definition: regdef.h:35
FFTComplex TX_NAME(ff_cos_53)[4]
#define av_cold
Definition: attributes.h:88
#define av_malloc(s)
#define mb
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
Definition: mem.h:117
#define CHECK_FACTOR(DST, FACTOR, SRC)
#define src
Definition: vp8dsp.c:255
void(* av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride)
Function pointer to a function to perform the transform.
Definition: tx.h:94
static __device__ float fabs(float a)
Definition: cuda_runtime.h:182
static void monolithic_imdct(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:515
#define t1
Definition: regdef.h:29
AVTXType
Definition: tx.h:39
double scale
Definition: tx_priv.h:113
#define t3
Definition: regdef.h:31
float FFTSample
Definition: avfft.h:35
int8_t exp
Definition: eval.c:72
#define pass
Definition: tx_template.c:347
FFTComplex * exptab
Definition: tx_priv.h:115
static int16_t mult(Float11 *f1, Float11 *f2)
Definition: g726.c:55
static void(*const fft_dispatch[])(FFTComplex *)
Definition: tx_template.c:357
#define PASS(name)
Definition: tx_template.c:254
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
#define M_PI_2
Definition: mathematics.h:55
int * revtab
Definition: tx_priv.h:118
#define s(width, name)
Definition: cbs_vp9.c:257
static void fft8(FFTComplex *z)
Definition: tx_template.c:311
#define DECL_FFT5(NAME, D0, D1, D2, D3, D4)
Definition: tx_template.c:165
#define FF_ARRAY_ELEMS(a)
#define DECL_COMP_FFT(N)
Definition: tx_template.c:362
static void naive_imdct(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:581
#define src1
Definition: h264pred.c:140
#define AV_ONCE_INIT
Definition: thread.h:173
static void naive_fft(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
Definition: tx_template.c:400
static void naive_mdct(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:611
static const int16_t alpha[]
Definition: ilbcdata.h:55
static void fft16(FFTComplex *z)
Definition: tx_template.c:326
#define CMUL3(c, a, b)
Definition: mdct15.c:41
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31))))#define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac){}void ff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map){AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);return NULL;}return ac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;}int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){int use_generic=1;int len=in->nb_samples;int p;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:67
int index
Definition: gxfenc.c:89
float im
Definition: fft.c:82
static av_always_inline void fft3(FFTComplex *out, FFTComplex *in, ptrdiff_t stride)
Definition: tx_template.c:130
#define src0
Definition: h264pred.c:139
static const int factor[16]
Definition: vf_pp7.c:77
#define t5
Definition: regdef.h:33
#define flags(name, subs,...)
Definition: cbs_av1.c:561
static int gen_mdct_exptab(AVTXContext *s, int len4, double scale)
Definition: tx_template.c:632
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:104
FFTSample im
Definition: avfft.h:38
#define TRANSFORM(a0, a1, a2, a3, wre, wim)
Definition: tx_template.c:239
#define t6
Definition: regdef.h:34
static void fft2(FFTComplex *z)
Definition: tx_template.c:289
#define TRANSFORM_ZERO(a0, a1, a2, a3)
Definition: tx_template.c:245
static void fft4(FFTComplex *z)
Definition: tx_template.c:297
#define t4
Definition: regdef.h:32
#define BF(a, b, c, s)
int len
static int ff_thread_once(char *control, void(*routine)(void))
Definition: thread.h:175
int TX_NAME() ff_tx_init_mdct_fft(AVTXContext *s, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)
Definition: tx_template.c:649
static const struct twinvq_data tab
FILE * out
Definition: movenc.c:54
#define av_always_inline
Definition: attributes.h:45
#define M_PI
Definition: mathematics.h:52
#define av_malloc_array(a, b)
#define stride
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later.That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another.Frame references ownership and permissions
int ff_tx_gen_ptwo_revtab(AVTXContext *s)
Definition: tx.c:94
static double val(void *priv, double ch)
Definition: aeval.c:76
static CosTabsInitOnce cos_tabs_init_once[]
Definition: tx_template.c:103
#define t2
Definition: regdef.h:30
int i
Definition: input.c:407
COSTABLE(16)
#define DECL_COMP_IMDCT(N)
Definition: tx_template.c:425
static uint8_t tmp[11]
Definition: aes_ctr.c:27