FFmpeg
tx_template.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019 Lynne <dev@lynne.ee>
3  * Power of two FFT:
4  * Copyright (c) 2008 Loren Merritt
5  * Copyright (c) 2002 Fabrice Bellard
6  * Partly based on libdjbfft by D. J. Bernstein
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 /* All costabs for a type are defined here */
26 COSTABLE(16);
27 COSTABLE(32);
28 COSTABLE(64);
29 COSTABLE(128);
30 COSTABLE(256);
31 COSTABLE(512);
32 COSTABLE(1024);
33 COSTABLE(2048);
34 COSTABLE(4096);
35 COSTABLE(8192);
36 COSTABLE(16384);
37 COSTABLE(32768);
38 COSTABLE(65536);
39 COSTABLE(131072);
40 DECLARE_ALIGNED(32, FFTComplex, TX_NAME(ff_cos_53))[4];
41 
42 static FFTSample * const cos_tabs[18] = {
43  NULL,
44  NULL,
45  NULL,
46  NULL,
47  TX_NAME(ff_cos_16),
48  TX_NAME(ff_cos_32),
49  TX_NAME(ff_cos_64),
50  TX_NAME(ff_cos_128),
51  TX_NAME(ff_cos_256),
52  TX_NAME(ff_cos_512),
53  TX_NAME(ff_cos_1024),
54  TX_NAME(ff_cos_2048),
55  TX_NAME(ff_cos_4096),
56  TX_NAME(ff_cos_8192),
57  TX_NAME(ff_cos_16384),
58  TX_NAME(ff_cos_32768),
59  TX_NAME(ff_cos_65536),
60  TX_NAME(ff_cos_131072),
61 };
62 
64 {
65  int m = 1 << index;
66  double freq = 2*M_PI/m;
67  FFTSample *tab = cos_tabs[index];
68  for(int i = 0; i <= m/4; i++)
69  tab[i] = cos(i*freq);
70  for(int i = 1; i < m/4; i++)
71  tab[m/2 - i] = tab[i];
72 }
73 
74 #define INIT_FF_COS_TABS_FUNC(index, size) \
75 static av_cold void init_cos_tabs_ ## size (void) \
76 { \
77  init_cos_tabs_idx(index); \
78 }
79 
86 INIT_FF_COS_TABS_FUNC(10, 1024)
87 INIT_FF_COS_TABS_FUNC(11, 2048)
88 INIT_FF_COS_TABS_FUNC(12, 4096)
89 INIT_FF_COS_TABS_FUNC(13, 8192)
90 INIT_FF_COS_TABS_FUNC(14, 16384)
91 INIT_FF_COS_TABS_FUNC(15, 32768)
92 INIT_FF_COS_TABS_FUNC(16, 65536)
93 INIT_FF_COS_TABS_FUNC(17, 131072)
94 
95 static av_cold void ff_init_53_tabs(void)
96 {
97  TX_NAME(ff_cos_53)[0] = (FFTComplex){ cos(2 * M_PI / 12), cos(2 * M_PI / 12) };
98  TX_NAME(ff_cos_53)[1] = (FFTComplex){ 0.5, 0.5 };
99  TX_NAME(ff_cos_53)[2] = (FFTComplex){ cos(2 * M_PI / 5), sin(2 * M_PI / 5) };
100  TX_NAME(ff_cos_53)[3] = (FFTComplex){ cos(2 * M_PI / 10), sin(2 * M_PI / 10) };
101 }
102 
105  { NULL },
106  { NULL },
107  { NULL },
108  { init_cos_tabs_16, AV_ONCE_INIT },
109  { init_cos_tabs_32, AV_ONCE_INIT },
110  { init_cos_tabs_64, AV_ONCE_INIT },
111  { init_cos_tabs_128, AV_ONCE_INIT },
112  { init_cos_tabs_256, AV_ONCE_INIT },
113  { init_cos_tabs_512, AV_ONCE_INIT },
114  { init_cos_tabs_1024, AV_ONCE_INIT },
115  { init_cos_tabs_2048, AV_ONCE_INIT },
116  { init_cos_tabs_4096, AV_ONCE_INIT },
117  { init_cos_tabs_8192, AV_ONCE_INIT },
118  { init_cos_tabs_16384, AV_ONCE_INIT },
119  { init_cos_tabs_32768, AV_ONCE_INIT },
120  { init_cos_tabs_65536, AV_ONCE_INIT },
121  { init_cos_tabs_131072, AV_ONCE_INIT },
122 };
123 
124 static av_cold void init_cos_tabs(int index)
125 {
126  ff_thread_once(&cos_tabs_init_once[index].control,
127  cos_tabs_init_once[index].func);
128 }
129 
131  ptrdiff_t stride)
132 {
133  FFTComplex tmp[2];
134 
135  tmp[0].re = in[1].im - in[2].im;
136  tmp[0].im = in[1].re - in[2].re;
137  tmp[1].re = in[1].re + in[2].re;
138  tmp[1].im = in[1].im + in[2].im;
139 
140  out[0*stride].re = in[0].re + tmp[1].re;
141  out[0*stride].im = in[0].im + tmp[1].im;
142 
143  tmp[0].re *= TX_NAME(ff_cos_53)[0].re;
144  tmp[0].im *= TX_NAME(ff_cos_53)[0].im;
145  tmp[1].re *= TX_NAME(ff_cos_53)[1].re;
146  tmp[1].im *= TX_NAME(ff_cos_53)[1].re;
147 
148  out[1*stride].re = in[0].re - tmp[1].re + tmp[0].re;
149  out[1*stride].im = in[0].im - tmp[1].im - tmp[0].im;
150  out[2*stride].re = in[0].re - tmp[1].re - tmp[0].re;
151  out[2*stride].im = in[0].im - tmp[1].im + tmp[0].im;
152 }
153 
154 #define DECL_FFT5(NAME, D0, D1, D2, D3, D4) \
155 static av_always_inline void NAME(FFTComplex *out, FFTComplex *in, \
156  ptrdiff_t stride) \
157 { \
158  FFTComplex z0[4], t[6]; \
159  \
160  t[0].re = in[1].re + in[4].re; \
161  t[0].im = in[1].im + in[4].im; \
162  t[1].im = in[1].re - in[4].re; \
163  t[1].re = in[1].im - in[4].im; \
164  t[2].re = in[2].re + in[3].re; \
165  t[2].im = in[2].im + in[3].im; \
166  t[3].im = in[2].re - in[3].re; \
167  t[3].re = in[2].im - in[3].im; \
168  \
169  out[D0*stride].re = in[0].re + in[1].re + in[2].re + \
170  in[3].re + in[4].re; \
171  out[D0*stride].im = in[0].im + in[1].im + in[2].im + \
172  in[3].im + in[4].im; \
173  \
174  t[4].re = TX_NAME(ff_cos_53)[2].re * t[2].re; \
175  t[4].im = TX_NAME(ff_cos_53)[2].re * t[2].im; \
176  t[4].re -= TX_NAME(ff_cos_53)[3].re * t[0].re; \
177  t[4].im -= TX_NAME(ff_cos_53)[3].re * t[0].im; \
178  t[0].re = TX_NAME(ff_cos_53)[2].re * t[0].re; \
179  t[0].im = TX_NAME(ff_cos_53)[2].re * t[0].im; \
180  t[0].re -= TX_NAME(ff_cos_53)[3].re * t[2].re; \
181  t[0].im -= TX_NAME(ff_cos_53)[3].re * t[2].im; \
182  t[5].re = TX_NAME(ff_cos_53)[2].im * t[3].re; \
183  t[5].im = TX_NAME(ff_cos_53)[2].im * t[3].im; \
184  t[5].re -= TX_NAME(ff_cos_53)[3].im * t[1].re; \
185  t[5].im -= TX_NAME(ff_cos_53)[3].im * t[1].im; \
186  t[1].re = TX_NAME(ff_cos_53)[2].im * t[1].re; \
187  t[1].im = TX_NAME(ff_cos_53)[2].im * t[1].im; \
188  t[1].re += TX_NAME(ff_cos_53)[3].im * t[3].re; \
189  t[1].im += TX_NAME(ff_cos_53)[3].im * t[3].im; \
190  \
191  z0[0].re = t[0].re - t[1].re; \
192  z0[0].im = t[0].im - t[1].im; \
193  z0[1].re = t[4].re + t[5].re; \
194  z0[1].im = t[4].im + t[5].im; \
195  \
196  z0[2].re = t[4].re - t[5].re; \
197  z0[2].im = t[4].im - t[5].im; \
198  z0[3].re = t[0].re + t[1].re; \
199  z0[3].im = t[0].im + t[1].im; \
200  \
201  out[D1*stride].re = in[0].re + z0[3].re; \
202  out[D1*stride].im = in[0].im + z0[0].im; \
203  out[D2*stride].re = in[0].re + z0[2].re; \
204  out[D2*stride].im = in[0].im + z0[1].im; \
205  out[D3*stride].re = in[0].re + z0[1].re; \
206  out[D3*stride].im = in[0].im + z0[2].im; \
207  out[D4*stride].re = in[0].re + z0[0].re; \
208  out[D4*stride].im = in[0].im + z0[3].im; \
209 }
210 
211 DECL_FFT5(fft5, 0, 1, 2, 3, 4)
212 DECL_FFT5(fft5_m1, 0, 6, 12, 3, 9)
213 DECL_FFT5(fft5_m2, 10, 1, 7, 13, 4)
214 DECL_FFT5(fft5_m3, 5, 11, 2, 8, 14)
215 
217  ptrdiff_t stride)
218 {
219  FFTComplex tmp[15];
220 
221  for (int i = 0; i < 5; i++)
222  fft3(tmp + i, in + i*3, 5);
223 
224  fft5_m1(out, tmp + 0, stride);
225  fft5_m2(out, tmp + 5, stride);
226  fft5_m3(out, tmp + 10, stride);
227 }
228 
229 #define BUTTERFLIES(a0,a1,a2,a3) {\
230  BF(t3, t5, t5, t1);\
231  BF(a2.re, a0.re, a0.re, t5);\
232  BF(a3.im, a1.im, a1.im, t3);\
233  BF(t4, t6, t2, t6);\
234  BF(a3.re, a1.re, a1.re, t4);\
235  BF(a2.im, a0.im, a0.im, t6);\
236 }
237 
238 // force loading all the inputs before storing any.
239 // this is slightly slower for small data, but avoids store->load aliasing
240 // for addresses separated by large powers of 2.
241 #define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
242  FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
243  BF(t3, t5, t5, t1);\
244  BF(a2.re, a0.re, r0, t5);\
245  BF(a3.im, a1.im, i1, t3);\
246  BF(t4, t6, t2, t6);\
247  BF(a3.re, a1.re, r1, t4);\
248  BF(a2.im, a0.im, i0, t6);\
249 }
250 
251 #define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
252  CMUL(t1, t2, a2.re, a2.im, wre, -wim);\
253  CMUL(t5, t6, a3.re, a3.im, wre, wim);\
254  BUTTERFLIES(a0,a1,a2,a3)\
255 }
256 
257 #define TRANSFORM_ZERO(a0,a1,a2,a3) {\
258  t1 = a2.re;\
259  t2 = a2.im;\
260  t5 = a3.re;\
261  t6 = a3.im;\
262  BUTTERFLIES(a0,a1,a2,a3)\
263 }
264 
265 /* z[0...8n-1], w[1...2n-1] */
266 #define PASS(name)\
267 static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
268 {\
269  FFTSample t1, t2, t3, t4, t5, t6;\
270  int o1 = 2*n;\
271  int o2 = 4*n;\
272  int o3 = 6*n;\
273  const FFTSample *wim = wre+o1;\
274  n--;\
275 \
276  TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
277  TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
278  do {\
279  z += 2;\
280  wre += 2;\
281  wim -= 2;\
282  TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
283  TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
284  } while(--n);\
285 }
286 
287 PASS(pass)
288 #undef BUTTERFLIES
289 #define BUTTERFLIES BUTTERFLIES_BIG
290 PASS(pass_big)
291 
292 #define DECL_FFT(n,n2,n4)\
293 static void fft##n(FFTComplex *z)\
294 {\
295  fft##n2(z);\
296  fft##n4(z+n4*2);\
297  fft##n4(z+n4*3);\
298  pass(z,TX_NAME(ff_cos_##n),n4/2);\
299 }
300 
301 static void fft4(FFTComplex *z)
302 {
303  FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
304 
305  BF(t3, t1, z[0].re, z[1].re);
306  BF(t8, t6, z[3].re, z[2].re);
307  BF(z[2].re, z[0].re, t1, t6);
308  BF(t4, t2, z[0].im, z[1].im);
309  BF(t7, t5, z[2].im, z[3].im);
310  BF(z[3].im, z[1].im, t4, t8);
311  BF(z[3].re, z[1].re, t3, t7);
312  BF(z[2].im, z[0].im, t2, t5);
313 }
314 
315 static void fft8(FFTComplex *z)
316 {
317  FFTSample t1, t2, t3, t4, t5, t6;
318 
319  fft4(z);
320 
321  BF(t1, z[5].re, z[4].re, -z[5].re);
322  BF(t2, z[5].im, z[4].im, -z[5].im);
323  BF(t5, z[7].re, z[6].re, -z[7].re);
324  BF(t6, z[7].im, z[6].im, -z[7].im);
325 
326  BUTTERFLIES(z[0],z[2],z[4],z[6]);
327  TRANSFORM(z[1],z[3],z[5],z[7],M_SQRT1_2,M_SQRT1_2);
328 }
329 
330 static void fft16(FFTComplex *z)
331 {
332  FFTSample t1, t2, t3, t4, t5, t6;
333  FFTSample cos_16_1 = TX_NAME(ff_cos_16)[1];
334  FFTSample cos_16_3 = TX_NAME(ff_cos_16)[3];
335 
336  fft8(z);
337  fft4(z+8);
338  fft4(z+12);
339 
340  TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
341  TRANSFORM(z[2],z[6],z[10],z[14],M_SQRT1_2,M_SQRT1_2);
342  TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3);
343  TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1);
344 }
345 
346 DECL_FFT(32,16,8)
347 DECL_FFT(64,32,16)
348 DECL_FFT(128,64,32)
349 DECL_FFT(256,128,64)
350 DECL_FFT(512,256,128)
351 #define pass pass_big
352 DECL_FFT(1024,512,256)
353 DECL_FFT(2048,1024,512)
354 DECL_FFT(4096,2048,1024)
355 DECL_FFT(8192,4096,2048)
356 DECL_FFT(16384,8192,4096)
357 DECL_FFT(32768,16384,8192)
358 DECL_FFT(65536,32768,16384)
359 DECL_FFT(131072,65536,32768)
360 
361 static void (* const fft_dispatch[])(FFTComplex*) = {
362  fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
363  fft2048, fft4096, fft8192, fft16384, fft32768, fft65536, fft131072
364 };
365 
366 #define DECL_COMP_FFT(N) \
367 static void compound_fft_##N##xM(AVTXContext *s, void *_out, \
368  void *_in, ptrdiff_t stride) \
369 { \
370  const int m = s->m, *in_map = s->pfatab, *out_map = in_map + N*m; \
371  FFTComplex *in = _in; \
372  FFTComplex *out = _out; \
373  FFTComplex fft##N##in[N]; \
374  void (*fftp)(FFTComplex *z) = fft_dispatch[av_log2(m) - 2]; \
375  \
376  for (int i = 0; i < m; i++) { \
377  for (int j = 0; j < N; j++) \
378  fft##N##in[j] = in[in_map[i*N + j]]; \
379  fft##N(s->tmp + s->revtab[i], fft##N##in, m); \
380  } \
381  \
382  for (int i = 0; i < N; i++) \
383  fftp(s->tmp + m*i); \
384  \
385  for (int i = 0; i < N*m; i++) \
386  out[i] = s->tmp[out_map[i]]; \
387 }
388 
389 DECL_COMP_FFT(3)
390 DECL_COMP_FFT(5)
391 DECL_COMP_FFT(15)
392 
393 static void monolithic_fft(AVTXContext *s, void *_out, void *_in,
394  ptrdiff_t stride)
395 {
396  FFTComplex *in = _in;
397  FFTComplex *out = _out;
398  int m = s->m, mb = av_log2(m) - 2;
399  for (int i = 0; i < m; i++)
400  out[s->revtab[i]] = in[i];
401  fft_dispatch[mb](out);
402 }
403 
404 #define DECL_COMP_IMDCT(N) \
405 static void compound_imdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
406  ptrdiff_t stride) \
407 { \
408  FFTComplex fft##N##in[N]; \
409  FFTComplex *z = _dst, *exp = s->exptab; \
410  const int m = s->m, len8 = N*m >> 1; \
411  const int *in_map = s->pfatab, *out_map = in_map + N*m; \
412  const FFTSample *src = _src, *in1, *in2; \
413  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m) - 2]; \
414  \
415  stride /= sizeof(*src); /* To convert it from bytes */ \
416  in1 = src; \
417  in2 = src + ((N*m*2) - 1) * stride; \
418  \
419  for (int i = 0; i < m; i++) { \
420  for (int j = 0; j < N; j++) { \
421  const int k = in_map[i*N + j]; \
422  FFTComplex tmp = { in2[-k*stride], in1[k*stride] }; \
423  CMUL3(fft##N##in[j], tmp, exp[k >> 1]); \
424  } \
425  fft##N(s->tmp + s->revtab[i], fft##N##in, m); \
426  } \
427  \
428  for (int i = 0; i < N; i++) \
429  fftp(s->tmp + m*i); \
430  \
431  for (int i = 0; i < len8; i++) { \
432  const int i0 = len8 + i, i1 = len8 - i - 1; \
433  const int s0 = out_map[i0], s1 = out_map[i1]; \
434  FFTComplex src1 = { s->tmp[s1].im, s->tmp[s1].re }; \
435  FFTComplex src0 = { s->tmp[s0].im, s->tmp[s0].re }; \
436  \
437  CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re); \
438  CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re); \
439  } \
440 }
441 
444 DECL_COMP_IMDCT(15)
445 
446 #define DECL_COMP_MDCT(N) \
447 static void compound_mdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
448  ptrdiff_t stride) \
449 { \
450  FFTSample *src = _src, *dst = _dst; \
451  FFTComplex *exp = s->exptab, tmp, fft##N##in[N]; \
452  const int m = s->m, len4 = N*m, len3 = len4 * 3, len8 = len4 >> 1; \
453  const int *in_map = s->pfatab, *out_map = in_map + N*m; \
454  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m) - 2]; \
455  \
456  stride /= sizeof(*dst); \
457  \
458  for (int i = 0; i < m; i++) { /* Folding and pre-reindexing */ \
459  for (int j = 0; j < N; j++) { \
460  const int k = in_map[i*N + j]; \
461  if (k < len4) { \
462  tmp.re = -src[ len4 + k] + src[1*len4 - 1 - k]; \
463  tmp.im = -src[ len3 + k] - src[1*len3 - 1 - k]; \
464  } else { \
465  tmp.re = -src[ len4 + k] - src[5*len4 - 1 - k]; \
466  tmp.im = src[-len4 + k] - src[1*len3 - 1 - k]; \
467  } \
468  CMUL(fft##N##in[j].im, fft##N##in[j].re, tmp.re, tmp.im, \
469  exp[k >> 1].re, exp[k >> 1].im); \
470  } \
471  fft##N(s->tmp + s->revtab[i], fft##N##in, m); \
472  } \
473  \
474  for (int i = 0; i < N; i++) \
475  fftp(s->tmp + m*i); \
476  \
477  for (int i = 0; i < len8; i++) { \
478  const int i0 = len8 + i, i1 = len8 - i - 1; \
479  const int s0 = out_map[i0], s1 = out_map[i1]; \
480  FFTComplex src1 = { s->tmp[s1].re, s->tmp[s1].im }; \
481  FFTComplex src0 = { s->tmp[s0].re, s->tmp[s0].im }; \
482  \
483  CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im, \
484  exp[i0].im, exp[i0].re); \
485  CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im, \
486  exp[i1].im, exp[i1].re); \
487  } \
488 }
489 
492 DECL_COMP_MDCT(15)
493 
494 static void monolithic_imdct(AVTXContext *s, void *_dst, void *_src,
495  ptrdiff_t stride)
496 {
497  FFTComplex *z = _dst, *exp = s->exptab;
498  const int m = s->m, len8 = m >> 1;
499  const FFTSample *src = _src, *in1, *in2;
500  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m) - 2];
501 
502  stride /= sizeof(*src);
503  in1 = src;
504  in2 = src + ((m*2) - 1) * stride;
505 
506  for (int i = 0; i < m; i++) {
507  FFTComplex tmp = { in2[-2*i*stride], in1[2*i*stride] };
508  CMUL3(z[s->revtab[i]], tmp, exp[i]);
509  }
510 
511  fftp(z);
512 
513  for (int i = 0; i < len8; i++) {
514  const int i0 = len8 + i, i1 = len8 - i - 1;
515  FFTComplex src1 = { z[i1].im, z[i1].re };
516  FFTComplex src0 = { z[i0].im, z[i0].re };
517 
518  CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re);
519  CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re);
520  }
521 }
522 
523 static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src,
524  ptrdiff_t stride)
525 {
526  FFTSample *src = _src, *dst = _dst;
527  FFTComplex *exp = s->exptab, tmp, *z = _dst;
528  const int m = s->m, len4 = m, len3 = len4 * 3, len8 = len4 >> 1;
529  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m) - 2];
530 
531  stride /= sizeof(*dst);
532 
533  for (int i = 0; i < m; i++) { /* Folding and pre-reindexing */
534  const int k = 2*i;
535  if (k < len4) {
536  tmp.re = -src[ len4 + k] + src[1*len4 - 1 - k];
537  tmp.im = -src[ len3 + k] - src[1*len3 - 1 - k];
538  } else {
539  tmp.re = -src[ len4 + k] - src[5*len4 - 1 - k];
540  tmp.im = src[-len4 + k] - src[1*len3 - 1 - k];
541  }
542  CMUL(z[s->revtab[i]].im, z[s->revtab[i]].re, tmp.re, tmp.im,
543  exp[i].re, exp[i].im);
544  }
545 
546  fftp(z);
547 
548  for (int i = 0; i < len8; i++) {
549  const int i0 = len8 + i, i1 = len8 - i - 1;
550  FFTComplex src1 = { z[i1].re, z[i1].im };
551  FFTComplex src0 = { z[i0].re, z[i0].im };
552 
553  CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im,
554  exp[i0].im, exp[i0].re);
555  CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im,
556  exp[i1].im, exp[i1].re);
557  }
558 }
559 
560 static int gen_mdct_exptab(AVTXContext *s, int len4, double scale)
561 {
562  const double theta = (scale < 0 ? len4 : 0) + 1.0/8.0;
563 
564  if (!(s->exptab = av_malloc_array(len4, sizeof(*s->exptab))))
565  return AVERROR(ENOMEM);
566 
567  scale = sqrt(fabs(scale));
568  for (int i = 0; i < len4; i++) {
569  const double alpha = M_PI_2 * (i + theta) / len4;
570  s->exptab[i].re = cos(alpha) * scale;
571  s->exptab[i].im = sin(alpha) * scale;
572  }
573 
574  return 0;
575 }
576 
578  enum AVTXType type, int inv, int len,
579  const void *scale, uint64_t flags)
580 {
581  const int is_mdct = type == AV_TX_FLOAT_MDCT || type == AV_TX_DOUBLE_MDCT;
582  int err, n = 1, m = 1, max_ptwo = 1 << (FF_ARRAY_ELEMS(fft_dispatch) + 1);
583 
584  if (is_mdct)
585  len >>= 1;
586 
587 #define CHECK_FACTOR(DST, FACTOR, SRC) \
588  if (DST == 1 && !(SRC % FACTOR)) { \
589  DST = FACTOR; \
590  SRC /= FACTOR; \
591  }
592  CHECK_FACTOR(n, 15, len)
593  CHECK_FACTOR(n, 5, len)
594  CHECK_FACTOR(n, 3, len)
595 #undef CHECK_NPTWO_FACTOR
596 
597  /* len must be a power of two now */
598  if (!(len & (len - 1)) && len >= 4 && len <= max_ptwo) {
599  m = len;
600  len = 1;
601  }
602 
603  s->n = n;
604  s->m = m;
605  s->inv = inv;
606  s->type = type;
607 
608  /* Filter out direct 3, 5 and 15 transforms, too niche */
609  if (len > 1 || m == 1) {
610  av_log(NULL, AV_LOG_ERROR, "Unsupported transform size: n = %i, "
611  "m = %i, residual = %i!\n", n, m, len);
612  return AVERROR(EINVAL);
613  } else if (n > 1 && m > 1) { /* 2D transform case */
614  if ((err = ff_tx_gen_compound_mapping(s)))
615  return err;
616  if (!(s->tmp = av_malloc(n*m*sizeof(*s->tmp))))
617  return AVERROR(ENOMEM);
618  *tx = n == 3 ? compound_fft_3xM :
619  n == 5 ? compound_fft_5xM :
620  compound_fft_15xM;
621  if (is_mdct)
622  *tx = n == 3 ? inv ? compound_imdct_3xM : compound_mdct_3xM :
623  n == 5 ? inv ? compound_imdct_5xM : compound_mdct_5xM :
624  inv ? compound_imdct_15xM : compound_mdct_15xM;
625  } else { /* Direct transform case */
626  *tx = monolithic_fft;
627  if (is_mdct)
628  *tx = inv ? monolithic_imdct : monolithic_mdct;
629  }
630 
631  if (n != 1)
632  init_cos_tabs(0);
633  if (m != 1) {
635  for (int i = 4; i <= av_log2(m); i++)
636  init_cos_tabs(i);
637  }
638 
639  if (is_mdct)
640  return gen_mdct_exptab(s, n*m, *((FFTSample *)scale));
641 
642  return 0;
643 }
#define NULL
Definition: coverity.c:32
#define DECL_FFT(n, n2, n4)
Definition: tx_template.c:292
int ff_tx_gen_compound_mapping(AVTXContext *s)
Definition: tx.c:32
static av_cold void ff_init_53_tabs(void)
Definition: tx_template.c:95
float re
Definition: fft.c:82
static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:523
static av_cold void init_cos_tabs(int index)
Definition: tx_template.c:124
static FFTSample *const cos_tabs[18]
Definition: tx_template.c:42
static av_always_inline void fft15(FFTComplex *out, FFTComplex *in, ptrdiff_t stride)
Definition: tx_template.c:216
#define M_SQRT1_2
Definition: mathematics.h:58
#define BUTTERFLIES(a0, a1, a2, a3)
Definition: tx_template.c:289
int av_log2(unsigned v)
Definition: intmath.c:26
GLint GLenum type
Definition: opengl_enc.c:104
FFTSample re
Definition: avfft.h:38
static void monolithic_fft(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
Definition: tx_template.c:393
#define INIT_FF_COS_TABS_FUNC(index, size)
Definition: tx_template.c:74
static void fft5(FFTComplex *out, FFTComplex *in, FFTComplex exptab[2])
Definition: mdct15.c:92
#define t8
Definition: regdef.h:53
static av_always_inline void init_cos_tabs_idx(int index)
Definition: tx_template.c:63
#define DECL_COMP_MDCT(N)
Definition: tx_template.c:446
#define src
Definition: vp8dsp.c:254
void FFTComplex
Definition: tx_priv.h:38
#define t7
Definition: regdef.h:35
FFTComplex TX_NAME(ff_cos_53)[4]
#define av_cold
Definition: attributes.h:82
#define av_malloc(s)
#define mb
#define CMUL(dre, dim, are, aim, bre, bim)
Definition: fft-internal.h:76
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
Definition: mem.h:112
#define av_log(a,...)
#define CHECK_FACTOR(DST, FACTOR, SRC)
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
void(* av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride)
Function pointer to a function to perform the transform.
Definition: tx.h:68
static void monolithic_imdct(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:494
#define t1
Definition: regdef.h:29
AVTXType
Definition: tx.h:35
#define t3
Definition: regdef.h:31
float FFTSample
Definition: avfft.h:35
int8_t exp
Definition: eval.c:72
#define pass
Definition: tx_template.c:351
FFTComplex * exptab
Definition: tx_priv.h:66
static void(*const fft_dispatch[])(FFTComplex *)
Definition: tx_template.c:361
#define PASS(name)
Definition: tx_template.c:266
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
#define M_PI_2
Definition: mathematics.h:55
int * revtab
Definition: tx_priv.h:69
#define s(width, name)
Definition: cbs_vp9.c:257
int n
Definition: avisynth_c.h:760
static void fft8(FFTComplex *z)
Definition: tx_template.c:315
#define DECL_FFT5(NAME, D0, D1, D2, D3, D4)
Definition: tx_template.c:154
#define FF_ARRAY_ELEMS(a)
#define DECL_COMP_FFT(N)
Definition: tx_template.c:366
#define src1
Definition: h264pred.c:139
#define AV_ONCE_INIT
Definition: thread.h:160
static const int16_t alpha[]
Definition: ilbcdata.h:55
static void fft16(FFTComplex *z)
Definition: tx_template.c:330
#define CMUL3(c, a, b)
Definition: mdct15.c:41
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31))))#define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac){}void ff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map){AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);return NULL;}return ac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;}int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){int use_generic=1;int len=in->nb_samples;int p;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:67
int index
Definition: gxfenc.c:89
float im
Definition: fft.c:82
static av_always_inline void fft3(FFTComplex *out, FFTComplex *in, ptrdiff_t stride)
Definition: tx_template.c:130
#define src0
Definition: h264pred.c:138
#define t5
Definition: regdef.h:33
#define flags(name, subs,...)
Definition: cbs_av1.c:561
static int gen_mdct_exptab(AVTXContext *s, int len4, double scale)
Definition: tx_template.c:560
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:104
FFTSample im
Definition: avfft.h:38
#define TRANSFORM(a0, a1, a2, a3, wre, wim)
Definition: tx_template.c:251
#define t6
Definition: regdef.h:34
#define TRANSFORM_ZERO(a0, a1, a2, a3)
Definition: tx_template.c:257
static void fft4(FFTComplex *z)
Definition: tx_template.c:301
#define t4
Definition: regdef.h:32
Standard MDCT with sample data type of float and a scale type of float.
Definition: tx.h:45
#define BF(a, b, c, s)
int len
Same as AV_TX_FLOAT_MDCT with data and scale type of double.
Definition: tx.h:53
static int ff_thread_once(char *control, void(*routine)(void))
Definition: thread.h:162
int TX_NAME() ff_tx_init_mdct_fft(AVTXContext *s, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)
Definition: tx_template.c:577
static const struct twinvq_data tab
FILE * out
Definition: movenc.c:54
#define av_always_inline
Definition: attributes.h:39
#define M_PI
Definition: mathematics.h:52
#define av_malloc_array(a, b)
#define stride
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later.That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another.Frame references ownership and permissions
int ff_tx_gen_ptwo_revtab(AVTXContext *s)
Definition: tx.c:83
static CosTabsInitOnce cos_tabs_init_once[]
Definition: tx_template.c:103
#define t2
Definition: regdef.h:30
COSTABLE(16)
#define DECL_COMP_IMDCT(N)
Definition: tx_template.c:404
static uint8_t tmp[11]
Definition: aes_ctr.c:26