FFmpeg
tx_template.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019 Lynne <dev@lynne.ee>
3  * Power of two FFT:
4  * Copyright (c) 2008 Loren Merritt
5  * Copyright (c) 2002 Fabrice Bellard
6  * Partly based on libdjbfft by D. J. Bernstein
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
/* All costabs for a type are defined here */
COSTABLE(16);
COSTABLE(32);
COSTABLE(64);
COSTABLE(128);
COSTABLE(256);
COSTABLE(512);
COSTABLE(1024);
COSTABLE(2048);
COSTABLE(4096);
COSTABLE(8192);
COSTABLE(16384);
COSTABLE(32768);
COSTABLE(65536);
COSTABLE(131072);
/* Twiddle factors shared by the 3-point and 5-point transforms,
 * filled in at runtime by ff_init_53_tabs() */
DECLARE_ALIGNED(32, FFTComplex, TX_NAME(ff_cos_53))[4];
41 
/* Runtime-initialized cosine tables, indexed by log2 of the transform
 * size. Entries 0-3 are unused (sizes below 16 need no table), hence NULL. */
static FFTSample * const cos_tabs[18] = {
    NULL,
    NULL,
    NULL,
    NULL,
    TX_NAME(ff_cos_16),
    TX_NAME(ff_cos_32),
    TX_NAME(ff_cos_64),
    TX_NAME(ff_cos_128),
    TX_NAME(ff_cos_256),
    TX_NAME(ff_cos_512),
    TX_NAME(ff_cos_1024),
    TX_NAME(ff_cos_2048),
    TX_NAME(ff_cos_4096),
    TX_NAME(ff_cos_8192),
    TX_NAME(ff_cos_16384),
    TX_NAME(ff_cos_32768),
    TX_NAME(ff_cos_65536),
    TX_NAME(ff_cos_131072),
};
62 
{
    int m = 1 << index;             /* table covers a transform of size 2^index */
    double freq = 2*M_PI/m;
    FFTSample *tab = cos_tabs[index];
    /* Compute the first quarter-wave of cosines directly */
    for(int i = 0; i <= m/4; i++)
        tab[i] = RESCALE(cos(i*freq));
    /* The second quarter is the mirror image of the first */
    for(int i = 1; i < m/4; i++)
        tab[m/2 - i] = tab[i];
}
73 
/* Declares init_cos_tabs_<size>(): a parameterless wrapper around
 * init_cos_tabs_idx() usable as a ff_thread_once() callback. */
#define INIT_FF_COS_TABS_FUNC(index, size) \
static av_cold void init_cos_tabs_ ## size (void) \
{ \
    init_cos_tabs_idx(index); \
}
79 
/* One init wrapper per table size (the wrappers for sizes 16-512 are
 * instantiated just above; they are elided from this excerpt). */
INIT_FF_COS_TABS_FUNC(10, 1024)
INIT_FF_COS_TABS_FUNC(11, 2048)
INIT_FF_COS_TABS_FUNC(12, 4096)
INIT_FF_COS_TABS_FUNC(13, 8192)
INIT_FF_COS_TABS_FUNC(14, 16384)
INIT_FF_COS_TABS_FUNC(15, 32768)
INIT_FF_COS_TABS_FUNC(16, 65536)
INIT_FF_COS_TABS_FUNC(17, 131072)
94 
/* Fill the shared twiddle table for the 3- and 5-point transforms.
 * Entries [0]/[1] duplicate a cosine in both lanes (3-point transform);
 * entries [2]/[3] are cos/sin pairs (5-point transform). */
static av_cold void ff_init_53_tabs(void)
{
    TX_NAME(ff_cos_53)[0] = (FFTComplex){ RESCALE(cos(2 * M_PI / 12)), RESCALE(cos(2 * M_PI / 12)) };
    TX_NAME(ff_cos_53)[1] = (FFTComplex){ RESCALE(cos(2 * M_PI / 6)), RESCALE(cos(2 * M_PI / 6)) };
    TX_NAME(ff_cos_53)[2] = (FFTComplex){ RESCALE(cos(2 * M_PI / 5)), RESCALE(sin(2 * M_PI / 5)) };
    TX_NAME(ff_cos_53)[3] = (FFTComplex){ RESCALE(cos(2 * M_PI / 10)), RESCALE(sin(2 * M_PI / 10)) };
}
102 
    /* NOTE(review): the array header and its first entry are elided from
     * this excerpt. The visible NULL slots are the small sizes that need
     * no runtime cosine table. */
    { NULL },
    { NULL },
    { NULL },
    { init_cos_tabs_16, AV_ONCE_INIT },
    { init_cos_tabs_32, AV_ONCE_INIT },
    { init_cos_tabs_64, AV_ONCE_INIT },
    { init_cos_tabs_128, AV_ONCE_INIT },
    { init_cos_tabs_256, AV_ONCE_INIT },
    { init_cos_tabs_512, AV_ONCE_INIT },
    { init_cos_tabs_1024, AV_ONCE_INIT },
    { init_cos_tabs_2048, AV_ONCE_INIT },
    { init_cos_tabs_4096, AV_ONCE_INIT },
    { init_cos_tabs_8192, AV_ONCE_INIT },
    { init_cos_tabs_16384, AV_ONCE_INIT },
    { init_cos_tabs_32768, AV_ONCE_INIT },
    { init_cos_tabs_65536, AV_ONCE_INIT },
    { init_cos_tabs_131072, AV_ONCE_INIT },
};
123 
/* Thread-safely run the one-time initializer for table index 'index'
 * (i.e. the cosine table for a transform of size 1 << index). */
static av_cold void init_cos_tabs(int index)
{
    ff_thread_once(&cos_tabs_init_once[index].control,
                    cos_tabs_init_once[index].func);
}
129 
                                  ptrdiff_t stride)
{
    FFTComplex tmp[2];
#ifdef TX_INT32
    int64_t mtmp[4]; /* 64-bit intermediates to avoid overflow in fixed point */
#endif

    /* Butterflies of in[1] and in[2]; note the re/im swap on the first
     * output pair (the multiply by the imaginary twiddle is folded in). */
    BF(tmp[0].re, tmp[1].im, in[1].im, in[2].im);
    BF(tmp[0].im, tmp[1].re, in[1].re, in[2].re);

    /* DC output: sum of all three inputs */
    out[0*stride].re = in[0].re + tmp[1].re;
    out[0*stride].im = in[0].im + tmp[1].im;

#ifdef TX_INT32
    /* Fixed-point path: widen products to 64 bits, then round-and-shift
     * back down by 31. '+' binds tighter than '>>', so the rounding
     * constant 0x40000000 is added before the shift, as intended. */
    mtmp[0] = (int64_t)TX_NAME(ff_cos_53)[0].re * tmp[0].re;
    mtmp[1] = (int64_t)TX_NAME(ff_cos_53)[0].im * tmp[0].im;
    mtmp[2] = (int64_t)TX_NAME(ff_cos_53)[1].re * tmp[1].re;
    mtmp[3] = (int64_t)TX_NAME(ff_cos_53)[1].re * tmp[1].im;
    out[1*stride].re = in[0].re - (mtmp[2] + mtmp[0] + 0x40000000 >> 31);
    out[1*stride].im = in[0].im - (mtmp[3] - mtmp[1] + 0x40000000 >> 31);
    out[2*stride].re = in[0].re - (mtmp[2] - mtmp[0] + 0x40000000 >> 31);
    out[2*stride].im = in[0].im - (mtmp[3] + mtmp[1] + 0x40000000 >> 31);
#else
    /* Both lanes of ff_cos_53[1] hold the same cosine, so using .re for
     * the imaginary product too is deliberate. */
    tmp[0].re = TX_NAME(ff_cos_53)[0].re * tmp[0].re;
    tmp[0].im = TX_NAME(ff_cos_53)[0].im * tmp[0].im;
    tmp[1].re = TX_NAME(ff_cos_53)[1].re * tmp[1].re;
    tmp[1].im = TX_NAME(ff_cos_53)[1].re * tmp[1].im;
    out[1*stride].re = in[0].re - tmp[1].re + tmp[0].re;
    out[1*stride].im = in[0].im - tmp[1].im - tmp[0].im;
    out[2*stride].re = in[0].re - tmp[1].re - tmp[0].re;
    out[2*stride].im = in[0].im - tmp[1].im + tmp[0].im;
#endif
}
164 
/* 5-point transform template. D0-D4 select the output indices so the
 * same body can emit either a contiguous 5-point result (fft5) or the
 * permuted positions required by the 15-point prime-factor transform
 * below (fft5_m1/m2/m3 — indices are the PFA output remap mod 15). */
#define DECL_FFT5(NAME, D0, D1, D2, D3, D4) \
static av_always_inline void NAME(FFTComplex *out, FFTComplex *in, \
                                  ptrdiff_t stride) \
{ \
    FFTComplex z0[4], t[6]; \
 \
    BF(t[1].im, t[0].re, in[1].re, in[4].re); \
    BF(t[1].re, t[0].im, in[1].im, in[4].im); \
    BF(t[3].im, t[2].re, in[2].re, in[3].re); \
    BF(t[3].re, t[2].im, in[2].im, in[3].im); \
 \
    out[D0*stride].re = in[0].re + t[0].re + t[2].re; \
    out[D0*stride].im = in[0].im + t[0].im + t[2].im; \
 \
    SMUL(t[4].re, t[0].re, TX_NAME(ff_cos_53)[2].re, TX_NAME(ff_cos_53)[3].re, t[2].re, t[0].re); \
    SMUL(t[4].im, t[0].im, TX_NAME(ff_cos_53)[2].re, TX_NAME(ff_cos_53)[3].re, t[2].im, t[0].im); \
    CMUL(t[5].re, t[1].re, TX_NAME(ff_cos_53)[2].im, TX_NAME(ff_cos_53)[3].im, t[3].re, t[1].re); \
    CMUL(t[5].im, t[1].im, TX_NAME(ff_cos_53)[2].im, TX_NAME(ff_cos_53)[3].im, t[3].im, t[1].im); \
 \
    BF(z0[0].re, z0[3].re, t[0].re, t[1].re); \
    BF(z0[0].im, z0[3].im, t[0].im, t[1].im); \
    BF(z0[2].re, z0[1].re, t[4].re, t[5].re); \
    BF(z0[2].im, z0[1].im, t[4].im, t[5].im); \
 \
    out[D1*stride].re = in[0].re + z0[3].re; \
    out[D1*stride].im = in[0].im + z0[0].im; \
    out[D2*stride].re = in[0].re + z0[2].re; \
    out[D2*stride].im = in[0].im + z0[1].im; \
    out[D3*stride].re = in[0].re + z0[1].re; \
    out[D3*stride].im = in[0].im + z0[2].im; \
    out[D4*stride].re = in[0].re + z0[0].re; \
    out[D4*stride].im = in[0].im + z0[3].im; \
}

/* Plain 5-point transform, plus the three permuted variants used by fft15() */
DECL_FFT5(fft5, 0, 1, 2, 3, 4)
DECL_FFT5(fft5_m1, 0, 6, 12, 3, 9)
DECL_FFT5(fft5_m2, 10, 1, 7, 13, 4)
DECL_FFT5(fft5_m3, 5, 11, 2, 8, 14)
203 
                                  ptrdiff_t stride)
{
    FFTComplex tmp[15];

    /* Prime-factor decomposition 15 = 3 x 5: five 3-point transforms
     * over strided input... */
    for (int i = 0; i < 5; i++)
        fft3(tmp + i, in + i*3, 5);

    /* ...followed by three 5-point transforms whose output permutations
     * (fft5_m1/m2/m3) implement the PFA output reordering directly. */
    fft5_m1(out, tmp + 0, stride);
    fft5_m2(out, tmp + 5, stride);
    fft5_m3(out, tmp + 10, stride);
}
216 
/* Split-radix recombination butterflies: merges the two rotated
 * quarter-size results (t1..t6, set by TRANSFORM*) into a0..a3. */
#define BUTTERFLIES(a0,a1,a2,a3) {\
    BF(t3, t5, t5, t1);\
    BF(a2.re, a0.re, a0.re, t5);\
    BF(a3.im, a1.im, a1.im, t3);\
    BF(t4, t6, t2, t6);\
    BF(a3.re, a1.re, a1.re, t4);\
    BF(a2.im, a0.im, a0.im, t6);\
}

// force loading all the inputs before storing any.
// this is slightly slower for small data, but avoids store->load aliasing
// for addresses separated by large powers of 2.
#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
    FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
    BF(t3, t5, t5, t1);\
    BF(a2.re, a0.re, r0, t5);\
    BF(a3.im, a1.im, i1, t3);\
    BF(t4, t6, t2, t6);\
    BF(a3.re, a1.re, r1, t4);\
    BF(a2.im, a0.im, i0, t6);\
}

/* Rotate a2 and a3 by the twiddle factor (wre, wim) — conjugated for a2 —
 * then apply the butterflies. */
#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
    CMUL(t1, t2, a2.re, a2.im, wre, -wim);\
    CMUL(t5, t6, a3.re, a3.im, wre, wim);\
    BUTTERFLIES(a0,a1,a2,a3)\
}

/* TRANSFORM with the trivial twiddle w = 1: no multiplies needed */
#define TRANSFORM_ZERO(a0,a1,a2,a3) {\
    t1 = a2.re;\
    t2 = a2.im;\
    t5 = a3.re;\
    t6 = a3.im;\
    BUTTERFLIES(a0,a1,a2,a3)\
}
252 
/* z[0...8n-1], w[1...2n-1] */
/* One split-radix recombination pass over the four quarters of z.
 * Two butterfly groups are processed per iteration; the first pair is
 * peeled off before the loop so the w = 1 case uses TRANSFORM_ZERO. */
#define PASS(name)\
static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
{\
    FFTSample t1, t2, t3, t4, t5, t6;\
    int o1 = 2*n;\
    int o2 = 4*n;\
    int o3 = 6*n;\
    const FFTSample *wim = wre+o1;\
    n--;\
\
    TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
    TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
    do {\
        z += 2;\
        wre += 2;\
        wim -= 2;\
        TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
        TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
    } while(--n);\
}

PASS(pass)
/* Instantiate the same pass with the load-everything-first butterflies
 * for large transforms (see the BUTTERFLIES_BIG comment above). */
#undef BUTTERFLIES
#define BUTTERFLIES BUTTERFLIES_BIG
PASS(pass_big)
279 
/* Declares fft<n>(): split-radix decomposition into one half-size and
 * two quarter-size transforms, recombined by a single pass. */
#define DECL_FFT(n,n2,n4)\
static void fft##n(FFTComplex *z)\
{\
    fft##n2(z);\
    fft##n4(z+n4*2);\
    fft##n4(z+n4*3);\
    pass(z,TX_NAME(ff_cos_##n),n4/2);\
}
288 
289 static void fft2(FFTComplex *z)
290 {
291  FFTComplex tmp;
292  BF(tmp.re, z[0].re, z[0].re, z[1].re);
293  BF(tmp.im, z[0].im, z[0].im, z[1].im);
294  z[1] = tmp;
295 }
296 
/* 4-point transform, fully unrolled. The t-temporaries hold butterfly
 * sums/differences; the statement order is significant because outputs
 * overwrite the z[] slots that later butterflies still read via the t's. */
static void fft4(FFTComplex *z)
{
    FFTSample t1, t2, t3, t4, t5, t6, t7, t8;

    BF(t3, t1, z[0].re, z[1].re);
    BF(t8, t6, z[3].re, z[2].re);
    BF(z[2].re, z[0].re, t1, t6);
    BF(t4, t2, z[0].im, z[1].im);
    BF(t7, t5, z[2].im, z[3].im);
    BF(z[3].im, z[1].im, t4, t8);
    BF(z[3].re, z[1].re, t3, t7);
    BF(z[2].im, z[0].im, t2, t5);
}
310 
/* 8-point transform: a 4-point transform on the first half, then the
 * two extra butterflies and one sqrt(1/2)-twiddle TRANSFORM to fold in
 * the second half. t1..t6 are read/written by the macros. */
static void fft8(FFTComplex *z)
{
    FFTSample t1, t2, t3, t4, t5, t6;

    fft4(z);

    BF(t1, z[5].re, z[4].re, -z[5].re);
    BF(t2, z[5].im, z[4].im, -z[5].im);
    BF(t5, z[7].re, z[6].re, -z[7].re);
    BF(t6, z[7].im, z[6].im, -z[7].im);

    BUTTERFLIES(z[0],z[2],z[4],z[6]);
    TRANSFORM(z[1],z[3],z[5],z[7],RESCALE(M_SQRT1_2),RESCALE(M_SQRT1_2));
}
325 
/* 16-point transform: split-radix 8 + 4 + 4, recombined with twiddles
 * taken from the runtime ff_cos_16 table. t1..t6 are used implicitly
 * by the TRANSFORM/TRANSFORM_ZERO macros. */
static void fft16(FFTComplex *z)
{
    FFTSample t1, t2, t3, t4, t5, t6;
    FFTSample cos_16_1 = TX_NAME(ff_cos_16)[1];
    FFTSample cos_16_3 = TX_NAME(ff_cos_16)[3];

    fft8(z);
    fft4(z+8);
    fft4(z+12);

    TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
    TRANSFORM(z[2],z[6],z[10],z[14],RESCALE(M_SQRT1_2),RESCALE(M_SQRT1_2));
    TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3);
    TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1);
}
341 
/* Instantiate all remaining power-of-two sizes; transforms above 512
 * points switch to the aliasing-safe pass_big (see BUTTERFLIES_BIG). */
DECL_FFT(32,16,8)
DECL_FFT(64,32,16)
DECL_FFT(128,64,32)
DECL_FFT(256,128,64)
DECL_FFT(512,256,128)
#define pass pass_big
DECL_FFT(1024,512,256)
DECL_FFT(2048,1024,512)
DECL_FFT(4096,2048,1024)
DECL_FFT(8192,4096,2048)
DECL_FFT(16384,8192,4096)
DECL_FFT(32768,16384,8192)
DECL_FFT(65536,32768,16384)
DECL_FFT(131072,65536,32768)
356 
/* Dispatch table indexed by log2 of the transform size; slot 0 is unused. */
static void (* const fft_dispatch[])(FFTComplex*) = {
    NULL, fft2, fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512,
    fft1024, fft2048, fft4096, fft8192, fft16384, fft32768, fft65536, fft131072
};
361 
/*
 * Compound (prime-factor) FFT: m interleaved N-point transforms (N = 3,
 * 5 or 15) followed by N m-point power-of-two FFTs. The PFA input and
 * output permutations live in s->pfatab (out_map directly follows
 * in_map); s->revtab holds the power-of-two bit-reversal.
 */
#define DECL_COMP_FFT(N) \
static void compound_fft_##N##xM(AVTXContext *s, void *_out, \
                                 void *_in, ptrdiff_t stride) \
{ \
    const int m = s->m, *in_map = s->pfatab, *out_map = in_map + N*m; \
    FFTComplex *in = _in; \
    FFTComplex *out = _out; \
    FFTComplex fft##N##in[N]; \
    void (*fftp)(FFTComplex *z) = fft_dispatch[av_log2(m)]; \
 \
    for (int i = 0; i < m; i++) { \
        for (int j = 0; j < N; j++) \
            fft##N##in[j] = in[in_map[i*N + j]]; \
        fft##N(s->tmp + s->revtab[i], fft##N##in, m); \
    } \
 \
    for (int i = 0; i < N; i++) \
        fftp(s->tmp + m*i); \
 \
    for (int i = 0; i < N*m; i++) \
        out[i] = s->tmp[out_map[i]]; \
}

DECL_COMP_FFT(3)
DECL_COMP_FFT(5)
DECL_COMP_FFT(15)
388 
389 static void monolithic_fft(AVTXContext *s, void *_out, void *_in,
390  ptrdiff_t stride)
391 {
392  FFTComplex *in = _in;
393  FFTComplex *out = _out;
394  int m = s->m, mb = av_log2(m);
395  for (int i = 0; i < m; i++)
396  out[s->revtab[i]] = in[i];
397  fft_dispatch[mb](out);
398 }
399 
/*
 * Compound inverse MDCT: pre-twiddle the folded input into N m-point
 * FFT inputs via the PFA input map, run the compound FFT, then
 * post-rotate the mapped results into the destination. in1/in2 walk
 * the input forwards/backwards so each load pairs a sample with its
 * mirror.
 */
#define DECL_COMP_IMDCT(N) \
static void compound_imdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
                                   ptrdiff_t stride) \
{ \
    FFTComplex fft##N##in[N]; \
    FFTComplex *z = _dst, *exp = s->exptab; \
    const int m = s->m, len8 = N*m >> 1; \
    const int *in_map = s->pfatab, *out_map = in_map + N*m; \
    const FFTSample *src = _src, *in1, *in2; \
    void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m)]; \
 \
    stride /= sizeof(*src); /* To convert it from bytes */ \
    in1 = src; \
    in2 = src + ((N*m*2) - 1) * stride; \
 \
    for (int i = 0; i < m; i++) { \
        for (int j = 0; j < N; j++) { \
            const int k = in_map[i*N + j]; \
            FFTComplex tmp = { in2[-k*stride], in1[k*stride] }; \
            CMUL3(fft##N##in[j], tmp, exp[k >> 1]); \
        } \
        fft##N(s->tmp + s->revtab[i], fft##N##in, m); \
    } \
 \
    for (int i = 0; i < N; i++) \
        fftp(s->tmp + m*i); \
 \
    for (int i = 0; i < len8; i++) { \
        const int i0 = len8 + i, i1 = len8 - i - 1; \
        const int s0 = out_map[i0], s1 = out_map[i1]; \
        FFTComplex src1 = { s->tmp[s1].im, s->tmp[s1].re }; \
        FFTComplex src0 = { s->tmp[s0].im, s->tmp[s0].re }; \
 \
        CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re); \
        CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re); \
    } \
}

/* NOTE(review): the 3- and 5-factor instantiations are elided from this
 * excerpt; only the 15-factor one is visible here. */
DECL_COMP_IMDCT(15)
441 
/*
 * Compound forward MDCT: fold and pre-twiddle the real input into N
 * m-point FFT inputs via the PFA input map, run the compound FFT, then
 * post-rotate the mapped results, writing interleaved re/im pairs at
 * the given (per-sample) stride.
 */
#define DECL_COMP_MDCT(N) \
static void compound_mdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
                                  ptrdiff_t stride) \
{ \
    FFTSample *src = _src, *dst = _dst; \
    FFTComplex *exp = s->exptab, tmp, fft##N##in[N]; \
    const int m = s->m, len4 = N*m, len3 = len4 * 3, len8 = len4 >> 1; \
    const int *in_map = s->pfatab, *out_map = in_map + N*m; \
    void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m)]; \
 \
    stride /= sizeof(*dst); \
 \
    for (int i = 0; i < m; i++) { /* Folding and pre-reindexing */ \
        for (int j = 0; j < N; j++) { \
            const int k = in_map[i*N + j]; \
            if (k < len4) { \
                tmp.re = FOLD(-src[ len4 + k],  src[1*len4 - 1 - k]); \
                tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]); \
            } else { \
                tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]); \
                tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]); \
            } \
            CMUL(fft##N##in[j].im, fft##N##in[j].re, tmp.re, tmp.im, \
                 exp[k >> 1].re, exp[k >> 1].im); \
        } \
        fft##N(s->tmp + s->revtab[i], fft##N##in, m); \
    } \
 \
    for (int i = 0; i < N; i++) \
        fftp(s->tmp + m*i); \
 \
    for (int i = 0; i < len8; i++) { \
        const int i0 = len8 + i, i1 = len8 - i - 1; \
        const int s0 = out_map[i0], s1 = out_map[i1]; \
        FFTComplex src1 = { s->tmp[s1].re, s->tmp[s1].im }; \
        FFTComplex src0 = { s->tmp[s0].re, s->tmp[s0].im }; \
 \
        CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im, \
             exp[i0].im, exp[i0].re); \
        CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im, \
             exp[i1].im, exp[i1].re); \
    } \
}

/* NOTE(review): the 3- and 5-factor instantiations are elided from this
 * excerpt; only the 15-factor one is visible here. */
DECL_COMP_MDCT(15)
489 
/*
 * Single-factor inverse MDCT for power-of-two lengths: pre-twiddle the
 * input into bit-reversed order, run an m-point FFT in place, then
 * post-rotate pairs working outwards from the middle of the buffer.
 */
static void monolithic_imdct(AVTXContext *s, void *_dst, void *_src,
                             ptrdiff_t stride)
{
    FFTComplex *z = _dst, *exp = s->exptab;
    const int m = s->m, len8 = m >> 1;
    const FFTSample *src = _src, *in1, *in2;
    void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m)];

    stride /= sizeof(*src); /* convert from bytes to samples */
    in1 = src;
    in2 = src + ((m*2) - 1) * stride; /* last input sample */

    /* Pre-twiddle: pair each even sample with its mirrored partner */
    for (int i = 0; i < m; i++) {
        FFTComplex tmp = { in2[-2*i*stride], in1[2*i*stride] };
        CMUL3(z[s->revtab[i]], tmp, exp[i]);
    }

    fftp(z);

    /* Post-rotation: i1 walks down from the middle, i0 walks up */
    for (int i = 0; i < len8; i++) {
        const int i0 = len8 + i, i1 = len8 - i - 1;
        FFTComplex src1 = { z[i1].im, z[i1].re };
        FFTComplex src0 = { z[i0].im, z[i0].re };

        CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re);
        CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re);
    }
}
518 
/*
 * Single-factor forward MDCT for power-of-two lengths: fold and
 * pre-twiddle the real input into bit-reversed order, run an m-point
 * FFT in place, then post-rotate, writing interleaved re/im pairs at
 * the given (per-sample) stride.
 */
static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src,
                            ptrdiff_t stride)
{
    FFTSample *src = _src, *dst = _dst;
    FFTComplex *exp = s->exptab, tmp, *z = _dst;
    const int m = s->m, len4 = m, len3 = len4 * 3, len8 = len4 >> 1;
    void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m)];

    stride /= sizeof(*dst); /* convert from bytes to samples */

    for (int i = 0; i < m; i++) { /* Folding and pre-reindexing */
        const int k = 2*i;
        /* The two branches mirror the window fold for the first and
         * second halves of the (4*len4-sample) input. */
        if (k < len4) {
            tmp.re = FOLD(-src[ len4 + k],  src[1*len4 - 1 - k]);
            tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]);
        } else {
            tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]);
            tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]);
        }
        CMUL(z[s->revtab[i]].im, z[s->revtab[i]].re, tmp.re, tmp.im,
             exp[i].re, exp[i].im);
    }

    fftp(z);

    /* Post-rotation: i1 walks down from the middle, i0 walks up */
    for (int i = 0; i < len8; i++) {
        const int i0 = len8 + i, i1 = len8 - i - 1;
        FFTComplex src1 = { z[i1].re, z[i1].im };
        FFTComplex src0 = { z[i0].re, z[i0].im };

        CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im,
             exp[i0].im, exp[i0].re);
        CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im,
             exp[i1].im, exp[i1].re);
    }
}
555 
556 static int gen_mdct_exptab(AVTXContext *s, int len4, double scale)
557 {
558  const double theta = (scale < 0 ? len4 : 0) + 1.0/8.0;
559 
560  if (!(s->exptab = av_malloc_array(len4, sizeof(*s->exptab))))
561  return AVERROR(ENOMEM);
562 
563  scale = sqrt(fabs(scale));
564  for (int i = 0; i < len4; i++) {
565  const double alpha = M_PI_2 * (i + theta) / len4;
566  s->exptab[i].re = RESCALE(cos(alpha) * scale);
567  s->exptab[i].im = RESCALE(sin(alpha) * scale);
568  }
569 
570  return 0;
571 }
572 
                               enum AVTXType type, int inv, int len,
                               const void *scale, uint64_t flags)
{
    const int is_mdct = ff_tx_type_is_mdct(type);
    /* Largest supported power-of-two size, from the dispatch table */
    int err, n = 1, m = 1, max_ptwo = 1 << (FF_ARRAY_ELEMS(fft_dispatch) - 1);

    /* MDCTs are implemented on top of a half-length complex transform */
    if (is_mdct)
        len >>= 1;

/* Split one non-power-of-two factor (tried largest first) out of len
 * into DST, leaving the remainder in SRC. */
#define CHECK_FACTOR(DST, FACTOR, SRC) \
    if (DST == 1 && !(SRC % FACTOR)) { \
        DST = FACTOR; \
        SRC /= FACTOR; \
    }
    CHECK_FACTOR(n, 15, len)
    CHECK_FACTOR(n, 5, len)
    CHECK_FACTOR(n, 3, len)
#undef CHECK_FACTOR

    /* len must be a power of two now */
    if (!(len & (len - 1)) && len >= 2 && len <= max_ptwo) {
        m = len;
        len = 1; /* fully factored */
    }

    s->n = n;
    s->m = m;
    s->inv = inv;
    s->type = type;

    /* Filter out direct 3, 5 and 15 transforms, too niche */
    if (len > 1 || m == 1) {
        av_log(NULL, AV_LOG_ERROR, "Unsupported transform size: n = %i, "
               "m = %i, residual = %i!\n", n, m, len);
        return AVERROR(EINVAL);
    } else if (n > 1 && m > 1) { /* 2D transform case */
        if ((err = ff_tx_gen_compound_mapping(s)))
            return err;
        if (!(s->tmp = av_malloc(n*m*sizeof(*s->tmp))))
            return AVERROR(ENOMEM);
        *tx = n == 3 ? compound_fft_3xM :
              n == 5 ? compound_fft_5xM :
                       compound_fft_15xM;
        if (is_mdct)
            *tx = n == 3 ? inv ? compound_imdct_3xM : compound_mdct_3xM :
                  n == 5 ? inv ? compound_imdct_5xM : compound_mdct_5xM :
                           inv ? compound_imdct_15xM : compound_mdct_15xM;
    } else { /* Direct transform case */
        *tx = monolithic_fft;
        if (is_mdct)
            *tx = inv ? monolithic_imdct : monolithic_mdct;
    }

    /* One-time table initialization for the factors actually used.
     * NOTE(review): index 0 covers the 3/5-point twiddles; some lines
     * (e.g. revtab generation) appear elided from this excerpt —
     * confirm against the full file. */
    if (n != 1)
        init_cos_tabs(0);
    if (m != 1) {
        for (int i = 4; i <= av_log2(m); i++)
            init_cos_tabs(i);
    }

    if (is_mdct)
        return gen_mdct_exptab(s, n*m, *((SCALE_TYPE *)scale));

    return 0;
}
#define NULL
Definition: coverity.c:32
#define DECL_FFT(n, n2, n4)
Definition: tx_template.c:280
int ff_tx_gen_compound_mapping(AVTXContext *s)
Definition: tx.c:44
static av_cold void ff_init_53_tabs(void)
Definition: tx_template.c:95
float re
Definition: fft.c:82
static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:519
static av_cold void init_cos_tabs(int index)
Definition: tx_template.c:124
static FFTSample *const cos_tabs[18]
Definition: tx_template.c:42
static av_always_inline void fft15(FFTComplex *out, FFTComplex *in, ptrdiff_t stride)
Definition: tx_template.c:204
#define M_SQRT1_2
Definition: mathematics.h:58
#define BUTTERFLIES(a0, a1, a2, a3)
Definition: tx_template.c:277
int av_log2(unsigned v)
Definition: intmath.c:26
GLint GLenum type
Definition: opengl_enc.c:104
FFTSample re
Definition: avfft.h:38
static void monolithic_fft(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
Definition: tx_template.c:389
#define INIT_FF_COS_TABS_FUNC(index, size)
Definition: tx_template.c:74
static void fft5(FFTComplex *out, FFTComplex *in, FFTComplex exptab[2])
Definition: mdct15.c:92
#define t8
Definition: regdef.h:53
static av_always_inline void init_cos_tabs_idx(int index)
Definition: tx_template.c:63
int ff_tx_type_is_mdct(enum AVTXType type)
Definition: tx.c:21
#define DECL_COMP_MDCT(N)
Definition: tx_template.c:442
void FFTComplex
Definition: tx_priv.h:45
#define t7
Definition: regdef.h:35
FFTComplex TX_NAME(ff_cos_53)[4]
#define av_cold
Definition: attributes.h:82
#define av_malloc(s)
#define mb
#define CMUL(dre, dim, are, aim, bre, bim)
Definition: fft-internal.h:76
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
Definition: mem.h:112
#define av_log(a,...)
#define CHECK_FACTOR(DST, FACTOR, SRC)
#define src
Definition: vp8dsp.c:254
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
void(* av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride)
Function pointer to a function to perform the transform.
Definition: tx.h:92
static void monolithic_imdct(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:490
#define t1
Definition: regdef.h:29
AVTXType
Definition: tx.h:39
#define t3
Definition: regdef.h:31
float FFTSample
Definition: avfft.h:35
int8_t exp
Definition: eval.c:72
#define pass
Definition: tx_template.c:347
FFTComplex * exptab
Definition: tx_priv.h:111
static void(*const fft_dispatch[])(FFTComplex *)
Definition: tx_template.c:357
#define PASS(name)
Definition: tx_template.c:254
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
#define M_PI_2
Definition: mathematics.h:55
int * revtab
Definition: tx_priv.h:114
#define s(width, name)
Definition: cbs_vp9.c:257
static void fft8(FFTComplex *z)
Definition: tx_template.c:311
#define DECL_FFT5(NAME, D0, D1, D2, D3, D4)
Definition: tx_template.c:165
#define FF_ARRAY_ELEMS(a)
#define DECL_COMP_FFT(N)
Definition: tx_template.c:362
#define src1
Definition: h264pred.c:139
#define AV_ONCE_INIT
Definition: thread.h:173
static const int16_t alpha[]
Definition: ilbcdata.h:55
static void fft16(FFTComplex *z)
Definition: tx_template.c:326
#define CMUL3(c, a, b)
Definition: mdct15.c:41
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31))))#define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac){}void ff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, 
int apply_map){AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);return NULL;}return ac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;}int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){int use_generic=1;int len=in->nb_samples;int p;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:67
int index
Definition: gxfenc.c:89
float im
Definition: fft.c:82
static av_always_inline void fft3(FFTComplex *out, FFTComplex *in, ptrdiff_t stride)
Definition: tx_template.c:130
#define src0
Definition: h264pred.c:138
#define t5
Definition: regdef.h:33
#define flags(name, subs,...)
Definition: cbs_av1.c:564
static int gen_mdct_exptab(AVTXContext *s, int len4, double scale)
Definition: tx_template.c:556
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:104
FFTSample im
Definition: avfft.h:38
#define TRANSFORM(a0, a1, a2, a3, wre, wim)
Definition: tx_template.c:239
#define t6
Definition: regdef.h:34
static void fft2(FFTComplex *z)
Definition: tx_template.c:289
#define TRANSFORM_ZERO(a0, a1, a2, a3)
Definition: tx_template.c:245
static void fft4(FFTComplex *z)
Definition: tx_template.c:297
#define t4
Definition: regdef.h:32
#define BF(a, b, c, s)
int len
static int ff_thread_once(char *control, void(*routine)(void))
Definition: thread.h:175
int TX_NAME() ff_tx_init_mdct_fft(AVTXContext *s, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)
Definition: tx_template.c:573
static const struct twinvq_data tab
FILE * out
Definition: movenc.c:54
#define av_always_inline
Definition: attributes.h:39
#define M_PI
Definition: mathematics.h:52
#define av_malloc_array(a, b)
#define stride
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later.That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another.Frame references ownership and permissions
int ff_tx_gen_ptwo_revtab(AVTXContext *s)
Definition: tx.c:94
static CosTabsInitOnce cos_tabs_init_once[]
Definition: tx_template.c:103
#define t2
Definition: regdef.h:30
COSTABLE(16)
#define DECL_COMP_IMDCT(N)
Definition: tx_template.c:400
static uint8_t tmp[11]
Definition: aes_ctr.c:26