FFmpeg
tx_template.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019 Lynne <dev@lynne.ee>
3  * Power of two FFT:
4  * Copyright (c) 2008 Loren Merritt
5  * Copyright (c) 2002 Fabrice Bellard
6  * Partly based on libdjbfft by D. J. Bernstein
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 /* All costabs for a type are defined here */
26 COSTABLE(16);
27 COSTABLE(32);
28 COSTABLE(64);
29 COSTABLE(128);
30 COSTABLE(256);
31 COSTABLE(512);
32 COSTABLE(1024);
33 COSTABLE(2048);
34 COSTABLE(4096);
35 COSTABLE(8192);
36 COSTABLE(16384);
37 COSTABLE(32768);
38 COSTABLE(65536);
39 COSTABLE(131072);
40 DECLARE_ALIGNED(32, FFTComplex, TX_NAME(ff_cos_53))[4];
41 
42 static FFTSample * const cos_tabs[18] = {
43  NULL,
44  NULL,
45  NULL,
46  NULL,
47  TX_NAME(ff_cos_16),
48  TX_NAME(ff_cos_32),
49  TX_NAME(ff_cos_64),
50  TX_NAME(ff_cos_128),
51  TX_NAME(ff_cos_256),
52  TX_NAME(ff_cos_512),
53  TX_NAME(ff_cos_1024),
54  TX_NAME(ff_cos_2048),
55  TX_NAME(ff_cos_4096),
56  TX_NAME(ff_cos_8192),
57  TX_NAME(ff_cos_16384),
58  TX_NAME(ff_cos_32768),
59  TX_NAME(ff_cos_65536),
60  TX_NAME(ff_cos_131072),
61 };
62 
64 {
65  int m = 1 << index;
66  double freq = 2*M_PI/m;
67  FFTSample *tab = cos_tabs[index];
68  for(int i = 0; i <= m/4; i++)
69  tab[i] = RESCALE(cos(i*freq));
70  for(int i = 1; i < m/4; i++)
71  tab[m/2 - i] = tab[i];
72 }
73 
74 #define INIT_FF_COS_TABS_FUNC(index, size) \
75 static av_cold void init_cos_tabs_ ## size (void) \
76 { \
77  init_cos_tabs_idx(index); \
78 }
79 
86 INIT_FF_COS_TABS_FUNC(10, 1024)
87 INIT_FF_COS_TABS_FUNC(11, 2048)
88 INIT_FF_COS_TABS_FUNC(12, 4096)
89 INIT_FF_COS_TABS_FUNC(13, 8192)
90 INIT_FF_COS_TABS_FUNC(14, 16384)
91 INIT_FF_COS_TABS_FUNC(15, 32768)
92 INIT_FF_COS_TABS_FUNC(16, 65536)
93 INIT_FF_COS_TABS_FUNC(17, 131072)
94 
95 static av_cold void ff_init_53_tabs(void)
96 {
97  TX_NAME(ff_cos_53)[0] = (FFTComplex){ RESCALE(cos(2 * M_PI / 12)), RESCALE(cos(2 * M_PI / 12)) };
98  TX_NAME(ff_cos_53)[1] = (FFTComplex){ RESCALE(cos(2 * M_PI / 6)), RESCALE(cos(2 * M_PI / 6)) };
99  TX_NAME(ff_cos_53)[2] = (FFTComplex){ RESCALE(cos(2 * M_PI / 5)), RESCALE(sin(2 * M_PI / 5)) };
100  TX_NAME(ff_cos_53)[3] = (FFTComplex){ RESCALE(cos(2 * M_PI / 10)), RESCALE(sin(2 * M_PI / 10)) };
101 }
102 
105  { NULL },
106  { NULL },
107  { NULL },
108  { init_cos_tabs_16, AV_ONCE_INIT },
109  { init_cos_tabs_32, AV_ONCE_INIT },
110  { init_cos_tabs_64, AV_ONCE_INIT },
111  { init_cos_tabs_128, AV_ONCE_INIT },
112  { init_cos_tabs_256, AV_ONCE_INIT },
113  { init_cos_tabs_512, AV_ONCE_INIT },
114  { init_cos_tabs_1024, AV_ONCE_INIT },
115  { init_cos_tabs_2048, AV_ONCE_INIT },
116  { init_cos_tabs_4096, AV_ONCE_INIT },
117  { init_cos_tabs_8192, AV_ONCE_INIT },
118  { init_cos_tabs_16384, AV_ONCE_INIT },
119  { init_cos_tabs_32768, AV_ONCE_INIT },
120  { init_cos_tabs_65536, AV_ONCE_INIT },
121  { init_cos_tabs_131072, AV_ONCE_INIT },
122 };
123 
124 static av_cold void init_cos_tabs(int index)
125 {
126  ff_thread_once(&cos_tabs_init_once[index].control,
127  cos_tabs_init_once[index].func);
128 }
129 
131  ptrdiff_t stride)
132 {
133  FFTComplex tmp[2];
134 #ifdef TX_INT32
135  int64_t mtmp[4];
136 #endif
137 
138  BF(tmp[0].re, tmp[1].im, in[1].im, in[2].im);
139  BF(tmp[0].im, tmp[1].re, in[1].re, in[2].re);
140 
141  out[0*stride].re = in[0].re + tmp[1].re;
142  out[0*stride].im = in[0].im + tmp[1].im;
143 
144 #ifdef TX_INT32
145  mtmp[0] = (int64_t)TX_NAME(ff_cos_53)[0].re * tmp[0].re;
146  mtmp[1] = (int64_t)TX_NAME(ff_cos_53)[0].im * tmp[0].im;
147  mtmp[2] = (int64_t)TX_NAME(ff_cos_53)[1].re * tmp[1].re;
148  mtmp[3] = (int64_t)TX_NAME(ff_cos_53)[1].re * tmp[1].im;
149  out[1*stride].re = in[0].re - (mtmp[2] + mtmp[0] + 0x40000000 >> 31);
150  out[1*stride].im = in[0].im - (mtmp[3] - mtmp[1] + 0x40000000 >> 31);
151  out[2*stride].re = in[0].re - (mtmp[2] - mtmp[0] + 0x40000000 >> 31);
152  out[2*stride].im = in[0].im - (mtmp[3] + mtmp[1] + 0x40000000 >> 31);
153 #else
154  tmp[0].re = TX_NAME(ff_cos_53)[0].re * tmp[0].re;
155  tmp[0].im = TX_NAME(ff_cos_53)[0].im * tmp[0].im;
156  tmp[1].re = TX_NAME(ff_cos_53)[1].re * tmp[1].re;
157  tmp[1].im = TX_NAME(ff_cos_53)[1].re * tmp[1].im;
158  out[1*stride].re = in[0].re - tmp[1].re + tmp[0].re;
159  out[1*stride].im = in[0].im - tmp[1].im - tmp[0].im;
160  out[2*stride].re = in[0].re - tmp[1].re - tmp[0].re;
161  out[2*stride].im = in[0].im - tmp[1].im + tmp[0].im;
162 #endif
163 }
164 
165 #define DECL_FFT5(NAME, D0, D1, D2, D3, D4) \
166 static av_always_inline void NAME(FFTComplex *out, FFTComplex *in, \
167  ptrdiff_t stride) \
168 { \
169  FFTComplex z0[4], t[6]; \
170  \
171  BF(t[1].im, t[0].re, in[1].re, in[4].re); \
172  BF(t[1].re, t[0].im, in[1].im, in[4].im); \
173  BF(t[3].im, t[2].re, in[2].re, in[3].re); \
174  BF(t[3].re, t[2].im, in[2].im, in[3].im); \
175  \
176  out[D0*stride].re = in[0].re + t[0].re + t[2].re; \
177  out[D0*stride].im = in[0].im + t[0].im + t[2].im; \
178  \
179  SMUL(t[4].re, t[0].re, TX_NAME(ff_cos_53)[2].re, TX_NAME(ff_cos_53)[3].re, t[2].re, t[0].re); \
180  SMUL(t[4].im, t[0].im, TX_NAME(ff_cos_53)[2].re, TX_NAME(ff_cos_53)[3].re, t[2].im, t[0].im); \
181  CMUL(t[5].re, t[1].re, TX_NAME(ff_cos_53)[2].im, TX_NAME(ff_cos_53)[3].im, t[3].re, t[1].re); \
182  CMUL(t[5].im, t[1].im, TX_NAME(ff_cos_53)[2].im, TX_NAME(ff_cos_53)[3].im, t[3].im, t[1].im); \
183  \
184  BF(z0[0].re, z0[3].re, t[0].re, t[1].re); \
185  BF(z0[0].im, z0[3].im, t[0].im, t[1].im); \
186  BF(z0[2].re, z0[1].re, t[4].re, t[5].re); \
187  BF(z0[2].im, z0[1].im, t[4].im, t[5].im); \
188  \
189  out[D1*stride].re = in[0].re + z0[3].re; \
190  out[D1*stride].im = in[0].im + z0[0].im; \
191  out[D2*stride].re = in[0].re + z0[2].re; \
192  out[D2*stride].im = in[0].im + z0[1].im; \
193  out[D3*stride].re = in[0].re + z0[1].re; \
194  out[D3*stride].im = in[0].im + z0[2].im; \
195  out[D4*stride].re = in[0].re + z0[0].re; \
196  out[D4*stride].im = in[0].im + z0[3].im; \
197 }
198 
199 DECL_FFT5(fft5, 0, 1, 2, 3, 4)
200 DECL_FFT5(fft5_m1, 0, 6, 12, 3, 9)
201 DECL_FFT5(fft5_m2, 10, 1, 7, 13, 4)
202 DECL_FFT5(fft5_m3, 5, 11, 2, 8, 14)
203 
205  ptrdiff_t stride)
206 {
207  FFTComplex tmp[15];
208 
209  for (int i = 0; i < 5; i++)
210  fft3(tmp + i, in + i*3, 5);
211 
212  fft5_m1(out, tmp + 0, stride);
213  fft5_m2(out, tmp + 5, stride);
214  fft5_m3(out, tmp + 10, stride);
215 }
216 
217 #define BUTTERFLIES(a0,a1,a2,a3) {\
218  BF(t3, t5, t5, t1);\
219  BF(a2.re, a0.re, a0.re, t5);\
220  BF(a3.im, a1.im, a1.im, t3);\
221  BF(t4, t6, t2, t6);\
222  BF(a3.re, a1.re, a1.re, t4);\
223  BF(a2.im, a0.im, a0.im, t6);\
224 }
225 
226 // force loading all the inputs before storing any.
227 // this is slightly slower for small data, but avoids store->load aliasing
228 // for addresses separated by large powers of 2.
229 #define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
230  FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
231  BF(t3, t5, t5, t1);\
232  BF(a2.re, a0.re, r0, t5);\
233  BF(a3.im, a1.im, i1, t3);\
234  BF(t4, t6, t2, t6);\
235  BF(a3.re, a1.re, r1, t4);\
236  BF(a2.im, a0.im, i0, t6);\
237 }
238 
239 #define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
240  CMUL(t1, t2, a2.re, a2.im, wre, -wim);\
241  CMUL(t5, t6, a3.re, a3.im, wre, wim);\
242  BUTTERFLIES(a0,a1,a2,a3)\
243 }
244 
245 #define TRANSFORM_ZERO(a0,a1,a2,a3) {\
246  t1 = a2.re;\
247  t2 = a2.im;\
248  t5 = a3.re;\
249  t6 = a3.im;\
250  BUTTERFLIES(a0,a1,a2,a3)\
251 }
252 
253 /* z[0...8n-1], w[1...2n-1] */
254 #define PASS(name)\
255 static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
256 {\
257  FFTSample t1, t2, t3, t4, t5, t6;\
258  int o1 = 2*n;\
259  int o2 = 4*n;\
260  int o3 = 6*n;\
261  const FFTSample *wim = wre+o1;\
262  n--;\
263 \
264  TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
265  TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
266  do {\
267  z += 2;\
268  wre += 2;\
269  wim -= 2;\
270  TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
271  TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
272  } while(--n);\
273 }
274 
275 PASS(pass)
276 #undef BUTTERFLIES
277 #define BUTTERFLIES BUTTERFLIES_BIG
278 PASS(pass_big)
279 
280 #define DECL_FFT(n,n2,n4)\
281 static void fft##n(FFTComplex *z)\
282 {\
283  fft##n2(z);\
284  fft##n4(z+n4*2);\
285  fft##n4(z+n4*3);\
286  pass(z,TX_NAME(ff_cos_##n),n4/2);\
287 }
288 
289 static void fft4(FFTComplex *z)
290 {
291  FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
292 
293  BF(t3, t1, z[0].re, z[1].re);
294  BF(t8, t6, z[3].re, z[2].re);
295  BF(z[2].re, z[0].re, t1, t6);
296  BF(t4, t2, z[0].im, z[1].im);
297  BF(t7, t5, z[2].im, z[3].im);
298  BF(z[3].im, z[1].im, t4, t8);
299  BF(z[3].re, z[1].re, t3, t7);
300  BF(z[2].im, z[0].im, t2, t5);
301 }
302 
303 static void fft8(FFTComplex *z)
304 {
305  FFTSample t1, t2, t3, t4, t5, t6;
306 
307  fft4(z);
308 
309  BF(t1, z[5].re, z[4].re, -z[5].re);
310  BF(t2, z[5].im, z[4].im, -z[5].im);
311  BF(t5, z[7].re, z[6].re, -z[7].re);
312  BF(t6, z[7].im, z[6].im, -z[7].im);
313 
314  BUTTERFLIES(z[0],z[2],z[4],z[6]);
315  TRANSFORM(z[1],z[3],z[5],z[7],RESCALE(M_SQRT1_2),RESCALE(M_SQRT1_2));
316 }
317 
318 static void fft16(FFTComplex *z)
319 {
320  FFTSample t1, t2, t3, t4, t5, t6;
321  FFTSample cos_16_1 = TX_NAME(ff_cos_16)[1];
322  FFTSample cos_16_3 = TX_NAME(ff_cos_16)[3];
323 
324  fft8(z);
325  fft4(z+8);
326  fft4(z+12);
327 
328  TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
329  TRANSFORM(z[2],z[6],z[10],z[14],RESCALE(M_SQRT1_2),RESCALE(M_SQRT1_2));
330  TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3);
331  TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1);
332 }
333 
334 DECL_FFT(32,16,8)
335 DECL_FFT(64,32,16)
336 DECL_FFT(128,64,32)
337 DECL_FFT(256,128,64)
338 DECL_FFT(512,256,128)
339 #define pass pass_big
340 DECL_FFT(1024,512,256)
341 DECL_FFT(2048,1024,512)
342 DECL_FFT(4096,2048,1024)
343 DECL_FFT(8192,4096,2048)
344 DECL_FFT(16384,8192,4096)
345 DECL_FFT(32768,16384,8192)
346 DECL_FFT(65536,32768,16384)
347 DECL_FFT(131072,65536,32768)
348 
349 static void (* const fft_dispatch[])(FFTComplex*) = {
350  fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
351  fft2048, fft4096, fft8192, fft16384, fft32768, fft65536, fft131072
352 };
353 
354 #define DECL_COMP_FFT(N) \
355 static void compound_fft_##N##xM(AVTXContext *s, void *_out, \
356  void *_in, ptrdiff_t stride) \
357 { \
358  const int m = s->m, *in_map = s->pfatab, *out_map = in_map + N*m; \
359  FFTComplex *in = _in; \
360  FFTComplex *out = _out; \
361  FFTComplex fft##N##in[N]; \
362  void (*fftp)(FFTComplex *z) = fft_dispatch[av_log2(m) - 2]; \
363  \
364  for (int i = 0; i < m; i++) { \
365  for (int j = 0; j < N; j++) \
366  fft##N##in[j] = in[in_map[i*N + j]]; \
367  fft##N(s->tmp + s->revtab[i], fft##N##in, m); \
368  } \
369  \
370  for (int i = 0; i < N; i++) \
371  fftp(s->tmp + m*i); \
372  \
373  for (int i = 0; i < N*m; i++) \
374  out[i] = s->tmp[out_map[i]]; \
375 }
376 
377 DECL_COMP_FFT(3)
378 DECL_COMP_FFT(5)
379 DECL_COMP_FFT(15)
380 
381 static void monolithic_fft(AVTXContext *s, void *_out, void *_in,
382  ptrdiff_t stride)
383 {
384  FFTComplex *in = _in;
385  FFTComplex *out = _out;
386  int m = s->m, mb = av_log2(m) - 2;
387  for (int i = 0; i < m; i++)
388  out[s->revtab[i]] = in[i];
389  fft_dispatch[mb](out);
390 }
391 
392 #define DECL_COMP_IMDCT(N) \
393 static void compound_imdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
394  ptrdiff_t stride) \
395 { \
396  FFTComplex fft##N##in[N]; \
397  FFTComplex *z = _dst, *exp = s->exptab; \
398  const int m = s->m, len8 = N*m >> 1; \
399  const int *in_map = s->pfatab, *out_map = in_map + N*m; \
400  const FFTSample *src = _src, *in1, *in2; \
401  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m) - 2]; \
402  \
403  stride /= sizeof(*src); /* To convert it from bytes */ \
404  in1 = src; \
405  in2 = src + ((N*m*2) - 1) * stride; \
406  \
407  for (int i = 0; i < m; i++) { \
408  for (int j = 0; j < N; j++) { \
409  const int k = in_map[i*N + j]; \
410  FFTComplex tmp = { in2[-k*stride], in1[k*stride] }; \
411  CMUL3(fft##N##in[j], tmp, exp[k >> 1]); \
412  } \
413  fft##N(s->tmp + s->revtab[i], fft##N##in, m); \
414  } \
415  \
416  for (int i = 0; i < N; i++) \
417  fftp(s->tmp + m*i); \
418  \
419  for (int i = 0; i < len8; i++) { \
420  const int i0 = len8 + i, i1 = len8 - i - 1; \
421  const int s0 = out_map[i0], s1 = out_map[i1]; \
422  FFTComplex src1 = { s->tmp[s1].im, s->tmp[s1].re }; \
423  FFTComplex src0 = { s->tmp[s0].im, s->tmp[s0].re }; \
424  \
425  CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re); \
426  CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re); \
427  } \
428 }
429 
432 DECL_COMP_IMDCT(15)
433 
434 #define DECL_COMP_MDCT(N) \
435 static void compound_mdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
436  ptrdiff_t stride) \
437 { \
438  FFTSample *src = _src, *dst = _dst; \
439  FFTComplex *exp = s->exptab, tmp, fft##N##in[N]; \
440  const int m = s->m, len4 = N*m, len3 = len4 * 3, len8 = len4 >> 1; \
441  const int *in_map = s->pfatab, *out_map = in_map + N*m; \
442  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m) - 2]; \
443  \
444  stride /= sizeof(*dst); \
445  \
446  for (int i = 0; i < m; i++) { /* Folding and pre-reindexing */ \
447  for (int j = 0; j < N; j++) { \
448  const int k = in_map[i*N + j]; \
449  if (k < len4) { \
450  tmp.re = FOLD(-src[ len4 + k], src[1*len4 - 1 - k]); \
451  tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]); \
452  } else { \
453  tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]); \
454  tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]); \
455  } \
456  CMUL(fft##N##in[j].im, fft##N##in[j].re, tmp.re, tmp.im, \
457  exp[k >> 1].re, exp[k >> 1].im); \
458  } \
459  fft##N(s->tmp + s->revtab[i], fft##N##in, m); \
460  } \
461  \
462  for (int i = 0; i < N; i++) \
463  fftp(s->tmp + m*i); \
464  \
465  for (int i = 0; i < len8; i++) { \
466  const int i0 = len8 + i, i1 = len8 - i - 1; \
467  const int s0 = out_map[i0], s1 = out_map[i1]; \
468  FFTComplex src1 = { s->tmp[s1].re, s->tmp[s1].im }; \
469  FFTComplex src0 = { s->tmp[s0].re, s->tmp[s0].im }; \
470  \
471  CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im, \
472  exp[i0].im, exp[i0].re); \
473  CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im, \
474  exp[i1].im, exp[i1].re); \
475  } \
476 }
477 
480 DECL_COMP_MDCT(15)
481 
482 static void monolithic_imdct(AVTXContext *s, void *_dst, void *_src,
483  ptrdiff_t stride)
484 {
485  FFTComplex *z = _dst, *exp = s->exptab;
486  const int m = s->m, len8 = m >> 1;
487  const FFTSample *src = _src, *in1, *in2;
488  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m) - 2];
489 
490  stride /= sizeof(*src);
491  in1 = src;
492  in2 = src + ((m*2) - 1) * stride;
493 
494  for (int i = 0; i < m; i++) {
495  FFTComplex tmp = { in2[-2*i*stride], in1[2*i*stride] };
496  CMUL3(z[s->revtab[i]], tmp, exp[i]);
497  }
498 
499  fftp(z);
500 
501  for (int i = 0; i < len8; i++) {
502  const int i0 = len8 + i, i1 = len8 - i - 1;
503  FFTComplex src1 = { z[i1].im, z[i1].re };
504  FFTComplex src0 = { z[i0].im, z[i0].re };
505 
506  CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re);
507  CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re);
508  }
509 }
510 
511 static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src,
512  ptrdiff_t stride)
513 {
514  FFTSample *src = _src, *dst = _dst;
515  FFTComplex *exp = s->exptab, tmp, *z = _dst;
516  const int m = s->m, len4 = m, len3 = len4 * 3, len8 = len4 >> 1;
517  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m) - 2];
518 
519  stride /= sizeof(*dst);
520 
521  for (int i = 0; i < m; i++) { /* Folding and pre-reindexing */
522  const int k = 2*i;
523  if (k < len4) {
524  tmp.re = FOLD(-src[ len4 + k], src[1*len4 - 1 - k]);
525  tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]);
526  } else {
527  tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]);
528  tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]);
529  }
530  CMUL(z[s->revtab[i]].im, z[s->revtab[i]].re, tmp.re, tmp.im,
531  exp[i].re, exp[i].im);
532  }
533 
534  fftp(z);
535 
536  for (int i = 0; i < len8; i++) {
537  const int i0 = len8 + i, i1 = len8 - i - 1;
538  FFTComplex src1 = { z[i1].re, z[i1].im };
539  FFTComplex src0 = { z[i0].re, z[i0].im };
540 
541  CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im,
542  exp[i0].im, exp[i0].re);
543  CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im,
544  exp[i1].im, exp[i1].re);
545  }
546 }
547 
548 static int gen_mdct_exptab(AVTXContext *s, int len4, double scale)
549 {
550  const double theta = (scale < 0 ? len4 : 0) + 1.0/8.0;
551 
552  if (!(s->exptab = av_malloc_array(len4, sizeof(*s->exptab))))
553  return AVERROR(ENOMEM);
554 
555  scale = sqrt(fabs(scale));
556  for (int i = 0; i < len4; i++) {
557  const double alpha = M_PI_2 * (i + theta) / len4;
558  s->exptab[i].re = RESCALE(cos(alpha) * scale);
559  s->exptab[i].im = RESCALE(sin(alpha) * scale);
560  }
561 
562  return 0;
563 }
564 
566  enum AVTXType type, int inv, int len,
567  const void *scale, uint64_t flags)
568 {
569  const int is_mdct = ff_tx_type_is_mdct(type);
570  int err, n = 1, m = 1, max_ptwo = 1 << (FF_ARRAY_ELEMS(fft_dispatch) + 1);
571 
572  if (is_mdct)
573  len >>= 1;
574 
575 #define CHECK_FACTOR(DST, FACTOR, SRC) \
576  if (DST == 1 && !(SRC % FACTOR)) { \
577  DST = FACTOR; \
578  SRC /= FACTOR; \
579  }
580  CHECK_FACTOR(n, 15, len)
581  CHECK_FACTOR(n, 5, len)
582  CHECK_FACTOR(n, 3, len)
583 #undef CHECK_FACTOR
584 
585  /* len must be a power of two now */
586  if (!(len & (len - 1)) && len >= 4 && len <= max_ptwo) {
587  m = len;
588  len = 1;
589  }
590 
591  s->n = n;
592  s->m = m;
593  s->inv = inv;
594  s->type = type;
595 
596  /* Filter out direct 3, 5 and 15 transforms, too niche */
597  if (len > 1 || m == 1) {
598  av_log(NULL, AV_LOG_ERROR, "Unsupported transform size: n = %i, "
599  "m = %i, residual = %i!\n", n, m, len);
600  return AVERROR(EINVAL);
601  } else if (n > 1 && m > 1) { /* 2D transform case */
602  if ((err = ff_tx_gen_compound_mapping(s)))
603  return err;
604  if (!(s->tmp = av_malloc(n*m*sizeof(*s->tmp))))
605  return AVERROR(ENOMEM);
606  *tx = n == 3 ? compound_fft_3xM :
607  n == 5 ? compound_fft_5xM :
608  compound_fft_15xM;
609  if (is_mdct)
610  *tx = n == 3 ? inv ? compound_imdct_3xM : compound_mdct_3xM :
611  n == 5 ? inv ? compound_imdct_5xM : compound_mdct_5xM :
612  inv ? compound_imdct_15xM : compound_mdct_15xM;
613  } else { /* Direct transform case */
614  *tx = monolithic_fft;
615  if (is_mdct)
616  *tx = inv ? monolithic_imdct : monolithic_mdct;
617  }
618 
619  if (n != 1)
620  init_cos_tabs(0);
621  if (m != 1) {
623  for (int i = 4; i <= av_log2(m); i++)
624  init_cos_tabs(i);
625  }
626 
627  if (is_mdct)
628  return gen_mdct_exptab(s, n*m, *((SCALE_TYPE *)scale));
629 
630  return 0;
631 }
#define NULL
Definition: coverity.c:32
#define DECL_FFT(n, n2, n4)
Definition: tx_template.c:280
int ff_tx_gen_compound_mapping(AVTXContext *s)
Definition: tx.c:44
static av_cold void ff_init_53_tabs(void)
Definition: tx_template.c:95
float re
Definition: fft.c:82
static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:511
static av_cold void init_cos_tabs(int index)
Definition: tx_template.c:124
static FFTSample *const cos_tabs[18]
Definition: tx_template.c:42
static av_always_inline void fft15(FFTComplex *out, FFTComplex *in, ptrdiff_t stride)
Definition: tx_template.c:204
#define M_SQRT1_2
Definition: mathematics.h:58
#define BUTTERFLIES(a0, a1, a2, a3)
Definition: tx_template.c:277
int av_log2(unsigned v)
Definition: intmath.c:26
GLint GLenum type
Definition: opengl_enc.c:104
FFTSample re
Definition: avfft.h:38
static void monolithic_fft(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
Definition: tx_template.c:381
#define INIT_FF_COS_TABS_FUNC(index, size)
Definition: tx_template.c:74
static void fft5(FFTComplex *out, FFTComplex *in, FFTComplex exptab[2])
Definition: mdct15.c:92
#define t8
Definition: regdef.h:53
static av_always_inline void init_cos_tabs_idx(int index)
Definition: tx_template.c:63
int ff_tx_type_is_mdct(enum AVTXType type)
Definition: tx.c:21
#define DECL_COMP_MDCT(N)
Definition: tx_template.c:434
#define src
Definition: vp8dsp.c:254
void FFTComplex
Definition: tx_priv.h:45
#define t7
Definition: regdef.h:35
FFTComplex TX_NAME(ff_cos_53)[4]
#define av_cold
Definition: attributes.h:82
#define av_malloc(s)
#define mb
#define CMUL(dre, dim, are, aim, bre, bim)
Definition: fft-internal.h:76
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
Definition: mem.h:112
#define av_log(a,...)
#define CHECK_FACTOR(DST, FACTOR, SRC)
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
void(* av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride)
Function pointer to a function to perform the transform.
Definition: tx.h:81
static void monolithic_imdct(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:482
#define t1
Definition: regdef.h:29
AVTXType
Definition: tx.h:39
#define t3
Definition: regdef.h:31
float FFTSample
Definition: avfft.h:35
int8_t exp
Definition: eval.c:72
#define pass
Definition: tx_template.c:339
FFTComplex * exptab
Definition: tx_priv.h:111
static void(*const fft_dispatch[])(FFTComplex *)
Definition: tx_template.c:349
#define PASS(name)
Definition: tx_template.c:254
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
#define M_PI_2
Definition: mathematics.h:55
int * revtab
Definition: tx_priv.h:114
#define s(width, name)
Definition: cbs_vp9.c:257
int n
Definition: avisynth_c.h:760
static void fft8(FFTComplex *z)
Definition: tx_template.c:303
#define DECL_FFT5(NAME, D0, D1, D2, D3, D4)
Definition: tx_template.c:165
#define FF_ARRAY_ELEMS(a)
#define DECL_COMP_FFT(N)
Definition: tx_template.c:354
#define src1
Definition: h264pred.c:139
#define AV_ONCE_INIT
Definition: thread.h:173
static const int16_t alpha[]
Definition: ilbcdata.h:55
static void fft16(FFTComplex *z)
Definition: tx_template.c:318
#define CMUL3(c, a, b)
Definition: mdct15.c:41
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31))))#define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac){}void ff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map){AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);return NULL;}return ac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;}int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){int use_generic=1;int len=in->nb_samples;int p;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:67
int index
Definition: gxfenc.c:89
float im
Definition: fft.c:82
static av_always_inline void fft3(FFTComplex *out, FFTComplex *in, ptrdiff_t stride)
Definition: tx_template.c:130
#define src0
Definition: h264pred.c:138
#define t5
Definition: regdef.h:33
#define flags(name, subs,...)
Definition: cbs_av1.c:564
static int gen_mdct_exptab(AVTXContext *s, int len4, double scale)
Definition: tx_template.c:548
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:104
FFTSample im
Definition: avfft.h:38
#define TRANSFORM(a0, a1, a2, a3, wre, wim)
Definition: tx_template.c:239
#define t6
Definition: regdef.h:34
#define TRANSFORM_ZERO(a0, a1, a2, a3)
Definition: tx_template.c:245
static void fft4(FFTComplex *z)
Definition: tx_template.c:289
#define t4
Definition: regdef.h:32
#define BF(a, b, c, s)
int len
static int ff_thread_once(char *control, void(*routine)(void))
Definition: thread.h:175
int TX_NAME() ff_tx_init_mdct_fft(AVTXContext *s, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)
Definition: tx_template.c:565
static const struct twinvq_data tab
FILE * out
Definition: movenc.c:54
#define av_always_inline
Definition: attributes.h:39
#define M_PI
Definition: mathematics.h:52
#define av_malloc_array(a, b)
#define stride
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later.That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another.Frame references ownership and permissions
int ff_tx_gen_ptwo_revtab(AVTXContext *s)
Definition: tx.c:94
static CosTabsInitOnce cos_tabs_init_once[]
Definition: tx_template.c:103
#define t2
Definition: regdef.h:30
COSTABLE(16)
#define DECL_COMP_IMDCT(N)
Definition: tx_template.c:392
static uint8_t tmp[11]
Definition: aes_ctr.c:26