FFmpeg
tx_template.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) Lynne
3  *
4  * Power of two FFT:
5  * Copyright (c) Lynne
6  * Copyright (c) 2008 Loren Merritt
7  * Copyright (c) 2002 Fabrice Bellard
8  * Partly based on libdjbfft by D. J. Bernstein
9  *
10  * This file is part of FFmpeg.
11  *
12  * FFmpeg is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public
14  * License as published by the Free Software Foundation; either
15  * version 2.1 of the License, or (at your option) any later version.
16  *
17  * FFmpeg is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with FFmpeg; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25  */
26 
/* Declares one twiddle table: an array of `size` TXSample entries whose name
 * is produced by TX_TAB(ff_tx_tab_<name>). The leading 32 is handed to
 * DECLARE_ALIGNED -- presumably the alignment in bytes; confirm against that
 * macro's definition. */
27 #define TABLE_DEF(name, size) \
28  DECLARE_ALIGNED(32, TXSample, TX_TAB(ff_tx_tab_ ##name))[size]
29 
/* Table for a power-of-two transform of length len. It holds len/4 + 1
 * entries, which matches INIT_FF_SR_TAB() storing len/4 cosine values
 * followed by a single trailing 0. */
30 #define SR_TABLE(len) \
31  TABLE_DEF(len, len/4 + 1)
32 
33 /* Power of two tables */
34 SR_TABLE(8);
35 SR_TABLE(16);
36 SR_TABLE(32);
37 SR_TABLE(64);
38 SR_TABLE(128);
39 SR_TABLE(256);
40 SR_TABLE(512);
41 SR_TABLE(1024);
42 SR_TABLE(2048);
43 SR_TABLE(4096);
44 SR_TABLE(8192);
45 SR_TABLE(16384);
46 SR_TABLE(32768);
47 SR_TABLE(65536);
48 SR_TABLE(131072);
49 
50 /* Other factors' tables */
/* Table 53 (8 entries) is shared by the 3- and 5-point kernels; tables 7 and
 * 9 hold 6 and 8 entries and are filled by ff_tx_init_tab_7/_9. */
51 TABLE_DEF(53, 8);
52 TABLE_DEF( 7, 6);
53 TABLE_DEF( 9, 8);
54 
55 typedef struct FFSRTabsInitOnce {
56  void (*func)(void);
58  int factors[TX_MAX_SUB]; /* Must be sorted high -> low */
60 
/* Generates ff_tx_init_tab_<len>(), which fills the power-of-two table for a
 * transform of length len: entry k (0 <= k < len/4) receives
 * RESCALE(cos(k * 2*pi/len)) and the final entry, index len/4, is set to 0. */
#define INIT_FF_SR_TAB(len)                                        \
static av_cold void TX_TAB(ff_tx_init_tab_ ##len)(void)            \
{                                                                  \
    TXSample *const table = TX_TAB(ff_tx_tab_ ##len);              \
    const double step = 2*M_PI/len;                                \
    int k = 0;                                                     \
                                                                   \
    while (k < len/4) {                                            \
        table[k] = RESCALE(cos(k*step));                           \
        k++;                                                       \
    }                                                              \
                                                                   \
    /* k == len/4 here: terminate the quarter wave with a zero */  \
    table[k] = 0;                                                  \
}
72 
/* One table initializer per power-of-two size. Only the sizes from 128 up are
 * present in this listing; its embedded line numbering jumps from 72 to 77,
 * so instantiations for smaller sizes may have been dropped by extraction. */
77 INIT_FF_SR_TAB(128)
78 INIT_FF_SR_TAB(256)
79 INIT_FF_SR_TAB(512)
80 INIT_FF_SR_TAB(1024)
81 INIT_FF_SR_TAB(2048)
82 INIT_FF_SR_TAB(4096)
83 INIT_FF_SR_TAB(8192)
84 INIT_FF_SR_TAB(16384)
85 INIT_FF_SR_TAB(32768)
86 INIT_FF_SR_TAB(65536)
87 INIT_FF_SR_TAB(131072)
88 
90  { TX_TAB(ff_tx_init_tab_8), AV_ONCE_INIT },
91  { TX_TAB(ff_tx_init_tab_16), AV_ONCE_INIT },
92  { TX_TAB(ff_tx_init_tab_32), AV_ONCE_INIT },
93  { TX_TAB(ff_tx_init_tab_64), AV_ONCE_INIT },
94  { TX_TAB(ff_tx_init_tab_128), AV_ONCE_INIT },
95  { TX_TAB(ff_tx_init_tab_256), AV_ONCE_INIT },
96  { TX_TAB(ff_tx_init_tab_512), AV_ONCE_INIT },
97  { TX_TAB(ff_tx_init_tab_1024), AV_ONCE_INIT },
98  { TX_TAB(ff_tx_init_tab_2048), AV_ONCE_INIT },
99  { TX_TAB(ff_tx_init_tab_4096), AV_ONCE_INIT },
100  { TX_TAB(ff_tx_init_tab_8192), AV_ONCE_INIT },
101  { TX_TAB(ff_tx_init_tab_16384), AV_ONCE_INIT },
102  { TX_TAB(ff_tx_init_tab_32768), AV_ONCE_INIT },
103  { TX_TAB(ff_tx_init_tab_65536), AV_ONCE_INIT },
104  { TX_TAB(ff_tx_init_tab_131072), AV_ONCE_INIT },
105 };
106 
107 static av_cold void TX_TAB(ff_tx_init_tab_53)(void)
108 {
109  TX_TAB(ff_tx_tab_53)[0] = RESCALE(cos(2 * M_PI / 12));
110  TX_TAB(ff_tx_tab_53)[1] = RESCALE(cos(2 * M_PI / 12));
111  TX_TAB(ff_tx_tab_53)[2] = RESCALE(cos(2 * M_PI / 6));
112  TX_TAB(ff_tx_tab_53)[3] = RESCALE(cos(2 * M_PI / 6));
113  TX_TAB(ff_tx_tab_53)[4] = RESCALE(cos(2 * M_PI / 5));
114  TX_TAB(ff_tx_tab_53)[5] = RESCALE(sin(2 * M_PI / 5));
115  TX_TAB(ff_tx_tab_53)[6] = RESCALE(cos(2 * M_PI / 10));
116  TX_TAB(ff_tx_tab_53)[7] = RESCALE(sin(2 * M_PI / 10));
117 }
118 
119 static av_cold void TX_TAB(ff_tx_init_tab_7)(void)
120 {
121  TX_TAB(ff_tx_tab_7)[0] = RESCALE(cos(2 * M_PI / 7));
122  TX_TAB(ff_tx_tab_7)[1] = RESCALE(sin(2 * M_PI / 7));
123  TX_TAB(ff_tx_tab_7)[2] = RESCALE(sin(2 * M_PI / 28));
124  TX_TAB(ff_tx_tab_7)[3] = RESCALE(cos(2 * M_PI / 28));
125  TX_TAB(ff_tx_tab_7)[4] = RESCALE(cos(2 * M_PI / 14));
126  TX_TAB(ff_tx_tab_7)[5] = RESCALE(sin(2 * M_PI / 14));
127 }
128 
129 static av_cold void TX_TAB(ff_tx_init_tab_9)(void)
130 {
131  TX_TAB(ff_tx_tab_9)[0] = RESCALE(cos(2 * M_PI / 3));
132  TX_TAB(ff_tx_tab_9)[1] = RESCALE(sin(2 * M_PI / 3));
133  TX_TAB(ff_tx_tab_9)[2] = RESCALE(cos(2 * M_PI / 9));
134  TX_TAB(ff_tx_tab_9)[3] = RESCALE(sin(2 * M_PI / 9));
135  TX_TAB(ff_tx_tab_9)[4] = RESCALE(cos(2 * M_PI / 36));
136  TX_TAB(ff_tx_tab_9)[5] = RESCALE(sin(2 * M_PI / 36));
137  TX_TAB(ff_tx_tab_9)[6] = TX_TAB(ff_tx_tab_9)[2] + TX_TAB(ff_tx_tab_9)[5];
138  TX_TAB(ff_tx_tab_9)[7] = TX_TAB(ff_tx_tab_9)[3] - TX_TAB(ff_tx_tab_9)[4];
139 }
140 
142  { TX_TAB(ff_tx_init_tab_53), AV_ONCE_INIT, { 15, 5, 3 } },
143  { TX_TAB(ff_tx_init_tab_9), AV_ONCE_INIT, { 9 } },
144  { TX_TAB(ff_tx_init_tab_7), AV_ONCE_INIT, { 7 } },
145 };
146 
/* Prepares the twiddle tables needed for a transform length `len`.
 *
 * NOTE(review): this listing's embedded line numbers skip 153-154 and
 * 167-169, so the statement that belongs under the first `for` and the
 * statement(s) before `len /= f` are not visible here. Presumably they are
 * the one-time table initialization calls -- confirm against the real file
 * before relying on the text below as complete code. */
147 av_cold void TX_TAB(ff_tx_init_tabs)(int len)
148 {
 /* Number of trailing zero bits, i.e. the power-of-two exponent of len */
149  int factor_2 = ff_ctz(len);
150  if (factor_2) {
 /* factor_2 == 3 (length 8) maps to index 0; smaller exponents give a
  * negative idx and the loop below does not run */
151  int idx = factor_2 - 3;
152  for (int i = 0; i <= idx; i++)
 /* Strip the power-of-two part; only the odd remainder is examined below */
155  len >>= factor_2;
156  }
157 
158  for (int i = 0; i < FF_ARRAY_ELEMS(nptwo_tabs_init_once); i++) {
159  int f, f_idx = 0;
160 
 /* Nothing left to cover */
161  if (len <= 1)
162  return;
163 
 /* Walk this entry's zero-terminated factor list */
164  while ((f = nptwo_tabs_init_once[i].factors[f_idx++])) {
 /* Skip factors that are not a multiple of the remaining length */
165  if (f % len)
166  continue;
167 
170  len /= f;
171  break;
172  }
173  }
174 }
175 
177  ptrdiff_t stride)
178 {
179  TXComplex tmp[2];
180  const TXSample *tab = TX_TAB(ff_tx_tab_53);
181 #ifdef TX_INT32
182  int64_t mtmp[4];
183 #endif
184 
185  BF(tmp[0].re, tmp[1].im, in[1].im, in[2].im);
186  BF(tmp[0].im, tmp[1].re, in[1].re, in[2].re);
187 
188  out[0*stride].re = in[0].re + tmp[1].re;
189  out[0*stride].im = in[0].im + tmp[1].im;
190 
191 #ifdef TX_INT32
192  mtmp[0] = (int64_t)tab[0] * tmp[0].re;
193  mtmp[1] = (int64_t)tab[1] * tmp[0].im;
194  mtmp[2] = (int64_t)tab[2] * tmp[1].re;
195  mtmp[3] = (int64_t)tab[2] * tmp[1].im;
196  out[1*stride].re = in[0].re - (mtmp[2] + mtmp[0] + 0x40000000 >> 31);
197  out[1*stride].im = in[0].im - (mtmp[3] - mtmp[1] + 0x40000000 >> 31);
198  out[2*stride].re = in[0].re - (mtmp[2] - mtmp[0] + 0x40000000 >> 31);
199  out[2*stride].im = in[0].im - (mtmp[3] + mtmp[1] + 0x40000000 >> 31);
200 #else
201  tmp[0].re = tab[0] * tmp[0].re;
202  tmp[0].im = tab[1] * tmp[0].im;
203  tmp[1].re = tab[2] * tmp[1].re;
204  tmp[1].im = tab[2] * tmp[1].im;
205  out[1*stride].re = in[0].re - tmp[1].re + tmp[0].re;
206  out[1*stride].im = in[0].im - tmp[1].im - tmp[0].im;
207  out[2*stride].re = in[0].re - tmp[1].re - tmp[0].re;
208  out[2*stride].im = in[0].im - tmp[1].im + tmp[0].im;
209 #endif
210 }
211 
/* Generates a 5-point transform NAME(out, in, stride).
 * It reads in[0..4] and writes five results to out[D0*stride] ..
 * out[D4*stride], so D0..D4 select where each output lands. Constants are
 * entries 4..7 of ff_tx_tab_53. The statement order matters: SMUL/CMUL are
 * given t[0]/t[1] both as destination and as operand. */
212 #define DECL_FFT5(NAME, D0, D1, D2, D3, D4) \
213 static av_always_inline void NAME(TXComplex *out, TXComplex *in, \
214  ptrdiff_t stride) \
215 { \
216  TXComplex z0[4], t[6]; \
217  const TXSample *tab = TX_TAB(ff_tx_tab_53); \
218  \
219  BF(t[1].im, t[0].re, in[1].re, in[4].re); \
220  BF(t[1].re, t[0].im, in[1].im, in[4].im); \
221  BF(t[3].im, t[2].re, in[2].re, in[3].re); \
222  BF(t[3].re, t[2].im, in[2].im, in[3].im); \
223  \
224  out[D0*stride].re = in[0].re + t[0].re + t[2].re; \
225  out[D0*stride].im = in[0].im + t[0].im + t[2].im; \
226  \
227  SMUL(t[4].re, t[0].re, tab[4], tab[6], t[2].re, t[0].re); \
228  SMUL(t[4].im, t[0].im, tab[4], tab[6], t[2].im, t[0].im); \
229  CMUL(t[5].re, t[1].re, tab[5], tab[7], t[3].re, t[1].re); \
230  CMUL(t[5].im, t[1].im, tab[5], tab[7], t[3].im, t[1].im); \
231  \
232  BF(z0[0].re, z0[3].re, t[0].re, t[1].re); \
233  BF(z0[0].im, z0[3].im, t[0].im, t[1].im); \
234  BF(z0[2].re, z0[1].re, t[4].re, t[5].re); \
235  BF(z0[2].im, z0[1].im, t[4].im, t[5].im); \
236  \
237  out[D1*stride].re = in[0].re + z0[3].re; \
238  out[D1*stride].im = in[0].im + z0[0].im; \
239  out[D2*stride].re = in[0].re + z0[2].re; \
240  out[D2*stride].im = in[0].im + z0[1].im; \
241  out[D3*stride].re = in[0].re + z0[1].re; \
242  out[D3*stride].im = in[0].im + z0[2].im; \
243  out[D4*stride].re = in[0].re + z0[0].re; \
244  out[D4*stride].im = in[0].im + z0[3].im; \
245 }
246 
/* fft5 writes to consecutive slots 0..4. The _m1/_m2/_m3 variants scatter
 * their outputs; taken together their index lists cover 0..14 exactly once,
 * which is how the 15-point kernel in this file uses them. */
247 DECL_FFT5(fft5, 0, 1, 2, 3, 4)
248 DECL_FFT5(fft5_m1, 0, 6, 12, 3, 9)
249 DECL_FFT5(fft5_m2, 10, 1, 7, 13, 4)
250 DECL_FFT5(fft5_m3, 5, 11, 2, 8, 14)
251 
253  ptrdiff_t stride)
254 {
255  TXComplex t[6], z[3];
256  const TXComplex *tab = (const TXComplex *)TX_TAB(ff_tx_tab_7);
257 #ifdef TX_INT32
258  int64_t mtmp[12];
259 #endif
260 
261  BF(t[1].re, t[0].re, in[1].re, in[6].re);
262  BF(t[1].im, t[0].im, in[1].im, in[6].im);
263  BF(t[3].re, t[2].re, in[2].re, in[5].re);
264  BF(t[3].im, t[2].im, in[2].im, in[5].im);
265  BF(t[5].re, t[4].re, in[3].re, in[4].re);
266  BF(t[5].im, t[4].im, in[3].im, in[4].im);
267 
268  out[0*stride].re = in[0].re + t[0].re + t[2].re + t[4].re;
269  out[0*stride].im = in[0].im + t[0].im + t[2].im + t[4].im;
270 
271 #ifdef TX_INT32 /* NOTE: it's possible to do this with 16 mults but 72 adds */
272  mtmp[ 0] = ((int64_t)tab[0].re)*t[0].re - ((int64_t)tab[2].re)*t[4].re;
273  mtmp[ 1] = ((int64_t)tab[0].re)*t[4].re - ((int64_t)tab[1].re)*t[0].re;
274  mtmp[ 2] = ((int64_t)tab[0].re)*t[2].re - ((int64_t)tab[2].re)*t[0].re;
275  mtmp[ 3] = ((int64_t)tab[0].re)*t[0].im - ((int64_t)tab[1].re)*t[2].im;
276  mtmp[ 4] = ((int64_t)tab[0].re)*t[4].im - ((int64_t)tab[1].re)*t[0].im;
277  mtmp[ 5] = ((int64_t)tab[0].re)*t[2].im - ((int64_t)tab[2].re)*t[0].im;
278 
279  mtmp[ 6] = ((int64_t)tab[2].im)*t[1].im + ((int64_t)tab[1].im)*t[5].im;
280  mtmp[ 7] = ((int64_t)tab[0].im)*t[5].im + ((int64_t)tab[2].im)*t[3].im;
281  mtmp[ 8] = ((int64_t)tab[2].im)*t[5].im + ((int64_t)tab[1].im)*t[3].im;
282  mtmp[ 9] = ((int64_t)tab[0].im)*t[1].re + ((int64_t)tab[1].im)*t[3].re;
283  mtmp[10] = ((int64_t)tab[2].im)*t[3].re + ((int64_t)tab[0].im)*t[5].re;
284  mtmp[11] = ((int64_t)tab[2].im)*t[1].re + ((int64_t)tab[1].im)*t[5].re;
285 
286  z[0].re = (int32_t)(mtmp[ 0] - ((int64_t)tab[1].re)*t[2].re + 0x40000000 >> 31);
287  z[1].re = (int32_t)(mtmp[ 1] - ((int64_t)tab[2].re)*t[2].re + 0x40000000 >> 31);
288  z[2].re = (int32_t)(mtmp[ 2] - ((int64_t)tab[1].re)*t[4].re + 0x40000000 >> 31);
289  z[0].im = (int32_t)(mtmp[ 3] - ((int64_t)tab[2].re)*t[4].im + 0x40000000 >> 31);
290  z[1].im = (int32_t)(mtmp[ 4] - ((int64_t)tab[2].re)*t[2].im + 0x40000000 >> 31);
291  z[2].im = (int32_t)(mtmp[ 5] - ((int64_t)tab[1].re)*t[4].im + 0x40000000 >> 31);
292 
293  t[0].re = (int32_t)(mtmp[ 6] - ((int64_t)tab[0].im)*t[3].im + 0x40000000 >> 31);
294  t[2].re = (int32_t)(mtmp[ 7] - ((int64_t)tab[1].im)*t[1].im + 0x40000000 >> 31);
295  t[4].re = (int32_t)(mtmp[ 8] + ((int64_t)tab[0].im)*t[1].im + 0x40000000 >> 31);
296  t[0].im = (int32_t)(mtmp[ 9] + ((int64_t)tab[2].im)*t[5].re + 0x40000000 >> 31);
297  t[2].im = (int32_t)(mtmp[10] - ((int64_t)tab[1].im)*t[1].re + 0x40000000 >> 31);
298  t[4].im = (int32_t)(mtmp[11] - ((int64_t)tab[0].im)*t[3].re + 0x40000000 >> 31);
299 #else
300  z[0].re = tab[0].re*t[0].re - tab[2].re*t[4].re - tab[1].re*t[2].re;
301  z[1].re = tab[0].re*t[4].re - tab[1].re*t[0].re - tab[2].re*t[2].re;
302  z[2].re = tab[0].re*t[2].re - tab[2].re*t[0].re - tab[1].re*t[4].re;
303  z[0].im = tab[0].re*t[0].im - tab[1].re*t[2].im - tab[2].re*t[4].im;
304  z[1].im = tab[0].re*t[4].im - tab[1].re*t[0].im - tab[2].re*t[2].im;
305  z[2].im = tab[0].re*t[2].im - tab[2].re*t[0].im - tab[1].re*t[4].im;
306 
307  /* It's possible to do t[4].re and t[0].im with 2 multiplies only by
308  * multiplying the sum of all with the average of the twiddles */
309 
310  t[0].re = tab[2].im*t[1].im + tab[1].im*t[5].im - tab[0].im*t[3].im;
311  t[2].re = tab[0].im*t[5].im + tab[2].im*t[3].im - tab[1].im*t[1].im;
312  t[4].re = tab[2].im*t[5].im + tab[1].im*t[3].im + tab[0].im*t[1].im;
313  t[0].im = tab[0].im*t[1].re + tab[1].im*t[3].re + tab[2].im*t[5].re;
314  t[2].im = tab[2].im*t[3].re + tab[0].im*t[5].re - tab[1].im*t[1].re;
315  t[4].im = tab[2].im*t[1].re + tab[1].im*t[5].re - tab[0].im*t[3].re;
316 #endif
317 
318  BF(t[1].re, z[0].re, z[0].re, t[4].re);
319  BF(t[3].re, z[1].re, z[1].re, t[2].re);
320  BF(t[5].re, z[2].re, z[2].re, t[0].re);
321  BF(t[1].im, z[0].im, z[0].im, t[0].im);
322  BF(t[3].im, z[1].im, z[1].im, t[2].im);
323  BF(t[5].im, z[2].im, z[2].im, t[4].im);
324 
325  out[1*stride].re = in[0].re + z[0].re;
326  out[1*stride].im = in[0].im + t[1].im;
327  out[2*stride].re = in[0].re + t[3].re;
328  out[2*stride].im = in[0].im + z[1].im;
329  out[3*stride].re = in[0].re + z[2].re;
330  out[3*stride].im = in[0].im + t[5].im;
331  out[4*stride].re = in[0].re + t[5].re;
332  out[4*stride].im = in[0].im + z[2].im;
333  out[5*stride].re = in[0].re + z[1].re;
334  out[5*stride].im = in[0].im + t[3].im;
335  out[6*stride].re = in[0].re + t[1].re;
336  out[6*stride].im = in[0].im + z[0].im;
337 }
338 
340  ptrdiff_t stride)
341 {
342  const TXComplex *tab = (const TXComplex *)TX_TAB(ff_tx_tab_9);
343  TXComplex t[16], w[4], x[5], y[5], z[2];
344 #ifdef TX_INT32
345  int64_t mtmp[12];
346 #endif
347 
348  BF(t[1].re, t[0].re, in[1].re, in[8].re);
349  BF(t[1].im, t[0].im, in[1].im, in[8].im);
350  BF(t[3].re, t[2].re, in[2].re, in[7].re);
351  BF(t[3].im, t[2].im, in[2].im, in[7].im);
352  BF(t[5].re, t[4].re, in[3].re, in[6].re);
353  BF(t[5].im, t[4].im, in[3].im, in[6].im);
354  BF(t[7].re, t[6].re, in[4].re, in[5].re);
355  BF(t[7].im, t[6].im, in[4].im, in[5].im);
356 
357  w[0].re = t[0].re - t[6].re;
358  w[0].im = t[0].im - t[6].im;
359  w[1].re = t[2].re - t[6].re;
360  w[1].im = t[2].im - t[6].im;
361  w[2].re = t[1].re - t[7].re;
362  w[2].im = t[1].im - t[7].im;
363  w[3].re = t[3].re + t[7].re;
364  w[3].im = t[3].im + t[7].im;
365 
366  z[0].re = in[0].re + t[4].re;
367  z[0].im = in[0].im + t[4].im;
368 
369  z[1].re = t[0].re + t[2].re + t[6].re;
370  z[1].im = t[0].im + t[2].im + t[6].im;
371 
372  out[0*stride].re = z[0].re + z[1].re;
373  out[0*stride].im = z[0].im + z[1].im;
374 
375 #ifdef TX_INT32
376  mtmp[0] = t[1].re - t[3].re + t[7].re;
377  mtmp[1] = t[1].im - t[3].im + t[7].im;
378 
379  y[3].re = (int32_t)(((int64_t)tab[0].im)*mtmp[0] + 0x40000000 >> 31);
380  y[3].im = (int32_t)(((int64_t)tab[0].im)*mtmp[1] + 0x40000000 >> 31);
381 
382  mtmp[0] = (int32_t)(((int64_t)tab[0].re)*z[1].re + 0x40000000 >> 31);
383  mtmp[1] = (int32_t)(((int64_t)tab[0].re)*z[1].im + 0x40000000 >> 31);
384  mtmp[2] = (int32_t)(((int64_t)tab[0].re)*t[4].re + 0x40000000 >> 31);
385  mtmp[3] = (int32_t)(((int64_t)tab[0].re)*t[4].im + 0x40000000 >> 31);
386 
387  x[3].re = z[0].re + (int32_t)mtmp[0];
388  x[3].im = z[0].im + (int32_t)mtmp[1];
389  z[0].re = in[0].re + (int32_t)mtmp[2];
390  z[0].im = in[0].im + (int32_t)mtmp[3];
391 
392  mtmp[0] = ((int64_t)tab[1].re)*w[0].re;
393  mtmp[1] = ((int64_t)tab[1].re)*w[0].im;
394  mtmp[2] = ((int64_t)tab[2].im)*w[0].re;
395  mtmp[3] = ((int64_t)tab[2].im)*w[0].im;
396  mtmp[4] = ((int64_t)tab[1].im)*w[2].re;
397  mtmp[5] = ((int64_t)tab[1].im)*w[2].im;
398  mtmp[6] = ((int64_t)tab[2].re)*w[2].re;
399  mtmp[7] = ((int64_t)tab[2].re)*w[2].im;
400 
401  x[1].re = (int32_t)(mtmp[0] + ((int64_t)tab[2].im)*w[1].re + 0x40000000 >> 31);
402  x[1].im = (int32_t)(mtmp[1] + ((int64_t)tab[2].im)*w[1].im + 0x40000000 >> 31);
403  x[2].re = (int32_t)(mtmp[2] - ((int64_t)tab[3].re)*w[1].re + 0x40000000 >> 31);
404  x[2].im = (int32_t)(mtmp[3] - ((int64_t)tab[3].re)*w[1].im + 0x40000000 >> 31);
405  y[1].re = (int32_t)(mtmp[4] + ((int64_t)tab[2].re)*w[3].re + 0x40000000 >> 31);
406  y[1].im = (int32_t)(mtmp[5] + ((int64_t)tab[2].re)*w[3].im + 0x40000000 >> 31);
407  y[2].re = (int32_t)(mtmp[6] - ((int64_t)tab[3].im)*w[3].re + 0x40000000 >> 31);
408  y[2].im = (int32_t)(mtmp[7] - ((int64_t)tab[3].im)*w[3].im + 0x40000000 >> 31);
409 
410  y[0].re = (int32_t)(((int64_t)tab[0].im)*t[5].re + 0x40000000 >> 31);
411  y[0].im = (int32_t)(((int64_t)tab[0].im)*t[5].im + 0x40000000 >> 31);
412 
413 #else
414  y[3].re = tab[0].im*(t[1].re - t[3].re + t[7].re);
415  y[3].im = tab[0].im*(t[1].im - t[3].im + t[7].im);
416 
417  x[3].re = z[0].re + tab[0].re*z[1].re;
418  x[3].im = z[0].im + tab[0].re*z[1].im;
419  z[0].re = in[0].re + tab[0].re*t[4].re;
420  z[0].im = in[0].im + tab[0].re*t[4].im;
421 
422  x[1].re = tab[1].re*w[0].re + tab[2].im*w[1].re;
423  x[1].im = tab[1].re*w[0].im + tab[2].im*w[1].im;
424  x[2].re = tab[2].im*w[0].re - tab[3].re*w[1].re;
425  x[2].im = tab[2].im*w[0].im - tab[3].re*w[1].im;
426  y[1].re = tab[1].im*w[2].re + tab[2].re*w[3].re;
427  y[1].im = tab[1].im*w[2].im + tab[2].re*w[3].im;
428  y[2].re = tab[2].re*w[2].re - tab[3].im*w[3].re;
429  y[2].im = tab[2].re*w[2].im - tab[3].im*w[3].im;
430 
431  y[0].re = tab[0].im*t[5].re;
432  y[0].im = tab[0].im*t[5].im;
433 #endif
434 
435  x[4].re = x[1].re + x[2].re;
436  x[4].im = x[1].im + x[2].im;
437 
438  y[4].re = y[1].re - y[2].re;
439  y[4].im = y[1].im - y[2].im;
440  x[1].re = z[0].re + x[1].re;
441  x[1].im = z[0].im + x[1].im;
442  y[1].re = y[0].re + y[1].re;
443  y[1].im = y[0].im + y[1].im;
444  x[2].re = z[0].re + x[2].re;
445  x[2].im = z[0].im + x[2].im;
446  y[2].re = y[2].re - y[0].re;
447  y[2].im = y[2].im - y[0].im;
448  x[4].re = z[0].re - x[4].re;
449  x[4].im = z[0].im - x[4].im;
450  y[4].re = y[0].re - y[4].re;
451  y[4].im = y[0].im - y[4].im;
452 
453  out[1*stride] = (TXComplex){ x[1].re + y[1].im, x[1].im - y[1].re };
454  out[2*stride] = (TXComplex){ x[2].re + y[2].im, x[2].im - y[2].re };
455  out[3*stride] = (TXComplex){ x[3].re + y[3].im, x[3].im - y[3].re };
456  out[4*stride] = (TXComplex){ x[4].re + y[4].im, x[4].im - y[4].re };
457  out[5*stride] = (TXComplex){ x[4].re - y[4].im, x[4].im + y[4].re };
458  out[6*stride] = (TXComplex){ x[3].re - y[3].im, x[3].im + y[3].re };
459  out[7*stride] = (TXComplex){ x[2].re - y[2].im, x[2].im + y[2].re };
460  out[8*stride] = (TXComplex){ x[1].re - y[1].im, x[1].im + y[1].re };
461 }
462 
464  ptrdiff_t stride)
465 {
466  TXComplex tmp[15];
467 
468  for (int i = 0; i < 5; i++)
469  fft3(tmp + i, in + i*3, 5);
470 
471  fft5_m1(out, tmp + 0, stride);
472  fft5_m2(out, tmp + 5, stride);
473  fft5_m3(out, tmp + 10, stride);
474 }
475 
/* Combines four complex values a0..a3 in place.
 * Relies on variables from the expansion site: t1, t2, t5, t6 must hold the
 * inputs on entry, and t3, t4, r0, i0, r1, i1 are used as scratch. a0 and a1
 * are copied into r0/i0/r1/i1 first because they are overwritten by the BF
 * steps that follow. a2 and a3 are only written here, never read. */
476 #define BUTTERFLIES(a0, a1, a2, a3) \
477  do { \
478  r0=a0.re; \
479  i0=a0.im; \
480  r1=a1.re; \
481  i1=a1.im; \
482  BF(t3, t5, t5, t1); \
483  BF(a2.re, a0.re, r0, t5); \
484  BF(a3.im, a1.im, i1, t3); \
485  BF(t4, t6, t2, t6); \
486  BF(a3.re, a1.re, r1, t4); \
487  BF(a2.im, a0.im, i0, t6); \
488  } while (0)
489 
/* Twiddle-then-combine step: a2 is multiplied by (wre, -wim) into t1/t2 and
 * a3 by (wre, wim) into t5/t6 via CMUL, then BUTTERFLIES merges the results
 * into a0..a3. Uses the same expansion-site temporaries as BUTTERFLIES. */
490 #define TRANSFORM(a0, a1, a2, a3, wre, wim) \
491  do { \
492  CMUL(t1, t2, a2.re, a2.im, wre, -wim); \
493  CMUL(t5, t6, a3.re, a3.im, wre, wim); \
494  BUTTERFLIES(a0, a1, a2, a3); \
495  } while (0)
496 
497 /* z[0...8n-1], w[1...2n-1] */
/* Combination pass over four sub-blocks of z located at offsets 0, 2*len,
 * 4*len and 6*len. Each loop iteration applies TRANSFORM to 8 consecutive
 * positions and then advances z by 8, while the counter i advances by 4.
 * The first twiddle component is read forwards from `cos`; the second is read
 * backwards through `wim`, which starts at cos + 2*len - 7 and moves down by
 * 8 per iteration. Note the parameter named `cos` shadows the libm function
 * inside this body. */
498 static inline void TX_NAME(ff_tx_fft_sr_combine)(TXComplex *z,
499  const TXSample *cos, int len)
500 {
501  int o1 = 2*len;
502  int o2 = 4*len;
503  int o3 = 6*len;
504  const TXSample *wim = cos + o1 - 7;
 /* Scratch variables required by the TRANSFORM/BUTTERFLIES macros */
505  TXSample t1, t2, t3, t4, t5, t6, r0, i0, r1, i1;
506 
507  for (int i = 0; i < len; i += 4) {
 /* Even positions first, then odd ones; cos[k] is paired with wim[7 - k] */
508  TRANSFORM(z[0], z[o1 + 0], z[o2 + 0], z[o3 + 0], cos[0], wim[7]);
509  TRANSFORM(z[2], z[o1 + 2], z[o2 + 2], z[o3 + 2], cos[2], wim[5]);
510  TRANSFORM(z[4], z[o1 + 4], z[o2 + 4], z[o3 + 4], cos[4], wim[3]);
511  TRANSFORM(z[6], z[o1 + 6], z[o2 + 6], z[o3 + 6], cos[6], wim[1]);
512 
513  TRANSFORM(z[1], z[o1 + 1], z[o2 + 1], z[o3 + 1], cos[1], wim[6]);
514  TRANSFORM(z[3], z[o1 + 3], z[o2 + 3], z[o3 + 3], cos[3], wim[4]);
515  TRANSFORM(z[5], z[o1 + 5], z[o2 + 5], z[o3 + 5], cos[5], wim[2]);
516  TRANSFORM(z[7], z[o1 + 7], z[o2 + 7], z[o3 + 7], cos[7], wim[0]);
517 
518  z += 2*4;
519  cos += 2*4;
520  wim -= 2*4;
521  }
522 }
523 
525  const FFTXCodelet *cd,
526  uint64_t flags,
528  int len, int inv,
529  const void *scale)
530 {
531  TX_TAB(ff_tx_init_tabs)(len);
532  return ff_tx_gen_ptwo_revtab(s, opts ? opts->invert_lookup : 1);
533 }
534 
/* Emits the FFTXCodelet descriptor ff_tx_fft<n>_ns_def for the fixed-size
 * transform ff_tx_fft<n>_ns: name "fft<n>_ns", min_len == max_len == n,
 * flags INPLACE | UNALIGNED | PRESHUFFLE, initialised through
 * ff_tx_fft_sr_codelet_init. */
535 #define DECL_SR_CODELET_DEF(n) \
536 static const FFTXCodelet TX_NAME(ff_tx_fft##n##_ns_def) = { \
537  .name = TX_NAME_STR("fft" #n "_ns"), \
538  .function = TX_NAME(ff_tx_fft##n##_ns), \
539  .type = TX_TYPE(FFT), \
540  .flags = AV_TX_INPLACE | AV_TX_UNALIGNED | \
541  FF_TX_PRESHUFFLE, \
542  .factors[0] = 2, \
543  .min_len = n, \
544  .max_len = n, \
545  .init = TX_NAME(ff_tx_fft_sr_codelet_init), \
546  .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
547  .prio = FF_TX_PRIO_BASE, \
548 };
549 
/* Emits ff_tx_fft<n>_ns plus its descriptor. The generated function works in
 * place on dst (src is not referenced): it runs the n2-point transform on the
 * start of z, the n4-point transform at offsets n4*2 and n4*3, and then
 * merges the pieces with ff_tx_fft_sr_combine using table ff_tx_tab_<n> and
 * a length argument of n4 >> 1. Callers pass n2 = n/2 and n4 = n/4. */
550 #define DECL_SR_CODELET(n, n2, n4) \
551 static void TX_NAME(ff_tx_fft##n##_ns)(AVTXContext *s, void *dst, \
552  void *src, ptrdiff_t stride) \
553 { \
554  TXComplex *z = dst; \
555  const TXSample *cos = TX_TAB(ff_tx_tab_##n); \
556  \
557  TX_NAME(ff_tx_fft##n2##_ns)(s, z, z, stride); \
558  TX_NAME(ff_tx_fft##n4##_ns)(s, z + n4*2, z + n4*2, stride); \
559  TX_NAME(ff_tx_fft##n4##_ns)(s, z + n4*3, z + n4*3, stride); \
560  TX_NAME(ff_tx_fft_sr_combine)(z, cos, n4 >> 1); \
561 } \
562  \
563 DECL_SR_CODELET_DEF(n)
564 
565 static void TX_NAME(ff_tx_fft2_ns)(AVTXContext *s, void *dst,
566  void *src, ptrdiff_t stride)
567 {
568  TXComplex *z = dst;
569  TXComplex tmp;
570 
571  BF(tmp.re, z[0].re, z[0].re, z[1].re);
572  BF(tmp.im, z[0].im, z[0].im, z[1].im);
573  z[1] = tmp;
574 }
575 
/* 4-point transform, performed in place on z[0..3] at dst. s, src and stride
 * are not referenced in the body. */
576 static void TX_NAME(ff_tx_fft4_ns)(AVTXContext *s, void *dst,
577  void *src, ptrdiff_t stride)
578 {
579  TXComplex *z = dst;
580  TXSample t1, t2, t3, t4, t5, t6, t7, t8;
581 
 /* The BF calls are order-sensitive: several of them write a z[] component
  * that a later BF no longer needs, while values still required have been
  * parked in t1..t8 beforehand. Do not reorder. */
582  BF(t3, t1, z[0].re, z[1].re);
583  BF(t8, t6, z[3].re, z[2].re);
584  BF(z[2].re, z[0].re, t1, t6);
585  BF(t4, t2, z[0].im, z[1].im);
586  BF(t7, t5, z[2].im, z[3].im);
587  BF(z[3].im, z[1].im, t4, t8);
588  BF(z[3].re, z[1].re, t3, t7);
589  BF(z[2].im, z[0].im, t2, t5);
590 }
591 
/* 8-point transform, performed in place on z[0..7] at dst. src is not
 * referenced; s and stride are only forwarded to the 4-point transform. */
592 static void TX_NAME(ff_tx_fft8_ns)(AVTXContext *s, void *dst,
593  void *src, ptrdiff_t stride)
594 {
595  TXComplex *z = dst;
 /* Temporaries required by the BUTTERFLIES/TRANSFORM macros */
596  TXSample t1, t2, t3, t4, t5, t6, r0, i0, r1, i1;
 /* Entry 1 of the 8-point table; this local shadows libm's cos() here */
597  const TXSample cos = TX_TAB(ff_tx_tab_8)[1];
598 
 /* First half: 4-point transform of z[0..3] */
599  TX_NAME(ff_tx_fft4_ns)(s, z, z, stride);
600 
 /* Second half: combine z[4]/z[5] and z[6]/z[7]; t1, t2, t5, t6 receive the
  * inputs that BUTTERFLIES expects to find on entry */
601  BF(t1, z[5].re, z[4].re, -z[5].re);
602  BF(t2, z[5].im, z[4].im, -z[5].im);
603  BF(t5, z[7].re, z[6].re, -z[7].re);
604  BF(t6, z[7].im, z[6].im, -z[7].im);
605 
606  BUTTERFLIES(z[0], z[2], z[4], z[6]);
607  TRANSFORM(z[1], z[3], z[5], z[7], cos, cos);
608 }
609 
/* 16-point transform, performed in place on z[0..15] at dst: one 8-point
 * transform on z[0..7], two 4-point transforms on z[8..11] and z[12..15],
 * then a combination pass. src is not referenced. */
610 static void TX_NAME(ff_tx_fft16_ns)(AVTXContext *s, void *dst,
611  void *src, ptrdiff_t stride)
612 {
613  TXComplex *z = dst;
614  const TXSample *cos = TX_TAB(ff_tx_tab_16);
615 
 /* Temporaries required by the BUTTERFLIES/TRANSFORM macros */
616  TXSample t1, t2, t3, t4, t5, t6, r0, i0, r1, i1;
 /* Entries 1..3 of the 16-point table, copied to locals */
617  TXSample cos_16_1 = cos[1];
618  TXSample cos_16_2 = cos[2];
619  TXSample cos_16_3 = cos[3];
620 
621  TX_NAME(ff_tx_fft8_ns)(s, z + 0, z + 0, stride);
622  TX_NAME(ff_tx_fft4_ns)(s, z + 8, z + 8, stride);
623  TX_NAME(ff_tx_fft4_ns)(s, z + 12, z + 12, stride);
624 
 /* Position 0 skips the multiply: z[8] and z[12] are loaded directly into
  * the temporaries that BUTTERFLIES reads */
625  t1 = z[ 8].re;
626  t2 = z[ 8].im;
627  t5 = z[12].re;
628  t6 = z[12].im;
629  BUTTERFLIES(z[0], z[4], z[8], z[12]);
630 
 /* Remaining positions; 1 and 3 use the same two table entries swapped */
631  TRANSFORM(z[ 2], z[ 6], z[10], z[14], cos_16_2, cos_16_2);
632  TRANSFORM(z[ 1], z[ 5], z[ 9], z[13], cos_16_1, cos_16_3);
633  TRANSFORM(z[ 3], z[ 7], z[11], z[15], cos_16_3, cos_16_1);
634 }
635 
/* Fixed-size transforms from 32 to 131072 points. Each one is assembled from
 * the transform half its size (second argument) and the transform a quarter
 * of its size (third argument), both of which are defined earlier. */
640 DECL_SR_CODELET(32,16,8)
641 DECL_SR_CODELET(64,32,16)
642 DECL_SR_CODELET(128,64,32)
643 DECL_SR_CODELET(256,128,64)
644 DECL_SR_CODELET(512,256,128)
645 DECL_SR_CODELET(1024,512,256)
646 DECL_SR_CODELET(2048,1024,512)
647 DECL_SR_CODELET(4096,2048,1024)
648 DECL_SR_CODELET(8192,4096,2048)
649 DECL_SR_CODELET(16384,8192,4096)
650 DECL_SR_CODELET(32768,16384,8192)
651 DECL_SR_CODELET(65536,32768,16384)
652 DECL_SR_CODELET(131072,65536,32768)
653 
655  const FFTXCodelet *cd,
656  uint64_t flags,
658  int len, int inv,
659  const void *scale)
660 {
661  int ret;
662  int is_inplace = !!(flags & AV_TX_INPLACE);
663  FFTXCodeletOptions sub_opts = { .invert_lookup = !is_inplace };
664 
665  flags &= ~FF_TX_OUT_OF_PLACE; /* We want the subtransform to be */
666  flags |= AV_TX_INPLACE; /* in-place */
667  flags |= FF_TX_PRESHUFFLE; /* This function handles the permute step */
668 
669  if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts, len, inv, scale)))
670  return ret;
671 
672  if (is_inplace && (ret = ff_tx_gen_ptwo_inplace_revtab_idx(s)))
673  return ret;
674 
675  return 0;
676 }
677 
678 static void TX_NAME(ff_tx_fft_sr)(AVTXContext *s, void *_dst,
679  void *_src, ptrdiff_t stride)
680 {
681  TXComplex *src = _src;
682  TXComplex *dst = _dst;
683  int *map = s->sub[0].map;
684  int len = s->len;
685 
686  /* Compilers can't vectorize this anyway without assuming AVX2, which they
687  * generally don't, at least without -march=native -mtune=native */
688  for (int i = 0; i < len; i++)
689  dst[i] = src[map[i]];
690 
691  s->fn[0](&s->sub[0], dst, dst, stride);
692 }
693 
694 static void TX_NAME(ff_tx_fft_sr_inplace)(AVTXContext *s, void *_dst,
695  void *_src, ptrdiff_t stride)
696 {
697  TXComplex *dst = _dst;
698  TXComplex tmp;
699  const int *map = s->sub->map;
700  const int *inplace_idx = s->map;
701  int src_idx, dst_idx;
702 
703  src_idx = *inplace_idx++;
704  do {
705  tmp = dst[src_idx];
706  dst_idx = map[src_idx];
707  do {
708  FFSWAP(TXComplex, tmp, dst[dst_idx]);
709  dst_idx = map[dst_idx];
710  } while (dst_idx != src_idx); /* Can be > as well, but was less predictable */
711  dst[dst_idx] = tmp;
712  } while ((src_idx = *inplace_idx++));
713 
714  s->fn[0](&s->sub[0], dst, dst, stride);
715 }
716 
/* Codelet descriptor for ff_tx_fft_sr: FFT type, first factor 2, lengths
 * from 2 upwards with no upper bound, set up by ff_tx_fft_sr_init.
 * NOTE(review): the listing's embedded numbering skips 721 and 726, so one
 * or two initializer lines are missing from this view. */
717 static const FFTXCodelet TX_NAME(ff_tx_fft_sr_def) = {
718  .name = TX_NAME_STR("fft_sr"),
719  .function = TX_NAME(ff_tx_fft_sr),
720  .type = TX_TYPE(FFT),
722  .factors[0] = 2,
723  .min_len = 2,
724  .max_len = TX_LEN_UNLIMITED,
725  .init = TX_NAME(ff_tx_fft_sr_init),
727  .prio = FF_TX_PRIO_BASE,
728 };
729 
/* Codelet descriptor for ff_tx_fft_sr_inplace: same type, factor, length
 * range and init function as ff_tx_fft_sr_def, flagged UNALIGNED | INPLACE.
 * NOTE(review): the listing's embedded numbering skips 739, so one
 * initializer line is missing from this view. */
730 static const FFTXCodelet TX_NAME(ff_tx_fft_sr_inplace_def) = {
731  .name = TX_NAME_STR("fft_sr_inplace"),
732  .function = TX_NAME(ff_tx_fft_sr_inplace),
733  .type = TX_TYPE(FFT),
734  .flags = AV_TX_UNALIGNED | AV_TX_INPLACE,
735  .factors[0] = 2,
736  .min_len = 2,
737  .max_len = TX_LEN_UNLIMITED,
738  .init = TX_NAME(ff_tx_fft_sr_init),
740  .prio = FF_TX_PRIO_BASE,
741 };
742 
743 static void TX_NAME(ff_tx_fft_naive)(AVTXContext *s, void *_dst, void *_src,
744  ptrdiff_t stride)
745 {
746  TXComplex *src = _src;
747  TXComplex *dst = _dst;
748  const int n = s->len;
749  double phase = s->inv ? 2.0*M_PI/n : -2.0*M_PI/n;
750 
751  for(int i = 0; i < n; i++) {
752  TXComplex tmp = { 0 };
753  for(int j = 0; j < n; j++) {
754  const double factor = phase*i*j;
755  const TXComplex mult = {
756  RESCALE(cos(factor)),
757  RESCALE(sin(factor)),
758  };
759  TXComplex res;
760  CMUL3(res, src[j], mult);
761  tmp.re += res.re;
762  tmp.im += res.im;
763  }
764  dst[i] = tmp;
765  }
766 }
767 
/* Codelet descriptor for ff_tx_fft_naive: accepts any factor, lengths from 2
 * upwards, needs no init callback, and is registered at FF_TX_PRIO_MIN.
 * NOTE(review): the listing's embedded numbering skips 772, so one
 * initializer line is missing from this view. */
768 static const FFTXCodelet TX_NAME(ff_tx_fft_naive_def) = {
769  .name = TX_NAME_STR("fft_naive"),
770  .function = TX_NAME(ff_tx_fft_naive),
771  .type = TX_TYPE(FFT),
773  .factors[0] = TX_FACTOR_ANY,
774  .min_len = 2,
775  .max_len = TX_LEN_UNLIMITED,
776  .init = NULL,
777  .cpu_flags = FF_TX_CPU_FLAGS_ALL,
778  .prio = FF_TX_PRIO_MIN,
779 };
780 
782  const FFTXCodelet *cd,
783  uint64_t flags,
785  int len, int inv,
786  const void *scale)
787 {
788  int ret;
789  int sub_len = len / cd->factors[0];
790  FFTXCodeletOptions sub_opts = { .invert_lookup = 0 };
791 
792  flags &= ~FF_TX_OUT_OF_PLACE; /* We want the subtransform to be */
793  flags |= AV_TX_INPLACE; /* in-place */
794  flags |= FF_TX_PRESHUFFLE; /* This function handles the permute step */
795 
796  if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts,
797  sub_len, inv, scale)))
798  return ret;
799 
800  if ((ret = ff_tx_gen_compound_mapping(s, cd->factors[0], sub_len)))
801  return ret;
802 
803  if (!(s->tmp = av_malloc(len*sizeof(*s->tmp))))
804  return AVERROR(ENOMEM);
805 
806  TX_TAB(ff_tx_init_tabs)(len / sub_len);
807 
808  return 0;
809 }
810 
/* Generates the compound transform ff_tx_fft_pfa_<N>xM and its descriptor,
 * for a total length of N*m where m = s->sub->len.
 * The generated function runs three passes:
 *  1. for each i < m, gather N inputs through in_map (s->map) and run the
 *     N-point kernel fft<N>, writing into s->tmp at sub_map[i] with stride m;
 *  2. run the sub-transform s->fn[0] in place on each of the N consecutive
 *     length-m rows of s->tmp;
 *  3. copy s->tmp to the output through out_map, which is the part of s->map
 *     starting at offset s->len.
 * The `stride` argument of the generated function is not used. */
811 #define DECL_COMP_FFT(N) \
812 static void TX_NAME(ff_tx_fft_pfa_##N##xM)(AVTXContext *s, void *_out, \
813  void *_in, ptrdiff_t stride) \
814 { \
815  const int m = s->sub->len; \
816  const int *in_map = s->map, *out_map = in_map + s->len; \
817  const int *sub_map = s->sub->map; \
818  TXComplex *in = _in; \
819  TXComplex *out = _out; \
820  TXComplex fft##N##in[N]; \
821  \
822  for (int i = 0; i < m; i++) { \
823  for (int j = 0; j < N; j++) \
824  fft##N##in[j] = in[in_map[i*N + j]]; \
825  fft##N(s->tmp + sub_map[i], fft##N##in, m); \
826  } \
827  \
828  for (int i = 0; i < N; i++) \
829  s->fn[0](&s->sub[0], s->tmp + m*i, s->tmp + m*i, sizeof(TXComplex)); \
830  \
831  for (int i = 0; i < N*m; i++) \
832  out[i] = s->tmp[out_map[i]]; \
833 } \
834  \
835 static const FFTXCodelet TX_NAME(ff_tx_fft_pfa_##N##xM_def) = { \
836  .name = TX_NAME_STR("fft_pfa_" #N "xM"), \
837  .function = TX_NAME(ff_tx_fft_pfa_##N##xM), \
838  .type = TX_TYPE(FFT), \
839  .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE, \
840  .factors = { N, TX_FACTOR_ANY }, \
841  .min_len = N*2, \
842  .max_len = TX_LEN_UNLIMITED, \
843  .init = TX_NAME(ff_tx_fft_pfa_init), \
844  .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
845  .prio = FF_TX_PRIO_BASE, \
846 };
847 
/* Compound transforms for each small kernel size provided in this file */
848 DECL_COMP_FFT(3)
849 DECL_COMP_FFT(5)
850 DECL_COMP_FFT(7)
851 DECL_COMP_FFT(9)
852 DECL_COMP_FFT(15)
853 
855  const FFTXCodelet *cd,
856  uint64_t flags,
858  int len, int inv,
859  const void *scale)
860 {
861  s->scale_d = *((SCALE_TYPE *)scale);
862  s->scale_f = s->scale_d;
863  return 0;
864 }
865 
866 static void TX_NAME(ff_tx_mdct_naive_fwd)(AVTXContext *s, void *_dst,
867  void *_src, ptrdiff_t stride)
868 {
869  TXSample *src = _src;
870  TXSample *dst = _dst;
871  double scale = s->scale_d;
872  int len = s->len;
873  const double phase = M_PI/(4.0*len);
874 
875  stride /= sizeof(*dst);
876 
877  for (int i = 0; i < len; i++) {
878  double sum = 0.0;
879  for (int j = 0; j < len*2; j++) {
880  int a = (2*j + 1 + len) * (2*i + 1);
881  sum += UNSCALE(src[j]) * cos(a * phase);
882  }
883  dst[i*stride] = RESCALE(sum*scale);
884  }
885 }
886 
887 static void TX_NAME(ff_tx_mdct_naive_inv)(AVTXContext *s, void *_dst,
888  void *_src, ptrdiff_t stride)
889 {
890  TXSample *src = _src;
891  TXSample *dst = _dst;
892  double scale = s->scale_d;
893  int len = s->len >> 1;
894  int len2 = len*2;
895  const double phase = M_PI/(4.0*len2);
896 
897  stride /= sizeof(*src);
898 
899  for (int i = 0; i < len; i++) {
900  double sum_d = 0.0;
901  double sum_u = 0.0;
902  double i_d = phase * (4*len - 2*i - 1);
903  double i_u = phase * (3*len2 + 2*i + 1);
904  for (int j = 0; j < len2; j++) {
905  double a = (2 * j + 1);
906  double a_d = cos(a * i_d);
907  double a_u = cos(a * i_u);
908  double val = UNSCALE(src[j*stride]);
909  sum_d += a_d * val;
910  sum_u += a_u * val;
911  }
912  dst[i + 0] = RESCALE( sum_d*scale);
913  dst[i + len] = RESCALE(-sum_u*scale);
914  }
915 }
916 
/* Codelet descriptor for ff_tx_mdct_naive_fwd: MDCT type, lengths from 2
 * upwards, registered at FF_TX_PRIO_MIN.
 * NOTE(review): the listing's embedded numbering skips 921 and 925-926, so
 * several initializer lines are missing from this view. */
917 static const FFTXCodelet TX_NAME(ff_tx_mdct_naive_fwd_def) = {
918  .name = TX_NAME_STR("mdct_naive_fwd"),
919  .function = TX_NAME(ff_tx_mdct_naive_fwd),
920  .type = TX_TYPE(MDCT),
922  .factors = { 2, TX_FACTOR_ANY }, /* MDCTs need an even length */
923  .min_len = 2,
924  .max_len = TX_LEN_UNLIMITED,
927  .prio = FF_TX_PRIO_MIN,
928 };
929 
/* Codelet descriptor for ff_tx_mdct_naive_inv: MDCT type, same factor list
 * and length range as the forward descriptor, registered at FF_TX_PRIO_MIN.
 * NOTE(review): the listing's embedded numbering skips 934 and 938-939, so
 * several initializer lines are missing from this view. */
930 static const FFTXCodelet TX_NAME(ff_tx_mdct_naive_inv_def) = {
931  .name = TX_NAME_STR("mdct_naive_inv"),
932  .function = TX_NAME(ff_tx_mdct_naive_inv),
933  .type = TX_TYPE(MDCT),
935  .factors = { 2, TX_FACTOR_ANY },
936  .min_len = 2,
937  .max_len = TX_LEN_UNLIMITED,
940  .prio = FF_TX_PRIO_MIN,
941 };
942 
944  const FFTXCodelet *cd,
945  uint64_t flags,
947  int len, int inv,
948  const void *scale)
949 {
950  int ret;
951  FFTXCodeletOptions sub_opts = { .invert_lookup = 0 };
952 
953  s->scale_d = *((SCALE_TYPE *)scale);
954  s->scale_f = s->scale_d;
955 
956  flags &= ~FF_TX_OUT_OF_PLACE; /* We want the subtransform to be */
957  flags |= AV_TX_INPLACE; /* in-place */
958  flags |= FF_TX_PRESHUFFLE; /* This function handles the permute step */
959 
960  if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts, len >> 1,
961  inv, scale)))
962  return ret;
963 
964  if ((ret = TX_TAB(ff_tx_mdct_gen_exp)(s)))
965  return ret;
966 
967  return 0;
968 }
969 
970 static void TX_NAME(ff_tx_mdct_sr_fwd)(AVTXContext *s, void *_dst, void *_src,
971  ptrdiff_t stride)
972 {
973  TXSample *src = _src, *dst = _dst;
974  TXComplex *exp = s->exp, tmp, *z = _dst;
975  const int len2 = s->len >> 1;
976  const int len4 = s->len >> 2;
977  const int len3 = len2 * 3;
978  const int *sub_map = s->sub->map;
979 
980  stride /= sizeof(*dst);
981 
982  for (int i = 0; i < len2; i++) { /* Folding and pre-reindexing */
983  const int k = 2*i;
984  const int idx = sub_map[i];
985  if (k < len2) {
986  tmp.re = FOLD(-src[ len2 + k], src[1*len2 - 1 - k]);
987  tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]);
988  } else {
989  tmp.re = FOLD(-src[ len2 + k], -src[5*len2 - 1 - k]);
990  tmp.im = FOLD( src[-len2 + k], -src[1*len3 - 1 - k]);
991  }
992  CMUL(z[idx].im, z[idx].re, tmp.re, tmp.im, exp[i].re, exp[i].im);
993  }
994 
995  s->fn[0](&s->sub[0], z, z, sizeof(TXComplex));
996 
997  for (int i = 0; i < len4; i++) {
998  const int i0 = len4 + i, i1 = len4 - i - 1;
999  TXComplex src1 = { z[i1].re, z[i1].im };
1000  TXComplex src0 = { z[i0].re, z[i0].im };
1001 
1002  CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im,
1003  exp[i0].im, exp[i0].re);
1004  CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im,
1005  exp[i1].im, exp[i1].re);
1006  }
1007 }
1008 
1009 static void TX_NAME(ff_tx_mdct_sr_inv)(AVTXContext *s, void *_dst, void *_src,
1010  ptrdiff_t stride)
1011 {
1012  TXComplex *z = _dst, *exp = s->exp;
1013  const TXSample *src = _src, *in1, *in2;
1014  const int len2 = s->len >> 1;
1015  const int len4 = s->len >> 2;
1016  const int *sub_map = s->sub->map;
1017 
1018  stride /= sizeof(*src);
1019  in1 = src;
1020  in2 = src + ((len2*2) - 1) * stride;
1021 
1022  for (int i = 0; i < len2; i++) {
1023  TXComplex tmp = { in2[-2*i*stride], in1[2*i*stride] };
1024  CMUL3(z[sub_map[i]], tmp, exp[i]);
1025  }
1026 
1027  s->fn[0](&s->sub[0], z, z, sizeof(TXComplex));
1028 
1029  for (int i = 0; i < len4; i++) {
1030  const int i0 = len4 + i, i1 = len4 - i - 1;
1031  TXComplex src1 = { z[i1].im, z[i1].re };
1032  TXComplex src0 = { z[i0].im, z[i0].re };
1033 
1034  CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re);
1035  CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re);
1036  }
1037 }
1038 
/* Codelet descriptor binding ff_tx_mdct_sr_fwd to ff_tx_mdct_sr_init at
 * base priority; only the first factor slot is set (to 2).
 * NOTE(review): the embedded listing numbers skip 1043 and 1048, so two
 * initializer lines are missing from this copy (presumably .flags and
 * .cpu_flags, going by the PFA descriptors' layout) - confirm against a
 * pristine copy of the file. */
1039 static const FFTXCodelet TX_NAME(ff_tx_mdct_sr_fwd_def) = {
1040  .name = TX_NAME_STR("mdct_sr_fwd"),
1041  .function = TX_NAME(ff_tx_mdct_sr_fwd),
1042  .type = TX_TYPE(MDCT),
1044  .factors[0] = 2,
1045  .min_len = 2,
1046  .max_len = TX_LEN_UNLIMITED,
1047  .init = TX_NAME(ff_tx_mdct_sr_init),
1049  .prio = FF_TX_PRIO_BASE,
1050 };
1051 
/* Codelet descriptor binding ff_tx_mdct_sr_inv to ff_tx_mdct_sr_init at
 * base priority; only the first factor slot is set (to 2).
 * NOTE(review): the embedded listing numbers skip 1056 and 1061, so two
 * initializer lines are missing from this copy (presumably .flags and
 * .cpu_flags, going by the PFA descriptors' layout) - confirm against a
 * pristine copy of the file. */
1052 static const FFTXCodelet TX_NAME(ff_tx_mdct_sr_inv_def) = {
1053  .name = TX_NAME_STR("mdct_sr_inv"),
1054  .function = TX_NAME(ff_tx_mdct_sr_inv),
1055  .type = TX_TYPE(MDCT),
1057  .factors[0] = 2,
1058  .min_len = 2,
1059  .max_len = TX_LEN_UNLIMITED,
1060  .init = TX_NAME(ff_tx_mdct_sr_init),
1062  .prio = FF_TX_PRIO_BASE,
1063 };
1064 
1066  const FFTXCodelet *cd,
1067  uint64_t flags,
1069  int len, int inv,
1070  const void *scale)
1071 {
1072  int ret;
1073 
1074  s->scale_d = *((SCALE_TYPE *)scale);
1075  s->scale_f = s->scale_d;
1076 
1077  flags &= ~AV_TX_FULL_IMDCT;
1078 
1079  if ((ret = ff_tx_init_subtx(s, TX_TYPE(MDCT), flags, NULL, len, 1, scale)))
1080  return ret;
1081 
1082  return 0;
1083 }
1084 
1085 static void TX_NAME(ff_tx_mdct_inv_full)(AVTXContext *s, void *_dst,
1086  void *_src, ptrdiff_t stride)
1087 {
1088  int len = s->len << 1;
1089  int len2 = len >> 1;
1090  int len4 = len >> 2;
1091  TXSample *dst = _dst;
1092 
1093  s->fn[0](&s->sub[0], dst + len4, _src, stride);
1094 
1095  stride /= sizeof(*dst);
1096 
1097  for (int i = 0; i < len4; i++) {
1098  dst[ i*stride] = -dst[(len2 - i - 1)*stride];
1099  dst[(len - i - 1)*stride] = dst[(len2 + i + 0)*stride];
1100  }
1101 }
1102 
/* Codelet descriptor for the full inverse MDCT wrapper (base priority).
 * NOTE(review): this copy is damaged. The .flags expression below ends in a
 * dangling '|' because listing line 1108 is missing, and the embedded
 * numbers also skip 1112-1113 (presumably .init and .cpu_flags, going by the
 * PFA descriptors' layout). As shown, the initializer is not valid C -
 * restore the missing lines from a pristine copy of the file. */
1103 static const FFTXCodelet TX_NAME(ff_tx_mdct_inv_full_def) = {
1104  .name = TX_NAME_STR("mdct_inv_full"),
1105  .function = TX_NAME(ff_tx_mdct_inv_full),
1106  .type = TX_TYPE(MDCT),
1107  .flags = AV_TX_UNALIGNED | AV_TX_INPLACE |
1109  .factors = { 2, TX_FACTOR_ANY },
1110  .min_len = 2,
1111  .max_len = TX_LEN_UNLIMITED,
1114  .prio = FF_TX_PRIO_BASE,
1115 };
1116 
1118  const FFTXCodelet *cd,
1119  uint64_t flags,
1121  int len, int inv,
1122  const void *scale)
1123 {
1124  int ret, sub_len;
1125  FFTXCodeletOptions sub_opts = { .invert_lookup = 0 };
1126 
1127  len >>= 1;
1128  sub_len = len / cd->factors[0];
1129 
1130  s->scale_d = *((SCALE_TYPE *)scale);
1131  s->scale_f = s->scale_d;
1132 
1133  flags &= ~FF_TX_OUT_OF_PLACE; /* We want the subtransform to be */
1134  flags |= AV_TX_INPLACE; /* in-place */
1135  flags |= FF_TX_PRESHUFFLE; /* This function handles the permute step */
1136 
1137  if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts,
1138  sub_len, inv, scale)))
1139  return ret;
1140 
1141  if ((ret = ff_tx_gen_compound_mapping(s, cd->factors[0], sub_len)))
1142  return ret;
1143 
1144  if ((ret = TX_TAB(ff_tx_mdct_gen_exp)(s)))
1145  return ret;
1146 
1147  if (!(s->tmp = av_malloc(len*sizeof(*s->tmp))))
1148  return AVERROR(ENOMEM);
1149 
1150  TX_TAB(ff_tx_init_tabs)(len / sub_len);
1151 
1152  return 0;
1153 }
1154 
/* DECL_COMP_IMDCT(N) generates the compound inverse MDCT function
 * ff_tx_mdct_pfa_<N>xM_inv together with its codelet descriptor.
 *
 * The generated function works in three passes:
 *  1. For each of the m (= s->sub->len) groups it gathers N input pairs via
 *     s->map, one sample read forwards from the start of the source and one
 *     read backwards from its end, rotates each pair by exp[k >> 1] and
 *     feeds them to the N-point kernel fft<N>, which writes into s->tmp at
 *     the position given by the subtransform's map with a stride of m.
 *  2. It runs the m-point subtransform in place on each of the N rows of
 *     s->tmp.
 *  3. It reads s->tmp through the second half of s->map (out_map), applies
 *     the final rotation and writes the result to the destination.
 *
 * The descriptor marks the codelet as out-of-place and inverse-only and
 * uses ff_tx_mdct_pfa_init as its init callback. The macro is instantiated
 * below for N = 3, 5, 7, 9 and 15. */
1155 #define DECL_COMP_IMDCT(N) \
1156 static void TX_NAME(ff_tx_mdct_pfa_##N##xM_inv)(AVTXContext *s, void *_dst, \
1157  void *_src, ptrdiff_t stride) \
1158 { \
1159  TXComplex fft##N##in[N]; \
1160  TXComplex *z = _dst, *exp = s->exp; \
1161  const TXSample *src = _src, *in1, *in2; \
1162  const int len4 = s->len >> 2; \
1163  const int m = s->sub->len; \
1164  const int *in_map = s->map, *out_map = in_map + N*m; \
1165  const int *sub_map = s->sub->map; \
1166  \
1167  stride /= sizeof(*src); /* To convert it from bytes */ \
1168  in1 = src; \
1169  in2 = src + ((N*m*2) - 1) * stride; \
1170  \
1171  for (int i = 0; i < m; i++) { \
1172  for (int j = 0; j < N; j++) { \
1173  const int k = in_map[i*N + j]; \
1174  TXComplex tmp = { in2[-k*stride], in1[k*stride] }; \
1175  CMUL3(fft##N##in[j], tmp, exp[k >> 1]); \
1176  } \
1177  fft##N(s->tmp + sub_map[i], fft##N##in, m); \
1178  } \
1179  \
1180  for (int i = 0; i < N; i++) \
1181  s->fn[0](&s->sub[0], s->tmp + m*i, s->tmp + m*i, sizeof(TXComplex)); \
1182  \
1183  for (int i = 0; i < len4; i++) { \
1184  const int i0 = len4 + i, i1 = len4 - i - 1; \
1185  const int s0 = out_map[i0], s1 = out_map[i1]; \
1186  TXComplex src1 = { s->tmp[s1].im, s->tmp[s1].re }; \
1187  TXComplex src0 = { s->tmp[s0].im, s->tmp[s0].re }; \
1188  \
1189  CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re); \
1190  CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re); \
1191  } \
1192 } \
1193  \
1194 static const FFTXCodelet TX_NAME(ff_tx_mdct_pfa_##N##xM_inv_def) = { \
1195  .name = TX_NAME_STR("mdct_pfa_" #N "xM_inv"), \
1196  .function = TX_NAME(ff_tx_mdct_pfa_##N##xM_inv), \
1197  .type = TX_TYPE(MDCT), \
1198  .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY, \
1199  .factors = { N, TX_FACTOR_ANY }, \
1200  .min_len = N*2, \
1201  .max_len = TX_LEN_UNLIMITED, \
1202  .init = TX_NAME(ff_tx_mdct_pfa_init), \
1203  .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
1204  .prio = FF_TX_PRIO_BASE, \
1205 };
1206 
1207 DECL_COMP_IMDCT(3)
1208 DECL_COMP_IMDCT(5)
1209 DECL_COMP_IMDCT(7)
1210 DECL_COMP_IMDCT(9)
1211 DECL_COMP_IMDCT(15)
1212 
/* DECL_COMP_MDCT(N) generates the compound forward MDCT function
 * ff_tx_mdct_pfa_<N>xM_fwd together with its codelet descriptor.
 *
 * The generated function works in three passes:
 *  1. For each of the m (= s->sub->len) groups it picks N indices from
 *     s->map, folds the corresponding input samples into one complex value
 *     each (two folding variants, chosen by comparing the index against
 *     N*m), rotates it by exp[k >> 1] and passes the N values to the N-point
 *     kernel fft<N>, which writes into s->tmp at the position given by the
 *     subtransform's map with a stride of m.
 *  2. It runs the m-point subtransform in place on each of the N rows of
 *     s->tmp.
 *  3. It reads s->tmp through the second half of s->map (out_map), applies
 *     the final rotation and writes real output samples to the destination
 *     using the caller's stride. Note that the input is indexed without the
 *     stride.
 *
 * The descriptor marks the codelet as out-of-place and forward-only and
 * uses ff_tx_mdct_pfa_init as its init callback. The macro is instantiated
 * below for N = 3, 5, 7, 9 and 15. */
1213 #define DECL_COMP_MDCT(N) \
1214 static void TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd)(AVTXContext *s, void *_dst, \
1215  void *_src, ptrdiff_t stride) \
1216 { \
1217  TXComplex fft##N##in[N]; \
1218  TXSample *src = _src, *dst = _dst; \
1219  TXComplex *exp = s->exp, tmp; \
1220  const int m = s->sub->len; \
1221  const int len4 = N*m; \
1222  const int len3 = len4 * 3; \
1223  const int len8 = s->len >> 2; \
1224  const int *in_map = s->map, *out_map = in_map + N*m; \
1225  const int *sub_map = s->sub->map; \
1226  \
1227  stride /= sizeof(*dst); \
1228  \
1229  for (int i = 0; i < m; i++) { /* Folding and pre-reindexing */ \
1230  for (int j = 0; j < N; j++) { \
1231  const int k = in_map[i*N + j]; \
1232  if (k < len4) { \
1233  tmp.re = FOLD(-src[ len4 + k], src[1*len4 - 1 - k]); \
1234  tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]); \
1235  } else { \
1236  tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]); \
1237  tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]); \
1238  } \
1239  CMUL(fft##N##in[j].im, fft##N##in[j].re, tmp.re, tmp.im, \
1240  exp[k >> 1].re, exp[k >> 1].im); \
1241  } \
1242  fft##N(s->tmp + sub_map[i], fft##N##in, m); \
1243  } \
1244  \
1245  for (int i = 0; i < N; i++) \
1246  s->fn[0](&s->sub[0], s->tmp + m*i, s->tmp + m*i, sizeof(TXComplex)); \
1247  \
1248  for (int i = 0; i < len8; i++) { \
1249  const int i0 = len8 + i, i1 = len8 - i - 1; \
1250  const int s0 = out_map[i0], s1 = out_map[i1]; \
1251  TXComplex src1 = { s->tmp[s1].re, s->tmp[s1].im }; \
1252  TXComplex src0 = { s->tmp[s0].re, s->tmp[s0].im }; \
1253  \
1254  CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im, \
1255  exp[i0].im, exp[i0].re); \
1256  CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im, \
1257  exp[i1].im, exp[i1].re); \
1258  } \
1259 } \
1260  \
1261 static const FFTXCodelet TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd_def) = { \
1262  .name = TX_NAME_STR("mdct_pfa_" #N "xM_fwd"), \
1263  .function = TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd), \
1264  .type = TX_TYPE(MDCT), \
1265  .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY, \
1266  .factors = { N, TX_FACTOR_ANY }, \
1267  .min_len = N*2, \
1268  .max_len = TX_LEN_UNLIMITED, \
1269  .init = TX_NAME(ff_tx_mdct_pfa_init), \
1270  .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
1271  .prio = FF_TX_PRIO_BASE, \
1272 };
1273 
1274 DECL_COMP_MDCT(3)
1275 DECL_COMP_MDCT(5)
1276 DECL_COMP_MDCT(7)
1277 DECL_COMP_MDCT(9)
1278 DECL_COMP_MDCT(15)
1279 
1281  const FFTXCodelet *cd,
1282  uint64_t flags,
1284  int len, int inv,
1285  const void *scale)
1286 {
1287  int ret;
1288  double f, m;
1289  TXSample *tab;
1290 
1291  s->scale_d = *((SCALE_TYPE *)scale);
1292  s->scale_f = s->scale_d;
1293 
1294  if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, NULL, len >> 1, inv, scale)))
1295  return ret;
1296 
1297  if (!(s->exp = av_mallocz((8 + (len >> 2) - 1)*sizeof(*s->exp))))
1298  return AVERROR(ENOMEM);
1299 
1300  tab = (TXSample *)s->exp;
1301 
1302  f = 2*M_PI/len;
1303 
1304  m = (inv ? 2*s->scale_d : s->scale_d);
1305 
1306  *tab++ = RESCALE((inv ? 0.5 : 1.0) * m);
1307  *tab++ = RESCALE(inv ? 0.5*m : 1.0);
1308  *tab++ = RESCALE( m);
1309  *tab++ = RESCALE(-m);
1310 
1311  *tab++ = RESCALE( (0.5 - 0.0) * m);
1312  *tab++ = RESCALE( (0.0 - 0.5) * m);
1313  *tab++ = RESCALE( (0.5 - inv) * m);
1314  *tab++ = RESCALE(-(0.5 - inv) * m);
1315 
1316  for (int i = 0; i < len >> 2; i++)
1317  *tab++ = RESCALE(cos(i*f));
1318  for (int i = len >> 2; i >= 0; i--)
1319  *tab++ = RESCALE(cos(i*f) * (inv ? +1.0 : -1.0));
1320 
1321  return 0;
1322 }
1323 
/* DECL_RDFT(name, inv) generates the transform function ff_tx_rdft_<name>.
 *
 * It reads the factor table and the two twiddle tables prepared by
 * ff_tx_rdft_init from s->exp (factors at offset 0, tcos at offset 8, tsin
 * right after tcos).
 *
 * With inv == 0 the complex subtransform runs first (source -> destination)
 * and the split/recombine step is then applied in place on the destination;
 * finally element [0].im is moved to [len2].re and cleared.
 * With inv != 0 the working buffer is the SOURCE: [len2].re is first copied
 * into [0].im, the recombine step is applied in place on the source, and
 * only then does the subtransform write to the destination. The caller's
 * input buffer is therefore modified in that direction.
 *
 * Instantiated below as r2c (inv = 0) and c2r (inv = 1). */
1324 #define DECL_RDFT(name, inv) \
1325 static void TX_NAME(ff_tx_rdft_ ##name)(AVTXContext *s, void *_dst, \
1326  void *_src, ptrdiff_t stride) \
1327 { \
1328  const int len2 = s->len >> 1; \
1329  const int len4 = s->len >> 2; \
1330  const TXSample *fact = (void *)s->exp; \
1331  const TXSample *tcos = fact + 8; \
1332  const TXSample *tsin = tcos + len4; \
1333  TXComplex *data = inv ? _src : _dst; \
1334  TXComplex t[3]; \
1335  \
1336  if (!inv) \
1337  s->fn[0](&s->sub[0], data, _src, sizeof(TXComplex)); \
1338  else \
1339  data[0].im = data[len2].re; \
1340  \
1341  /* The DC value's both components are real, but we need to change them \
1342  * into complex values. Also, the middle of the array is special-cased. \
1343  * These operations can be done before or after the loop. */ \
1344  t[0].re = data[0].re; \
1345  data[0].re = t[0].re + data[0].im; \
1346  data[0].im = t[0].re - data[0].im; \
1347  data[ 0].re = MULT(fact[0], data[ 0].re); \
1348  data[ 0].im = MULT(fact[1], data[ 0].im); \
1349  data[len4].re = MULT(fact[2], data[len4].re); \
1350  data[len4].im = MULT(fact[3], data[len4].im); \
1351  \
1352  for (int i = 1; i < len4; i++) { \
1353  /* Separate even and odd FFTs */ \
1354  t[0].re = MULT(fact[4], (data[i].re + data[len2 - i].re)); \
1355  t[0].im = MULT(fact[5], (data[i].im - data[len2 - i].im)); \
1356  t[1].re = MULT(fact[6], (data[i].im + data[len2 - i].im)); \
1357  t[1].im = MULT(fact[7], (data[i].re - data[len2 - i].re)); \
1358  \
1359  /* Apply twiddle factors to the odd FFT and add to the even FFT */ \
1360  CMUL(t[2].re, t[2].im, t[1].re, t[1].im, tcos[i], tsin[i]); \
1361  \
1362  data[ i].re = t[0].re + t[2].re; \
1363  data[ i].im = t[2].im - t[0].im; \
1364  data[len2 - i].re = t[0].re - t[2].re; \
1365  data[len2 - i].im = t[2].im + t[0].im; \
1366  } \
1367  \
1368  if (inv) { \
1369  s->fn[0](&s->sub[0], _dst, data, sizeof(TXComplex)); \
1370  } else { \
1371  /* Move [0].im to the last position, as convention requires */ \
1372  data[len2].re = data[0].im; \
1373  data[ 0].im = 0; \
1374  } \
1375 }
1376 
1377 DECL_RDFT(r2c, 0)
1378 DECL_RDFT(c2r, 1)
1379 
/* Codelet descriptor for the real-to-complex RDFT (base priority, init via
 * ff_tx_rdft_init).
 * NOTE(review): this copy is damaged. The .flags expression below ends in a
 * dangling '|' because listing line 1385 is missing, and the embedded
 * numbers also skip 1390 (presumably .cpu_flags, going by the PFA
 * descriptors' layout). As shown, the initializer is not valid C - restore
 * the missing lines from a pristine copy of the file. */
1380 static const FFTXCodelet TX_NAME(ff_tx_rdft_r2c_def) = {
1381  .name = TX_NAME_STR("rdft_r2c"),
1382  .function = TX_NAME(ff_tx_rdft_r2c),
1383  .type = TX_TYPE(RDFT),
1384  .flags = AV_TX_UNALIGNED | AV_TX_INPLACE |
1386  .factors = { 2, TX_FACTOR_ANY },
1387  .min_len = 2,
1388  .max_len = TX_LEN_UNLIMITED,
1389  .init = TX_NAME(ff_tx_rdft_init),
1391  .prio = FF_TX_PRIO_BASE,
1392 };
1393 
/* Codelet descriptor for the complex-to-real RDFT (base priority, init via
 * ff_tx_rdft_init).
 * NOTE(review): this copy is damaged. The .flags expression below ends in a
 * dangling '|' because listing line 1399 is missing, and the embedded
 * numbers also skip 1404 (presumably .cpu_flags, going by the PFA
 * descriptors' layout). As shown, the initializer is not valid C - restore
 * the missing lines from a pristine copy of the file. */
1394 static const FFTXCodelet TX_NAME(ff_tx_rdft_c2r_def) = {
1395  .name = TX_NAME_STR("rdft_c2r"),
1396  .function = TX_NAME(ff_tx_rdft_c2r),
1397  .type = TX_TYPE(RDFT),
1398  .flags = AV_TX_UNALIGNED | AV_TX_INPLACE |
1400  .factors = { 2, TX_FACTOR_ANY },
1401  .min_len = 2,
1402  .max_len = TX_LEN_UNLIMITED,
1403  .init = TX_NAME(ff_tx_rdft_init),
1405  .prio = FF_TX_PRIO_BASE,
1406 };
1407 
1408 int TX_TAB(ff_tx_mdct_gen_exp)(AVTXContext *s)
1409 {
1410  int len4 = s->len >> 1;
1411  double scale = s->scale_d;
1412  const double theta = (scale < 0 ? len4 : 0) + 1.0/8.0;
1413 
1414  if (!(s->exp = av_malloc_array(len4, sizeof(*s->exp))))
1415  return AVERROR(ENOMEM);
1416 
1417  scale = sqrt(fabs(scale));
1418  for (int i = 0; i < len4; i++) {
1419  const double alpha = M_PI_2 * (i + theta) / len4;
1420  s->exp[i].re = RESCALE(cos(alpha) * scale);
1421  s->exp[i].im = RESCALE(sin(alpha) * scale);
1422  }
1423 
1424  return 0;
1425 }
1426 
/* NULL-terminated table of pointers to every codelet descriptor defined by
 * this template: the fixed-size power-of-two FFT codelets first, then the
 * standalone FFT, MDCT and RDFT transforms. The entries are left in their
 * original order. */
1427 const FFTXCodelet * const TX_NAME(ff_tx_codelet_list)[] = {
1428  /* Split-Radix codelets */
1429  &TX_NAME(ff_tx_fft2_ns_def),
1430  &TX_NAME(ff_tx_fft4_ns_def),
1431  &TX_NAME(ff_tx_fft8_ns_def),
1432  &TX_NAME(ff_tx_fft16_ns_def),
1433  &TX_NAME(ff_tx_fft32_ns_def),
1434  &TX_NAME(ff_tx_fft64_ns_def),
1435  &TX_NAME(ff_tx_fft128_ns_def),
1436  &TX_NAME(ff_tx_fft256_ns_def),
1437  &TX_NAME(ff_tx_fft512_ns_def),
1438  &TX_NAME(ff_tx_fft1024_ns_def),
1439  &TX_NAME(ff_tx_fft2048_ns_def),
1440  &TX_NAME(ff_tx_fft4096_ns_def),
1441  &TX_NAME(ff_tx_fft8192_ns_def),
1442  &TX_NAME(ff_tx_fft16384_ns_def),
1443  &TX_NAME(ff_tx_fft32768_ns_def),
1444  &TX_NAME(ff_tx_fft65536_ns_def),
1445  &TX_NAME(ff_tx_fft131072_ns_def),
1446 
1447  /* Standalone transforms */
1448  &TX_NAME(ff_tx_fft_sr_def),
1449  &TX_NAME(ff_tx_fft_sr_inplace_def),
1450  &TX_NAME(ff_tx_fft_pfa_3xM_def),
1451  &TX_NAME(ff_tx_fft_pfa_5xM_def),
1452  &TX_NAME(ff_tx_fft_pfa_7xM_def),
1453  &TX_NAME(ff_tx_fft_pfa_9xM_def),
1454  &TX_NAME(ff_tx_fft_pfa_15xM_def),
1455  &TX_NAME(ff_tx_fft_naive_def),
1456  &TX_NAME(ff_tx_mdct_sr_fwd_def),
1457  &TX_NAME(ff_tx_mdct_sr_inv_def),
1458  &TX_NAME(ff_tx_mdct_pfa_3xM_fwd_def),
1459  &TX_NAME(ff_tx_mdct_pfa_5xM_fwd_def),
1460  &TX_NAME(ff_tx_mdct_pfa_7xM_fwd_def),
1461  &TX_NAME(ff_tx_mdct_pfa_9xM_fwd_def),
1462  &TX_NAME(ff_tx_mdct_pfa_15xM_fwd_def),
1463  &TX_NAME(ff_tx_mdct_pfa_3xM_inv_def),
1464  &TX_NAME(ff_tx_mdct_pfa_5xM_inv_def),
1465  &TX_NAME(ff_tx_mdct_pfa_7xM_inv_def),
1466  &TX_NAME(ff_tx_mdct_pfa_9xM_inv_def),
1467  &TX_NAME(ff_tx_mdct_pfa_15xM_inv_def),
1468  &TX_NAME(ff_tx_mdct_naive_fwd_def),
1469  &TX_NAME(ff_tx_mdct_naive_inv_def),
1470  &TX_NAME(ff_tx_mdct_inv_full_def),
1471  &TX_NAME(ff_tx_rdft_r2c_def),
1472  &TX_NAME(ff_tx_rdft_c2r_def),
1473 
1474  NULL,
1475 };
func
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:68
ff_tx_fft_sr_combine
static void TX_NAME() ff_tx_fft_sr_combine(TXComplex *z, const TXSample *cos, int len)
Definition: tx_template.c:498
ff_tx_gen_ptwo_inplace_revtab_idx
int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s)
Definition: tx.c:125
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
out
FILE * out
Definition: movenc.c:54
ff_ctz
#define ff_ctz
Definition: intmath.h:106
TRANSFORM
#define TRANSFORM(a0, a1, a2, a3, wre, wim)
Definition: tx_template.c:490
sr_tabs_init_once
static FFSRTabsInitOnce sr_tabs_init_once[]
Definition: tx_template.c:89
src1
const pixel * src1
Definition: h264pred_template.c:421
AVTXContext
Definition: tx_priv.h:201
TX_NAME
static const FFTXCodelet TX_NAME(ff_tx_fft_sr_def)
im
float im
Definition: fft.c:79
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
FFTXCodeletOptions
Definition: tx_priv.h:160
w
uint8_t w
Definition: llviddspenc.c:38
M_PI_2
#define M_PI_2
Definition: mathematics.h:55
ff_tx_gen_compound_mapping
int ff_tx_gen_compound_mapping(AVTXContext *s, int n, int m)
Definition: tx.c:42
CMUL3
#define CMUL3(c, a, b)
Definition: mdct15.c:42
DECL_RDFT
#define DECL_RDFT(name, inv)
Definition: tx_template.c:1324
t1
#define t1
Definition: regdef.h:29
fft15
static av_always_inline void fft15(TXComplex *out, TXComplex *in, ptrdiff_t stride)
Definition: tx_template.c:463
FF_TX_CPU_FLAGS_ALL
#define FF_TX_CPU_FLAGS_ALL
Definition: tx_priv.h:196
FFSRTabsInitOnce::factors
int factors[TX_MAX_SUB]
Definition: tx_template.c:58
fft5
static void fft5(FFTComplex *out, FFTComplex *in, FFTComplex exptab[2])
Definition: mdct15.c:93
ff_tx_fft_naive
static void TX_NAME() ff_tx_fft_naive(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:743
SR_TABLE
#define SR_TABLE(len)
Definition: tx_template.c:30
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:30
DECL_FFT5
#define DECL_FFT5(NAME, D0, D1, D2, D3, D4)
Definition: tx_template.c:212
ff_tx_mdct_naive_fwd
static void TX_NAME() ff_tx_mdct_naive_fwd(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:866
ff_tx_rdft_init
static av_cold int TX_NAME() ff_tx_rdft_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
Definition: tx_template.c:1280
DECL_SR_CODELET_DEF
#define DECL_SR_CODELET_DEF(n)
Definition: tx_template.c:535
tab
static const struct twinvq_data tab
Definition: twinvq_data.h:10345
sum_d
static void sum_d(const int *input, int *output, int len)
Definition: dcadct.c:51
val
static double val(void *priv, double ch)
Definition: aeval.c:77
scale
static av_always_inline float scale(float x, float s)
Definition: vf_v360.c:1388
TX_MAX_SUB
#define TX_MAX_SUB
Definition: tx_priv.h:166
TABLE_DEF
#define TABLE_DEF(name, size)
Definition: tx_template.c:27
FFTXCodelet::type
enum AVTXType type
Definition: tx_priv.h:171
mult
static int16_t mult(Float11 *f1, Float11 *f2)
Definition: g726.c:60
ff_tx_mdct_sr_init
static av_cold int TX_NAME() ff_tx_mdct_sr_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
Definition: tx_template.c:943
ff_thread_once
static int ff_thread_once(char *control, void(*routine)(void))
Definition: thread.h:179
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
av_cold
#define av_cold
Definition: attributes.h:90
c2r
static void c2r(float *buffer, int size)
Definition: af_apsyclip.c:386
s
#define s(width, name)
Definition: cbs_vp9.c:256
t7
#define t7
Definition: regdef.h:35
ff_tx_mdct_naive_init
static av_cold int TX_NAME() ff_tx_mdct_naive_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
Definition: tx_template.c:854
FF_TX_FORWARD_ONLY
#define FF_TX_FORWARD_ONLY
Definition: tx_priv.h:147
FFTXCodelet::cpu_flags
int cpu_flags
Definition: tx_priv.h:193
FFSRTabsInitOnce::control
AVOnce control
Definition: tx_template.c:57
AV_TX_FULL_IMDCT
@ AV_TX_FULL_IMDCT
Performs a full inverse MDCT rather than leaving out samples that can be derived through symmetry.
Definition: tx.h:136
opts
AVDictionary * opts
Definition: movenc.c:50
AV_ONCE_INIT
#define AV_ONCE_INIT
Definition: thread.h:177
fabs
static __device__ float fabs(float a)
Definition: cuda_runtime.h:182
NULL
#define NULL
Definition: coverity.c:32
ff_tx_mdct_gen_exp
int TX_TAB() ff_tx_mdct_gen_exp(AVTXContext *s)
Definition: tx_template.c:1406
t5
#define t5
Definition: regdef.h:33
FFSRTabsInitOnce::func
void(* func)(void)
Definition: tx_template.c:56
t6
#define t6
Definition: regdef.h:34
AV_TX_INPLACE
@ AV_TX_INPLACE
Performs an in-place transformation on the input.
Definition: tx.h:122
r2c
static void r2c(float *buffer, int size)
Definition: af_apsyclip.c:377
FF_TX_OUT_OF_PLACE
#define FF_TX_OUT_OF_PLACE
Definition: tx_priv.h:143
ff_tx_fft8_ns
static void TX_NAME() ff_tx_fft8_ns(AVTXContext *s, void *dst, void *src, ptrdiff_t stride)
Definition: tx_template.c:592
AV_TX_UNALIGNED
@ AV_TX_UNALIGNED
Relaxes alignment requirement for the in and out arrays of av_tx_fn().
Definition: tx.h:128
exp
int8_t exp
Definition: eval.c:72
DECL_COMP_MDCT
#define DECL_COMP_MDCT(N)
Definition: tx_template.c:1213
AVOnce
#define AVOnce
Definition: thread.h:176
ff_tx_fft_pfa_init
static av_cold int TX_NAME() ff_tx_fft_pfa_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
Definition: tx_template.c:781
FF_TX_PRESHUFFLE
#define FF_TX_PRESHUFFLE
Definition: tx_priv.h:145
ff_tx_fft_sr_codelet_init
static av_cold int TX_NAME() ff_tx_fft_sr_codelet_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
Definition: tx_template.c:524
f
f
Definition: af_crystalizer.c:122
ff_tx_fft_sr_init
static av_cold int TX_NAME() ff_tx_fft_sr_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
Definition: tx_template.c:654
ff_tx_init_tab_53
static av_cold void TX_TAB() ff_tx_init_tab_53(void)
Definition: tx_template.c:107
FF_TX_PRIO_BASE
@ FF_TX_PRIO_BASE
Definition: tx_priv.h:150
fft9
static av_always_inline void fft9(TXComplex *out, TXComplex *in, ptrdiff_t stride)
Definition: tx_template.c:339
t8
#define t8
Definition: regdef.h:53
nptwo_tabs_init_once
static FFSRTabsInitOnce nptwo_tabs_init_once[]
Definition: tx_template.c:141
ff_tx_mdct_sr_fwd
static void TX_NAME() ff_tx_mdct_sr_fwd(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:970
BF
#define BF(a, b, c, s)
Definition: dct32_template.c:90
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
ff_tx_fft2_ns
static void TX_NAME() ff_tx_fft2_ns(AVTXContext *s, void *dst, void *src, ptrdiff_t stride)
Definition: tx_template.c:565
M_PI
#define M_PI
Definition: mathematics.h:52
TXComplex
void TXComplex
Definition: tx_priv.h:61
ff_tx_fft_sr_inplace
static void TX_NAME() ff_tx_fft_sr_inplace(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:694
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:271
t4
#define t4
Definition: regdef.h:32
t3
#define t3
Definition: regdef.h:31
av_malloc_array
#define av_malloc_array(a, b)
Definition: tableprint_vlc.h:31
ff_tx_gen_ptwo_revtab
int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup)
Definition: tx.c:107
av_always_inline
#define av_always_inline
Definition: attributes.h:49
DECL_SR_CODELET
#define DECL_SR_CODELET(n, n2, n4)
Definition: tx_template.c:550
DECL_COMP_IMDCT
#define DECL_COMP_IMDCT(N)
Definition: tx_template.c:1155
av_mallocz
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:264
len
int len
Definition: vorbis_enc_data.h:426
fft3
static av_always_inline void fft3(TXComplex *out, TXComplex *in, ptrdiff_t stride)
Definition: tx_template.c:176
TX_LEN_UNLIMITED
#define TX_LEN_UNLIMITED
Definition: tx_priv.h:182
stride
#define stride
Definition: h264pred_template.c:537
ret
ret
Definition: filter_design.txt:187
ff_tx_init_subtx
av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
Definition: tx.c:440
FFSWAP
#define FFSWAP(type, a, b)
Definition: macros.h:52
ff_tx_init_tab_7
static av_cold void TX_TAB() ff_tx_init_tab_7(void)
Definition: tx_template.c:119
TX_FACTOR_ANY
#define TX_FACTOR_ANY
Definition: tx_priv.h:178
FF_TX_INVERSE_ONLY
#define FF_TX_INVERSE_ONLY
Definition: tx_priv.h:146
ff_tx_init_tab_9
static av_cold void TX_TAB() ff_tx_init_tab_9(void)
Definition: tx_template.c:129
FFTXCodelet
Definition: tx_priv.h:168
ff_tx_init_tabs
av_cold void TX_TAB() ff_tx_init_tabs(int len)
Definition: tx_template.c:147
t2
#define t2
Definition: regdef.h:30
ff_tx_mdct_naive_inv
static void TX_NAME() ff_tx_mdct_naive_inv(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:887
BUTTERFLIES
#define BUTTERFLIES(a0, a1, a2, a3)
Definition: tx_template.c:476
ff_tx_fft16_ns
static void TX_NAME() ff_tx_fft16_ns(AVTXContext *s, void *dst, void *src, ptrdiff_t stride)
Definition: tx_template.c:610
src0
const pixel *const src0
Definition: h264pred_template.c:420
FFTXCodelet::name
const char * name
Definition: tx_priv.h:169
factor
static const int factor[16]
Definition: vf_pp7.c:76
ff_tx_fft_sr
static void TX_NAME() ff_tx_fft_sr(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:678
FFTXCodeletOptions::invert_lookup
int invert_lookup
Definition: tx_priv.h:161
FFSRTabsInitOnce
Definition: tx_template.c:55
INIT_FF_SR_TAB
#define INIT_FF_SR_TAB(len)
Definition: tx_template.c:61
ff_tx_mdct_sr_inv
static void TX_NAME() ff_tx_mdct_sr_inv(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:1009
map
const VDPAUPixFmtMap * map
Definition: hwcontext_vdpau.c:71
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
fft7
static av_always_inline void fft7(TXComplex *out, TXComplex *in, ptrdiff_t stride)
Definition: tx_template.c:252
int32_t
int32_t
Definition: audioconvert.c:56
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:561
ff_tx_fft4_ns
static void TX_NAME() ff_tx_fft4_ns(AVTXContext *s, void *dst, void *src, ptrdiff_t stride)
Definition: tx_template.c:576
DECL_COMP_FFT
#define DECL_COMP_FFT(N)
Definition: tx_template.c:811
ff_tx_mdct_pfa_init
static av_cold int TX_NAME() ff_tx_mdct_pfa_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
Definition: tx_template.c:1117
ff_tx_mdct_inv_full_init
static av_cold int TX_NAME() ff_tx_mdct_inv_full_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
Definition: tx_template.c:1065
CMUL
#define CMUL(dre, dim, are, aim, bre, bim)
Definition: fft-internal.h:42
re
float re
Definition: fft.c:79
ff_tx_mdct_inv_full
static void TX_NAME() ff_tx_mdct_inv_full(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:1085
FF_TX_PRIO_MIN
@ FF_TX_PRIO_MIN
Definition: tx_priv.h:155