#define TABLE_DEF(name, size) \
    DECLARE_ALIGNED(32, TXSample, TX_TAB(ff_tx_tab_ ##name))[size]

#define SR_POW2_TABLES \

#define SR_TABLE(len) \
    TABLE_DEF(len, len/4 + 1);
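/* SR_POW2_TABLES is an X-macro listing every supported power-of-two length:
 * it expands SR_TABLE(len) once per length. SR_TABLE is then redefined
 * several times below, so the same length list generates the table
 * declarations, the per-length init functions, the init-function pointer
 * array and the AVOnce guard array. */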
#define SR_TABLE(len)                                     \
static av_cold void TX_TAB(ff_tx_init_tab_ ##len)(void)   \
{                                                         \
    double freq = 2*M_PI/len;                             \
    TXSample *tab = TX_TAB(ff_tx_tab_ ##len);             \
                                                          \
    for (int i = 0; i < len/4; i++)                       \
        *tab++ = RESCALE(cos(i*freq));                    \
#define SR_TABLE(len) TX_TAB(ff_tx_init_tab_ ##len),

#define SR_TABLE(len) AV_ONCE_INIT,
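/* These last two expansions fill sr_tabs_init_funcs[] and
 * sr_tabs_init_once[], which ff_tx_init_tabs() walks with ff_thread_once()
 * so that each table is computed at most once, thread-safely. */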
    TX_TAB(ff_tx_tab_53)[0] = RESCALE(cos(2 * M_PI /  5));
    TX_TAB(ff_tx_tab_53)[1] = RESCALE(cos(2 * M_PI /  5));
    TX_TAB(ff_tx_tab_53)[2] = RESCALE(cos(2 * M_PI / 10));
    TX_TAB(ff_tx_tab_53)[3] = RESCALE(cos(2 * M_PI / 10));
    TX_TAB(ff_tx_tab_53)[4] = RESCALE(sin(2 * M_PI /  5));
    TX_TAB(ff_tx_tab_53)[5] = RESCALE(sin(2 * M_PI /  5));
    TX_TAB(ff_tx_tab_53)[6] = RESCALE(sin(2 * M_PI / 10));
    TX_TAB(ff_tx_tab_53)[7] = RESCALE(sin(2 * M_PI / 10));

    TX_TAB(ff_tx_tab_53)[ 8] = RESCALE(cos(2 * M_PI / 12));
    TX_TAB(ff_tx_tab_53)[ 9] = RESCALE(cos(2 * M_PI / 12));
    TX_TAB(ff_tx_tab_53)[10] = RESCALE(cos(2 * M_PI /  6));
    TX_TAB(ff_tx_tab_53)[11] = RESCALE(cos(8 * M_PI /  6));
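/* Note that each 5-point and 3-point constant is stored twice in adjacent
 * slots; the duplication likely keeps the table layout convenient for SIMD
 * implementations that load two identical factors per register. */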
    TX_TAB(ff_tx_tab_7)[0] = RESCALE(cos(2 * M_PI /  7));
    TX_TAB(ff_tx_tab_7)[1] = RESCALE(sin(2 * M_PI /  7));
    TX_TAB(ff_tx_tab_7)[2] = RESCALE(sin(2 * M_PI / 28));
    TX_TAB(ff_tx_tab_7)[3] = RESCALE(cos(2 * M_PI / 28));
    TX_TAB(ff_tx_tab_7)[4] = RESCALE(cos(2 * M_PI / 14));
    TX_TAB(ff_tx_tab_7)[5] = RESCALE(sin(2 * M_PI / 14));
    TX_TAB(ff_tx_tab_9)[0] = RESCALE(cos(2 * M_PI /  3));
    TX_TAB(ff_tx_tab_9)[1] = RESCALE(sin(2 * M_PI /  3));
    TX_TAB(ff_tx_tab_9)[2] = RESCALE(cos(2 * M_PI /  9));
    TX_TAB(ff_tx_tab_9)[3] = RESCALE(sin(2 * M_PI /  9));
    TX_TAB(ff_tx_tab_9)[4] = RESCALE(cos(2 * M_PI / 36));
    TX_TAB(ff_tx_tab_9)[5] = RESCALE(sin(2 * M_PI / 36));
    TX_TAB(ff_tx_tab_9)[6] = TX_TAB(ff_tx_tab_9)[2] + TX_TAB(ff_tx_tab_9)[5];
    TX_TAB(ff_tx_tab_9)[7] = TX_TAB(ff_tx_tab_9)[3] - TX_TAB(ff_tx_tab_9)[4];
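/* tab_9[6] and tab_9[7] are sums/differences of the entries above,
 * precomputed here so the 9-point transform below can use them directly. */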
    int idx = factor_2 - 3;
    for (int i = 0; i <= idx; i++)
    const TXSample *tab = TX_TAB(ff_tx_tab_53);
    mtmp[0] = (int64_t)tab[ 8] * tmp[1].re;
    mtmp[1] = (int64_t)tab[ 9] * tmp[1].im;
    mtmp[2] = (int64_t)tab[10] * tmp[2].re;
    mtmp[3] = (int64_t)tab[10] * tmp[2].im;
    out[1*stride].re = tmp[0].re - (mtmp[2] + mtmp[0] + 0x40000000 >> 31);
    out[1*stride].im = tmp[0].im - (mtmp[3] - mtmp[1] + 0x40000000 >> 31);
    out[2*stride].re = tmp[0].re - (mtmp[2] - mtmp[0] + 0x40000000 >> 31);
    out[2*stride].im = tmp[0].im - (mtmp[3] + mtmp[1] + 0x40000000 >> 31);
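/* Fixed-point rounding idiom used throughout this file: products of two
 * Q31 values are accumulated in 64 bits, 0x40000000 is added, and the
 * result is shifted right by 31, i.e. round-to-nearest rather than
 * truncation. Note '+' binds tighter than '>>', so the shift applies to
 * the whole rounded sum. */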
#define DECL_FFT5(NAME, D0, D1, D2, D3, D4)                        \
static av_always_inline void NAME(TXComplex *out, TXComplex *in,   \
                                  ptrdiff_t stride)                \
{                                                                  \
    TXComplex dc, z0[4], t[6];                                     \
    const TXSample *tab = TX_TAB(ff_tx_tab_53);                    \
                                                                   \
    BF(t[1].im, t[0].re, in[1].re, in[4].re);                      \
    BF(t[1].re, t[0].im, in[1].im, in[4].im);                      \
    BF(t[3].im, t[2].re, in[2].re, in[3].re);                      \
    BF(t[3].re, t[2].im, in[2].im, in[3].im);                      \
                                                                   \
    out[D0*stride].re = dc.re + t[0].re + t[2].re;                 \
    out[D0*stride].im = dc.im + t[0].im + t[2].im;                 \
                                                                   \
    SMUL(t[4].re, t[0].re, tab[0], tab[2], t[2].re, t[0].re);      \
    SMUL(t[4].im, t[0].im, tab[0], tab[2], t[2].im, t[0].im);      \
    CMUL(t[5].re, t[1].re, tab[4], tab[6], t[3].re, t[1].re);      \
    CMUL(t[5].im, t[1].im, tab[4], tab[6], t[3].im, t[1].im);      \
                                                                   \
    BF(z0[0].re, z0[3].re, t[0].re, t[1].re);                      \
    BF(z0[0].im, z0[3].im, t[0].im, t[1].im);                      \
    BF(z0[2].re, z0[1].re, t[4].re, t[5].re);                      \
    BF(z0[2].im, z0[1].im, t[4].im, t[5].im);                      \
                                                                   \
    out[D1*stride].re = dc.re + z0[3].re;                          \
    out[D1*stride].im = dc.im + z0[0].im;                          \
    out[D2*stride].re = dc.re + z0[2].re;                          \
    out[D2*stride].im = dc.im + z0[1].im;                          \
    out[D3*stride].re = dc.re + z0[1].re;                          \
    out[D3*stride].im = dc.im + z0[2].im;                          \
    out[D4*stride].re = dc.re + z0[0].re;                          \
    out[D4*stride].im = dc.im + z0[3].im;                          \
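/* DECL_FFT5 parametrizes the output order (D0..D4 are per-output stride
 * multipliers), so the same 5-point body can be instantiated with the
 * rotated output permutations that the 15-point PFA transform needs. */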
    out[0*stride].re = dc.re + t[0].re + t[2].re + t[4].re;
    out[0*stride].im = dc.im + t[0].im + t[2].im + t[4].im;
    mtmp[ 0] = ((int64_t)tab[0].re)*t[0].re - ((int64_t)tab[2].re)*t[4].re;
    mtmp[ 1] = ((int64_t)tab[0].re)*t[4].re - ((int64_t)tab[1].re)*t[0].re;
    mtmp[ 2] = ((int64_t)tab[0].re)*t[2].re - ((int64_t)tab[2].re)*t[0].re;
    mtmp[ 3] = ((int64_t)tab[0].re)*t[0].im - ((int64_t)tab[1].re)*t[2].im;
    mtmp[ 4] = ((int64_t)tab[0].re)*t[4].im - ((int64_t)tab[1].re)*t[0].im;
    mtmp[ 5] = ((int64_t)tab[0].re)*t[2].im - ((int64_t)tab[2].re)*t[0].im;

    mtmp[ 6] = ((int64_t)tab[2].im)*t[1].im + ((int64_t)tab[1].im)*t[5].im;
    mtmp[ 7] = ((int64_t)tab[0].im)*t[5].im + ((int64_t)tab[2].im)*t[3].im;
    mtmp[ 8] = ((int64_t)tab[2].im)*t[5].im + ((int64_t)tab[1].im)*t[3].im;
    mtmp[ 9] = ((int64_t)tab[0].im)*t[1].re + ((int64_t)tab[1].im)*t[3].re;
    mtmp[10] = ((int64_t)tab[2].im)*t[3].re + ((int64_t)tab[0].im)*t[5].re;
    mtmp[11] = ((int64_t)tab[2].im)*t[1].re + ((int64_t)tab[1].im)*t[5].re;
    z[0].re = (int32_t)(mtmp[ 0] - ((int64_t)tab[1].re)*t[2].re + 0x40000000 >> 31);
    z[1].re = (int32_t)(mtmp[ 1] - ((int64_t)tab[2].re)*t[2].re + 0x40000000 >> 31);
    z[2].re = (int32_t)(mtmp[ 2] - ((int64_t)tab[1].re)*t[4].re + 0x40000000 >> 31);
    z[0].im = (int32_t)(mtmp[ 3] - ((int64_t)tab[2].re)*t[4].im + 0x40000000 >> 31);
    z[1].im = (int32_t)(mtmp[ 4] - ((int64_t)tab[2].re)*t[2].im + 0x40000000 >> 31);
    z[2].im = (int32_t)(mtmp[ 5] - ((int64_t)tab[1].re)*t[4].im + 0x40000000 >> 31);

    t[0].re = (int32_t)(mtmp[ 6] - ((int64_t)tab[0].im)*t[3].im + 0x40000000 >> 31);
    t[2].re = (int32_t)(mtmp[ 7] - ((int64_t)tab[1].im)*t[1].im + 0x40000000 >> 31);
    t[4].re = (int32_t)(mtmp[ 8] + ((int64_t)tab[0].im)*t[1].im + 0x40000000 >> 31);
    t[0].im = (int32_t)(mtmp[ 9] + ((int64_t)tab[2].im)*t[5].re + 0x40000000 >> 31);
    t[2].im = (int32_t)(mtmp[10] - ((int64_t)tab[1].im)*t[1].re + 0x40000000 >> 31);
    t[4].im = (int32_t)(mtmp[11] - ((int64_t)tab[0].im)*t[3].re + 0x40000000 >> 31);
    z[0].re = tab[0].re*t[0].re - tab[2].re*t[4].re - tab[1].re*t[2].re;
    z[1].re = tab[0].re*t[4].re - tab[1].re*t[0].re - tab[2].re*t[2].re;
    z[2].re = tab[0].re*t[2].re - tab[2].re*t[0].re - tab[1].re*t[4].re;
    z[0].im = tab[0].re*t[0].im - tab[1].re*t[2].im - tab[2].re*t[4].im;
    z[1].im = tab[0].re*t[4].im - tab[1].re*t[0].im - tab[2].re*t[2].im;
    z[2].im = tab[0].re*t[2].im - tab[2].re*t[0].im - tab[1].re*t[4].im;

    t[0].re = tab[2].im*t[1].im + tab[1].im*t[5].im - tab[0].im*t[3].im;
    t[2].re = tab[0].im*t[5].im + tab[2].im*t[3].im - tab[1].im*t[1].im;
    t[4].re = tab[2].im*t[5].im + tab[1].im*t[3].im + tab[0].im*t[1].im;
    t[0].im = tab[0].im*t[1].re + tab[1].im*t[3].re + tab[2].im*t[5].re;
    t[2].im = tab[2].im*t[3].re + tab[0].im*t[5].re - tab[1].im*t[1].re;
    t[4].im = tab[2].im*t[1].re + tab[1].im*t[5].re - tab[0].im*t[3].re;
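/* As in fft3(), two arithmetic paths coexist: the 64-bit fixed-point one
 * above, with explicit round-to-nearest, and the plain floating-point one
 * here; the template selects one of them at compile time. */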
    w[0].re = t[0].re - t[6].re;
    w[0].im = t[0].im - t[6].im;
    w[1].re = t[2].re - t[6].re;
    w[1].im = t[2].im - t[6].im;
    w[2].re = t[1].re - t[7].re;
    w[2].im = t[1].im - t[7].im;
    w[3].re = t[3].re + t[7].re;
    w[3].im = t[3].im + t[7].im;

    z[0].re = dc.re + t[4].re;
    z[0].im = dc.im + t[4].im;

    z[1].re = t[0].re + t[2].re + t[6].re;
    z[1].im = t[0].im + t[2].im + t[6].im;
    mtmp[0] = t[1].re - t[3].re + t[7].re;
    mtmp[1] = t[1].im - t[3].im + t[7].im;

    y[3].re = (int32_t)(((int64_t)tab[0].im)*mtmp[0] + 0x40000000 >> 31);
    y[3].im = (int32_t)(((int64_t)tab[0].im)*mtmp[1] + 0x40000000 >> 31);
    mtmp[0] = (int32_t)(((int64_t)tab[0].re)*z[1].re + 0x40000000 >> 31);
    mtmp[1] = (int32_t)(((int64_t)tab[0].re)*z[1].im + 0x40000000 >> 31);
    mtmp[2] = (int32_t)(((int64_t)tab[0].re)*t[4].re + 0x40000000 >> 31);
    mtmp[3] = (int32_t)(((int64_t)tab[0].re)*t[4].im + 0x40000000 >> 31);

    x[3].re = z[0].re + (int32_t)mtmp[0];
    x[3].im = z[0].im + (int32_t)mtmp[1];
    z[0].re = in[0].re + (int32_t)mtmp[2];
    z[0].im = in[0].im + (int32_t)mtmp[3];
    mtmp[0] = ((int64_t)tab[1].re)*w[0].re;
    mtmp[1] = ((int64_t)tab[1].re)*w[0].im;
    mtmp[2] = ((int64_t)tab[2].im)*w[0].re;
    mtmp[3] = ((int64_t)tab[2].im)*w[0].im;
    mtmp[4] = ((int64_t)tab[1].im)*w[2].re;
    mtmp[5] = ((int64_t)tab[1].im)*w[2].im;
    mtmp[6] = ((int64_t)tab[2].re)*w[2].re;
    mtmp[7] = ((int64_t)tab[2].re)*w[2].im;
    x[1].re = (int32_t)(mtmp[0] + ((int64_t)tab[2].im)*w[1].re + 0x40000000 >> 31);
    x[1].im = (int32_t)(mtmp[1] + ((int64_t)tab[2].im)*w[1].im + 0x40000000 >> 31);
    x[2].re = (int32_t)(mtmp[2] - ((int64_t)tab[3].re)*w[1].re + 0x40000000 >> 31);
    x[2].im = (int32_t)(mtmp[3] - ((int64_t)tab[3].re)*w[1].im + 0x40000000 >> 31);
    y[1].re = (int32_t)(mtmp[4] + ((int64_t)tab[2].re)*w[3].re + 0x40000000 >> 31);
    y[1].im = (int32_t)(mtmp[5] + ((int64_t)tab[2].re)*w[3].im + 0x40000000 >> 31);
    y[2].re = (int32_t)(mtmp[6] - ((int64_t)tab[3].im)*w[3].re + 0x40000000 >> 31);
    y[2].im = (int32_t)(mtmp[7] - ((int64_t)tab[3].im)*w[3].im + 0x40000000 >> 31);
    y[0].re = (int32_t)(((int64_t)tab[0].im)*t[5].re + 0x40000000 >> 31);
    y[0].im = (int32_t)(((int64_t)tab[0].im)*t[5].im + 0x40000000 >> 31);
    y[3].re = tab[0].im*(t[1].re - t[3].re + t[7].re);
    y[3].im = tab[0].im*(t[1].im - t[3].im + t[7].im);

    x[3].re = z[0].re + tab[0].re*z[1].re;
    x[3].im = z[0].im + tab[0].re*z[1].im;
    z[0].re = dc.re + tab[0].re*t[4].re;
    z[0].im = dc.im + tab[0].re*t[4].im;
    x[1].re = tab[1].re*w[0].re + tab[2].im*w[1].re;
    x[1].im = tab[1].re*w[0].im + tab[2].im*w[1].im;
    x[2].re = tab[2].im*w[0].re - tab[3].re*w[1].re;
    x[2].im = tab[2].im*w[0].im - tab[3].re*w[1].im;
    y[1].re = tab[1].im*w[2].re + tab[2].re*w[3].re;
    y[1].im = tab[1].im*w[2].im + tab[2].re*w[3].im;
    y[2].re = tab[2].re*w[2].re - tab[3].im*w[3].re;
    y[2].im = tab[2].re*w[2].im - tab[3].im*w[3].im;

    y[0].re = tab[0].im*t[5].re;
    y[0].im = tab[0].im*t[5].im;
    x[4].re = x[1].re + x[2].re;
    x[4].im = x[1].im + x[2].im;

    y[4].re = y[1].re - y[2].re;
    y[4].im = y[1].im - y[2].im;
    x[1].re = z[0].re + x[1].re;
    x[1].im = z[0].im + x[1].im;
    y[1].re = y[0].re + y[1].re;
    y[1].im = y[0].im + y[1].im;
    x[2].re = z[0].re + x[2].re;
    x[2].im = z[0].im + x[2].im;
    y[2].re = y[2].re - y[0].re;
    y[2].im = y[2].im - y[0].im;
    x[4].re = z[0].re - x[4].re;
    x[4].im = z[0].im - x[4].im;
    y[4].re = y[0].re - y[4].re;
    y[4].im = y[0].im - y[4].im;
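/* fft9 epilogue: each (x, y) pair is combined with z[0] and y[0] through
 * sums and differences, yielding two of the eight non-DC outputs per pair. */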
    for (int i = 0; i < 5; i++)
#define DECL_FACTOR_S(n)                                                    \
static void TX_NAME(ff_tx_fft##n)(AVTXContext *s, void *dst,                \
                                  void *src, ptrdiff_t stride)              \
{                                                                           \
    fft##n((TXComplex *)dst, (TXComplex *)src, stride / sizeof(TXComplex)); \
}                                                                           \
static const FFTXCodelet TX_NAME(ff_tx_fft##n##_ns_def) = {                 \
    .name       = TX_NAME_STR("fft" #n "_ns"),                              \
    .function   = TX_NAME(ff_tx_fft##n),                                    \
    .type       = TX_TYPE(FFT),                                             \
    .flags      = AV_TX_INPLACE | FF_TX_OUT_OF_PLACE |                      \
                  AV_TX_UNALIGNED | FF_TX_PRESHUFFLE,                       \
                                                                            \
    .init       = TX_NAME(ff_tx_fft_factor_init),                           \
    .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                                      \
    .prio       = FF_TX_PRIO_BASE,                                          \
#define DECL_FACTOR_F(n)                                                    \
                                                                            \
static const FFTXCodelet TX_NAME(ff_tx_fft##n##_fwd_def) = {                \
    .name       = TX_NAME_STR("fft" #n "_fwd"),                             \
    .function   = TX_NAME(ff_tx_fft##n),                                    \
    .type       = TX_TYPE(FFT),                                             \
    .flags      = AV_TX_INPLACE | FF_TX_OUT_OF_PLACE |                      \
                  AV_TX_UNALIGNED | FF_TX_FORWARD_ONLY,                     \
                                                                            \
    .init       = TX_NAME(ff_tx_fft_factor_init),                           \
    .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                                      \
    .prio       = FF_TX_PRIO_BASE,                                          \
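/* DECL_FACTOR_S declares a codelet for one single-factor transform
 * (in-place capable, expecting pre-shuffled input); DECL_FACTOR_F adds a
 * forward-only variant on top. Both go through ff_tx_fft_factor_init(). */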
#define BUTTERFLIES(a0, a1, a2, a3)            \
    BF(t3, t5, t5, t1);                        \
    BF(a2.re, a0.re, r0, t5);                  \
    BF(a3.im, a1.im, i1, t3);                  \
    BF(t4, t6, t2, t6);                        \
    BF(a3.re, a1.re, r1, t4);                  \
    BF(a2.im, a0.im, i0, t6);                  \

#define TRANSFORM(a0, a1, a2, a3, wre, wim)    \
    CMUL(t1, t2, a2.re, a2.im, wre, -wim);     \
    CMUL(t5, t6, a3.re, a3.im, wre, wim);      \
    BUTTERFLIES(a0, a1, a2, a3);               \
static void TX_NAME(ff_tx_fft_sr_combine)(TXComplex *z,
                                          const TXSample *cos, int len)
{
    const TXSample *wim = cos + o1 - 7;

    for (int i = 0; i < len; i += 4) {
        TRANSFORM(z[0], z[o1 + 0], z[o2 + 0], z[o3 + 0], cos[0], wim[7]);
        TRANSFORM(z[2], z[o1 + 2], z[o2 + 2], z[o3 + 2], cos[2], wim[5]);
        TRANSFORM(z[4], z[o1 + 4], z[o2 + 4], z[o3 + 4], cos[4], wim[3]);
        TRANSFORM(z[6], z[o1 + 6], z[o2 + 6], z[o3 + 6], cos[6], wim[1]);

        TRANSFORM(z[1], z[o1 + 1], z[o2 + 1], z[o3 + 1], cos[1], wim[6]);
        TRANSFORM(z[3], z[o1 + 3], z[o2 + 3], z[o3 + 3], cos[3], wim[4]);
        TRANSFORM(z[5], z[o1 + 5], z[o2 + 5], z[o3 + 5], cos[5], wim[2]);
        TRANSFORM(z[7], z[o1 + 7], z[o2 + 7], z[o3 + 7], cos[7], wim[0]);
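/* The split-radix combine handles 8 complex values per iteration in two
 * interleaved groups of four; the cosine twiddles are read forward while
 * the sine twiddles (wim) are read backward from the end of the table. */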
#define DECL_SR_CODELET_DEF(n)                              \
static const FFTXCodelet TX_NAME(ff_tx_fft##n##_ns_def) = { \
    .name       = TX_NAME_STR("fft" #n "_ns"),              \
    .function   = TX_NAME(ff_tx_fft##n##_ns),               \
    .type       = TX_TYPE(FFT),                             \
    .flags      = FF_TX_OUT_OF_PLACE | AV_TX_INPLACE |      \
                  AV_TX_UNALIGNED | FF_TX_PRESHUFFLE,       \
                                                            \
    .init       = TX_NAME(ff_tx_fft_sr_codelet_init),       \
    .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                      \
    .prio       = FF_TX_PRIO_BASE,                          \
#define DECL_SR_CODELET(n, n2, n4)                                     \
static void TX_NAME(ff_tx_fft##n##_ns)(AVTXContext *s, void *_dst,     \
                                       void *_src, ptrdiff_t stride)   \
{                                                                      \
    TXComplex *src = _src;                                             \
    TXComplex *dst = _dst;                                             \
    const TXSample *cos = TX_TAB(ff_tx_tab_##n);                       \
                                                                       \
    TX_NAME(ff_tx_fft##n2##_ns)(s, dst,        src,        stride);    \
    TX_NAME(ff_tx_fft##n4##_ns)(s, dst + n4*2, src + n4*2, stride);    \
    TX_NAME(ff_tx_fft##n4##_ns)(s, dst + n4*3, src + n4*3, stride);    \
    TX_NAME(ff_tx_fft_sr_combine)(dst, cos, n4 >> 1);                  \
}                                                                      \
                                                                       \
DECL_SR_CODELET_DEF(n)
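/* Classic split-radix recursion: a length-n transform is built from one
 * length-n/2 and two length-n/4 sub-transforms, followed by the combine
 * pass over the results. */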
static void TX_NAME(ff_tx_fft2_ns)(AVTXContext *s, void *_dst,
                                   void *_src, ptrdiff_t stride)

static void TX_NAME(ff_tx_fft4_ns)(AVTXContext *s, void *_dst,
                                   void *_src, ptrdiff_t stride)

static void TX_NAME(ff_tx_fft8_ns)(AVTXContext *s, void *_dst,
                                   void *_src, ptrdiff_t stride)

    const TXSample cos = TX_TAB(ff_tx_tab_8)[1];

    TRANSFORM(dst[1], dst[3], dst[5], dst[7], cos, cos);

static void TX_NAME(ff_tx_fft16_ns)(AVTXContext *s, void *_dst,
                                    void *_src, ptrdiff_t stride)

    const TXSample *cos = TX_TAB(ff_tx_tab_16);

    TXSample cos_16_1 = cos[1];
    TXSample cos_16_2 = cos[2];
    TXSample cos_16_3 = cos[3];

    TRANSFORM(dst[ 2], dst[ 6], dst[10], dst[14], cos_16_2, cos_16_2);
    TRANSFORM(dst[ 1], dst[ 5], dst[ 9], dst[13], cos_16_1, cos_16_3);
    TRANSFORM(dst[ 3], dst[ 7], dst[11], dst[15], cos_16_3, cos_16_1);

static void TX_NAME(ff_tx_fft)(AVTXContext *s, void *_dst,
                               void *_src, ptrdiff_t stride)

    int *map = s->sub[0].map;

    for (int i = 0; i < len; i++)

    s->fn[0](&s->sub[0], dst2, dst1, stride);

static void TX_NAME(ff_tx_fft_inplace)(AVTXContext *s, void *_dst,
                                       void *_src, ptrdiff_t stride)

    const int *map = s->sub->map;
    const int *inplace_idx = s->map;
    int src_idx, dst_idx;

    src_idx = *inplace_idx++;

        dst_idx = map[src_idx];

            dst_idx = map[dst_idx];
        } while (dst_idx != src_idx);

    } while ((src_idx = *inplace_idx++));
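/* In-place permutation: s->map holds one starting index per permutation
 * cycle; each inner loop walks a full cycle through map[], swapping
 * elements until it returns to the starting index. */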
static const FFTXCodelet TX_NAME(ff_tx_fft_def) = {
    .name = TX_NAME_STR("fft"),

static const FFTXCodelet TX_NAME(ff_tx_fft_inplace_small_def) = {
    .name = TX_NAME_STR("fft_inplace_small"),

static const FFTXCodelet TX_NAME(ff_tx_fft_inplace_def) = {
    .name = TX_NAME_STR("fft_inplace"),
    for (int i = 0; i < len; i++) {
        for (int j = 0; j < len; j++) {
            const double factor = phase*i*j;

static void TX_NAME(ff_tx_fft_naive)(AVTXContext *s, void *_dst,
                                     void *_src, ptrdiff_t stride)

    const int n = s->len;
    double phase = s->inv ? 2.0*M_PI/n : -2.0*M_PI/n;

    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            const double factor = phase*i*j;

static void TX_NAME(ff_tx_fft_naive_small)(AVTXContext *s, void *_dst,
                                           void *_src, ptrdiff_t stride)

    const int n = s->len;

    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {

static const FFTXCodelet TX_NAME(ff_tx_fft_naive_small_def) = {
    .name = TX_NAME_STR("fft_naive_small"),

static const FFTXCodelet TX_NAME(ff_tx_fft_naive_def) = {
    .name = TX_NAME_STR("fft_naive"),
    size_t extra_tmp_len = 0;

    for (int i = 0; i < ret; i++) {
        int len1 = len_list[i];
        int len2 = len / len1;

        if (len2 & (len2 - 1))

    } else if (ret < 0) {

    } else if (ret < 0) {

    } else if (ret < 0) {

                                           s->sub[0].len, s->sub[1].len)))

        tmp = (int *)s->tmp;
        for (int k = 0; k < len; k += s->sub[0].len) {
            memcpy(tmp, &s->map[k], s->sub[0].len*sizeof(*tmp));
            for (int i = 0; i < s->sub[0].len; i++)
                s->map[k + i] = tmp[s->sub[0].map[i]];

        extra_tmp_len = len;

        extra_tmp_len = s->sub[0].len;

    if (extra_tmp_len && !(s->exp = av_malloc(extra_tmp_len*sizeof(*s->exp))))
static void TX_NAME(ff_tx_fft_pfa)(AVTXContext *s, void *_out,
                                   void *_in, ptrdiff_t stride)

    const int n = s->sub[0].len, m = s->sub[1].len, l = s->len;
    const int *in_map = s->map, *out_map = in_map + l;
    const int *sub_map = s->sub[1].map;

    for (int i = 0; i < m; i++) {
        for (int j = 0; j < n; j++)
            s->exp[j] = in[in_map[i*n + j]];
        s->fn[0](&s->sub[0], &s->tmp[sub_map[i]], s->exp, m*sizeof(TXComplex));

    for (int i = 0; i < n; i++)
        s->fn[1](&s->sub[1], &tmp1[m*i], &s->tmp[m*i], sizeof(TXComplex));

    for (int i = 0; i < l; i++)

static void TX_NAME(ff_tx_fft_pfa_ns)(AVTXContext *s, void *_out,
                                      void *_in, ptrdiff_t stride)

    const int n = s->sub[0].len, m = s->sub[1].len, l = s->len;
    const int *in_map = s->map, *out_map = in_map + l;
    const int *sub_map = s->sub[1].map;

    for (int i = 0; i < m; i++)
        s->fn[0](&s->sub[0], &s->tmp[sub_map[i]], &in[i*n], m*sizeof(TXComplex));

    for (int i = 0; i < n; i++)
        s->fn[1](&s->sub[1], &tmp1[m*i], &s->tmp[m*i], sizeof(TXComplex));

    for (int i = 0; i < l; i++)

static const FFTXCodelet TX_NAME(ff_tx_fft_pfa_def) = {
    .name = TX_NAME_STR("fft_pfa"),

static const FFTXCodelet TX_NAME(ff_tx_fft_pfa_ns_def) = {
    .name = TX_NAME_STR("fft_pfa_ns"),
    s->scale_d = *((SCALE_TYPE *)scale);
    s->scale_f = s->scale_d;

static void TX_NAME(ff_tx_mdct_naive_fwd)(AVTXContext *s, void *_dst,
                                          void *_src, ptrdiff_t stride)

    TXSample *src = _src;
    TXSample *dst = _dst;
    double scale = s->scale_d;

    const double phase = M_PI/(4.0*len);

    for (int i = 0; i < len; i++) {

        for (int j = 0; j < len*2; j++) {
            int a = (2*j + 1 + len) * (2*i + 1);
            sum += UNSCALE(src[j]) * cos(a * phase);
static void TX_NAME(ff_tx_mdct_naive_inv)(AVTXContext *s, void *_dst,
                                          void *_src, ptrdiff_t stride)

    TXSample *src = _src;
    TXSample *dst = _dst;
    double scale = s->scale_d;
    int len = s->len >> 1;

    const double phase = M_PI/(4.0*len2);

    for (int i = 0; i < len; i++) {

        double i_d = phase * (4*len - 2*i - 1);
        double i_u = phase * (3*len2 + 2*i + 1);
        for (int j = 0; j < len2; j++) {
            double a = (2 * j + 1);
            double a_d = cos(a * i_d);
            double a_u = cos(a * i_u);
static const FFTXCodelet TX_NAME(ff_tx_mdct_naive_fwd_def) = {
    .name = TX_NAME_STR("mdct_naive_fwd"),

static const FFTXCodelet TX_NAME(ff_tx_mdct_naive_inv_def) = {
    .name = TX_NAME_STR("mdct_naive_inv"),
    s->scale_d = *((SCALE_TYPE *)scale);
    s->scale_f = s->scale_d;

    memcpy(s->map, s->sub->map, (len >> 1)*sizeof(*s->map));

    for (int i = 0; i < len >> 1; i++)

    for (int i = 0; i < (s->len >> 1); i++)

static void TX_NAME(ff_tx_mdct_fwd)(AVTXContext *s, void *_dst,
                                    void *_src, ptrdiff_t stride)

    TXSample *src = _src, *dst = _dst;

    const int len2 = s->len >> 1;
    const int len4 = s->len >> 2;
    const int len3 = len2 * 3;
    const int *sub_map = s->map;

    for (int i = 0; i < len2; i++) {

        const int idx = sub_map[i];

            tmp.re = FOLD(-src[ len2 + k],  src[1*len2 - 1 - k]);
            tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]);

            tmp.re = FOLD(-src[ len2 + k], -src[5*len2 - 1 - k]);
            tmp.im = FOLD( src[-len2 + k], -src[1*len3 - 1 - k]);

    for (int i = 0; i < len4; i++) {
        const int i0 = len4 + i, i1 = len4 - i - 1;
static void TX_NAME(ff_tx_mdct_inv)(AVTXContext *s, void *_dst,
                                    void *_src, ptrdiff_t stride)

    const TXSample *src = _src, *in1, *in2;
    const int len2 = s->len >> 1;
    const int len4 = s->len >> 2;
    const int *sub_map = s->map;

    for (int i = 0; i < len2; i++) {

    for (int i = 0; i < len4; i++) {
        const int i0 = len4 + i, i1 = len4 - i - 1;
static const FFTXCodelet TX_NAME(ff_tx_mdct_fwd_def) = {
    .name = TX_NAME_STR("mdct_fwd"),

static const FFTXCodelet TX_NAME(ff_tx_mdct_inv_def) = {
    .name = TX_NAME_STR("mdct_inv"),
    s->scale_d = *((SCALE_TYPE *)scale);
    s->scale_f = s->scale_d;

static void TX_NAME(ff_tx_mdct_inv_full)(AVTXContext *s, void *_dst,
                                         void *_src, ptrdiff_t stride)

    int len = s->len << 1;
    int len2 = len >> 1;
    int len4 = len >> 2;
    TXSample *dst = _dst;

    s->fn[0](&s->sub[0], dst + len4, _src, stride);

    for (int i = 0; i < len4; i++) {

static const FFTXCodelet TX_NAME(ff_tx_mdct_inv_full_def) = {
    .name = TX_NAME_STR("mdct_inv_full"),
    sub_len = len / cd->factors[0];

    s->scale_d = *((SCALE_TYPE *)scale);
    s->scale_f = s->scale_d;

                                sub_len, inv, scale)))

    if (cd->factors[0] == 15)

        for (int i = 0; i < len; i++)
#define DECL_COMP_IMDCT(N)                                                    \
static void TX_NAME(ff_tx_mdct_pfa_##N##xM_inv)(AVTXContext *s, void *_dst,   \
                                                void *_src, ptrdiff_t stride) \
{                                                                             \
    TXComplex fft##N##in[N];                                                  \
    TXComplex *z = _dst, *exp = s->exp;                                       \
    const TXSample *src = _src, *in1, *in2;                                   \
    const int len4 = s->len >> 2;                                             \
    const int len2 = s->len >> 1;                                             \
    const int m = s->sub->len;                                                \
    const int *in_map = s->map, *out_map = in_map + N*m;                      \
    const int *sub_map = s->sub->map;                                         \
                                                                              \
    stride /= sizeof(*src);                                                   \
                                                                              \
    in2 = src + ((N*m*2) - 1) * stride;                                       \
                                                                              \
    for (int i = 0; i < len2; i += N) {                                       \
        for (int j = 0; j < N; j++) {                                         \
            const int k = in_map[j];                                          \
            TXComplex tmp = { in2[-k*stride], in1[k*stride] };                \
            CMUL3(fft##N##in[j], tmp, exp[j]);                                \
        }                                                                     \
        fft##N(s->tmp + *(sub_map++), fft##N##in, m);                         \
    }                                                                         \
                                                                              \
    for (int i = 0; i < N; i++)                                               \
        s->fn[0](&s->sub[0], s->tmp + m*i, s->tmp + m*i, sizeof(TXComplex));  \
                                                                              \
    for (int i = 0; i < len4; i++) {                                          \
        const int i0 = len4 + i, i1 = len4 - i - 1;                           \
        const int s0 = out_map[i0], s1 = out_map[i1];                         \
        TXComplex src1 = { s->tmp[s1].im, s->tmp[s1].re };                    \
        TXComplex src0 = { s->tmp[s0].im, s->tmp[s0].re };                    \
                                                                              \
        CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re);   \
        CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re);   \
    }                                                                         \
}                                                                             \
                                                                              \
static const FFTXCodelet TX_NAME(ff_tx_mdct_pfa_##N##xM_inv_def) = {          \
    .name       = TX_NAME_STR("mdct_pfa_" #N "xM_inv"),                       \
    .function   = TX_NAME(ff_tx_mdct_pfa_##N##xM_inv),                        \
    .type       = TX_TYPE(MDCT),                                              \
    .flags      = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,  \
    .factors    = { N, TX_FACTOR_ANY },                                       \
                                                                              \
    .max_len    = TX_LEN_UNLIMITED,                                           \
    .init       = TX_NAME(ff_tx_mdct_pfa_init),                               \
    .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                                        \
    .prio       = FF_TX_PRIO_BASE,                                            \
#define DECL_COMP_MDCT(N)                                                     \
static void TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd)(AVTXContext *s, void *_dst,   \
                                                void *_src, ptrdiff_t stride) \
{                                                                             \
    TXComplex fft##N##in[N];                                                  \
    TXSample *src = _src, *dst = _dst;                                        \
    TXComplex *exp = s->exp, tmp;                                             \
    const int m = s->sub->len;                                                \
    const int len4 = N*m;                                                     \
    const int len3 = len4 * 3;                                                \
    const int len8 = s->len >> 2;                                             \
    const int *in_map = s->map, *out_map = in_map + N*m;                      \
    const int *sub_map = s->sub->map;                                         \
                                                                              \
    stride /= sizeof(*dst);                                                   \
                                                                              \
    for (int i = 0; i < m; i++) {                                             \
        for (int j = 0; j < N; j++) {                                         \
            const int k = in_map[i*N + j];                                    \
                                                                              \
            tmp.re = FOLD(-src[ len4 + k],  src[1*len4 - 1 - k]);             \
            tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]);             \
                                                                              \
            tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]);             \
            tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]);             \
                                                                              \
            CMUL(fft##N##in[j].im, fft##N##in[j].re, tmp.re, tmp.im,          \
                 exp[k >> 1].re, exp[k >> 1].im);                             \
        }                                                                     \
        fft##N(s->tmp + sub_map[i], fft##N##in, m);                           \
    }                                                                         \
                                                                              \
    for (int i = 0; i < N; i++)                                               \
        s->fn[0](&s->sub[0], s->tmp + m*i, s->tmp + m*i, sizeof(TXComplex));  \
                                                                              \
    for (int i = 0; i < len8; i++) {                                          \
        const int i0 = len8 + i, i1 = len8 - i - 1;                           \
        const int s0 = out_map[i0], s1 = out_map[i1];                         \
        TXComplex src1 = { s->tmp[s1].re, s->tmp[s1].im };                    \
        TXComplex src0 = { s->tmp[s0].re, s->tmp[s0].im };                    \
                                                                              \
        CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im,   \
             exp[i0].im, exp[i0].re);                                         \
        CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im,   \
             exp[i1].im, exp[i1].re);                                         \
    }                                                                         \
}                                                                             \
                                                                              \
static const FFTXCodelet TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd_def) = {          \
    .name       = TX_NAME_STR("mdct_pfa_" #N "xM_fwd"),                       \
    .function   = TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd),                        \
    .type       = TX_TYPE(MDCT),                                              \
    .flags      = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,  \
    .factors    = { N, TX_FACTOR_ANY },                                       \
                                                                              \
    .max_len    = TX_LEN_UNLIMITED,                                           \
    .init       = TX_NAME(ff_tx_mdct_pfa_init),                               \
    .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                                        \
    .prio       = FF_TX_PRIO_BASE,                                            \
    s->scale_d = *((SCALE_TYPE *)scale);
    s->scale_f = s->scale_d;

    tab = (TXSample *)s->exp;

    m = (inv ? 2*s->scale_d : s->scale_d);

    *tab++ = RESCALE((inv ? 0.5 : 1.0) * m);
    *tab++ = RESCALE(inv ? 0.5*m : 1.0*m);
    *tab++ = RESCALE( m);
    *tab++ = RESCALE(-m);

    *tab++ = RESCALE( (0.5 - 0.0) * m);
    *tab++ = RESCALE( (0.0 - 0.5) * m);
    *tab++ = RESCALE( (0.5 - inv) * m);
    *tab++ = RESCALE(-(0.5 - inv) * m);

    for (int i = 0; i < len >> 2; i++)
        *tab++ = RESCALE(cos(i*f));
    for (int i = len >> 2; i >= 0; i--)
        *tab++ = RESCALE(cos(i*f) * (inv ? +1.0 : -1.0));
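/* Layout of the buffer generated above, as consumed by DECL_RDFT below:
 * 8 scale/sign constants first ('fact'), then the quarter-length cosine
 * table ('tcos' = fact + 8) and the sine table ('tsin' = tcos + len4). */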
#define DECL_RDFT(name, inv)                                            \
static void TX_NAME(ff_tx_rdft_ ##name)(AVTXContext *s, void *_dst,     \
                                        void *_src, ptrdiff_t stride)   \
{                                                                       \
    const int len2 = s->len >> 1;                                       \
    const int len4 = s->len >> 2;                                       \
    const TXSample *fact = (void *)s->exp;                              \
    const TXSample *tcos = fact + 8;                                    \
    const TXSample *tsin = tcos + len4;                                 \
    TXComplex *data = inv ? _src : _dst;                                \
                                                                        \
        s->fn[0](&s->sub[0], data, _src, sizeof(TXComplex));            \
                                                                        \
        data[0].im = data[len2].re;                                     \
                                                                        \
    t[0].re = data[0].re;                                               \
    data[0].re = t[0].re + data[0].im;                                  \
    data[0].im = t[0].re - data[0].im;                                  \
    data[ 0].re = MULT(fact[0], data[ 0].re);                           \
    data[ 0].im = MULT(fact[1], data[ 0].im);                           \
    data[len4].re = MULT(fact[2], data[len4].re);                       \
    data[len4].im = MULT(fact[3], data[len4].im);                       \
                                                                        \
    for (int i = 1; i < len4; i++) {                                    \
        t[0].re = MULT(fact[4], (data[i].re + data[len2 - i].re));      \
        t[0].im = MULT(fact[5], (data[i].im - data[len2 - i].im));      \
        t[1].re = MULT(fact[6], (data[i].im + data[len2 - i].im));      \
        t[1].im = MULT(fact[7], (data[i].re - data[len2 - i].re));      \
                                                                        \
        CMUL(t[2].re, t[2].im, t[1].re, t[1].im, tcos[i], tsin[i]);     \
                                                                        \
        data[       i].re = t[0].re + t[2].re;                          \
        data[       i].im = t[2].im - t[0].im;                          \
        data[len2 - i].re = t[0].re - t[2].re;                          \
        data[len2 - i].im = t[2].im + t[0].im;                          \
    }                                                                   \
                                                                        \
        s->fn[0](&s->sub[0], _dst, data, sizeof(TXComplex));            \
                                                                        \
        data[len2].re = data[0].im;                                     \
        data[ 0].im = data[len2].im = 0;                                \
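/* The r2c RDFT computes a length-N real FFT with a single length-N/2
 * complex FFT: even/odd samples are treated as one complex sequence, and
 * the loop above untangles the spectrum with the tcos/tsin twiddles. The
 * c2r direction applies the same fixup before running the sub-FFT. */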
static const FFTXCodelet TX_NAME(ff_tx_rdft_r2c_def) = {
    .name     = TX_NAME_STR("rdft_r2c"),
    .function = TX_NAME(ff_tx_rdft_r2c),

static const FFTXCodelet TX_NAME(ff_tx_rdft_c2r_def) = {
    .name     = TX_NAME_STR("rdft_c2r"),
    .function = TX_NAME(ff_tx_rdft_c2r),
    SCALE_TYPE rsc = *((SCALE_TYPE *)scale);

    tab = (TXSample *)s->exp;

    for (int i = 0; i < len; i++)
        tab[i] = RESCALE(cos(i*freq)*(!inv + 1));

    for (int i = 0; i < len/2; i++)
        tab[len + i] = RESCALE(0.5 / sin((2*i + 1)*freq));

    for (int i = 0; i < len/2; i++)
        tab[len + i] = RESCALE(cos((len - 2*i - 1)*freq));
static void TX_NAME(ff_tx_dctII)(AVTXContext *s, void *_dst,
                                 void *_src, ptrdiff_t stride)

    TXSample *dst = _dst;
    TXSample *src = _src;
    const int len = s->len;
    const int len2 = len >> 1;
    const TXSample *exp = (void *)s->exp;

    TXSample tmp1, tmp2;

    for (int i = 0; i < len2; i++) {
        TXSample in1 = src[i];
        TXSample in2 = src[len - i - 1];

        tmp2 = (tmp2 + 0x40000000) >> 31;

        tmp1 = (in1 + in2)*0.5;
        tmp2 = (in1 - in2)*s;

        src[i] = tmp1 + tmp2;
        src[len - i - 1] = tmp1 - tmp2;

    for (int i = len - 2; i > 0; i -= 2) {

    tmp1 = ((int64_t)exp[0]) * ((int64_t)dst[0]);
    dst[0] = (tmp1 + 0x40000000) >> 31;

    dst[0] = exp[0] * dst[0];
static void TX_NAME(ff_tx_dctIII)(AVTXContext *s, void *_dst,
                                  void *_src, ptrdiff_t stride)

    TXSample *dst = _dst;
    TXSample *src = _src;
    const int len = s->len;
    const int len2 = len >> 1;
    const TXSample *exp = (void *)s->exp;

    int64_t tmp1, tmp2 = src[len - 1];
    tmp2 = (2*tmp2 + 0x40000000) >> 31;

    TXSample tmp1, tmp2 = 2*src[len - 1];

    for (int i = len - 2; i >= 2; i -= 2) {
        TXSample val1 = src[i - 0];
        TXSample val2 = src[i - 1] - src[i + 1];

    s->fn[0](&s->sub[0], dst, src, sizeof(float));

    for (int i = 0; i < len2; i++) {
        TXSample in1 = dst[i];
        TXSample in2 = dst[len - i - 1];

        tmp2 = (tmp2 + 0x40000000) >> 31;

        dst[i] = tmp1 + tmp2;
        dst[len - i - 1] = tmp1 - tmp2;
static const FFTXCodelet TX_NAME(ff_tx_dctII_def) = {
    .name = TX_NAME_STR("dctII"),

static const FFTXCodelet TX_NAME(ff_tx_dctIII_def) = {
    .name = TX_NAME_STR("dctIII"),
    int len4 = s->len >> 1;
    double scale = s->scale_d;
    const double theta = (scale < 0 ? len4 : 0) + 1.0/8.0;
    size_t alloc = pre_tab ? 2*len4 : len4;

    for (int i = 0; i < len4; i++) {

    for (int i = 0; i < len4; i++)
        s->exp[i] = s->exp[len4 + pre_tab[i]];
    &TX_NAME(ff_tx_fft128_ns_def),
    &TX_NAME(ff_tx_fft256_ns_def),
    &TX_NAME(ff_tx_fft512_ns_def),
    &TX_NAME(ff_tx_fft1024_ns_def),
    &TX_NAME(ff_tx_fft2048_ns_def),
    &TX_NAME(ff_tx_fft4096_ns_def),
    &TX_NAME(ff_tx_fft8192_ns_def),
    &TX_NAME(ff_tx_fft16384_ns_def),
    &TX_NAME(ff_tx_fft32768_ns_def),
    &TX_NAME(ff_tx_fft65536_ns_def),
    &TX_NAME(ff_tx_fft131072_ns_def),

    &TX_NAME(ff_tx_fft_inplace_def),
    &TX_NAME(ff_tx_fft_inplace_small_def),
    &TX_NAME(ff_tx_fft_pfa_ns_def),
    &TX_NAME(ff_tx_fft_naive_def),
    &TX_NAME(ff_tx_fft_naive_small_def),

    &TX_NAME(ff_tx_mdct_pfa_3xM_fwd_def),
    &TX_NAME(ff_tx_mdct_pfa_5xM_fwd_def),
    &TX_NAME(ff_tx_mdct_pfa_7xM_fwd_def),
    &TX_NAME(ff_tx_mdct_pfa_9xM_fwd_def),
    &TX_NAME(ff_tx_mdct_pfa_15xM_fwd_def),
    &TX_NAME(ff_tx_mdct_pfa_3xM_inv_def),
    &TX_NAME(ff_tx_mdct_pfa_5xM_inv_def),
    &TX_NAME(ff_tx_mdct_pfa_7xM_inv_def),
    &TX_NAME(ff_tx_mdct_pfa_9xM_inv_def),
    &TX_NAME(ff_tx_mdct_pfa_15xM_inv_def),
    &TX_NAME(ff_tx_mdct_naive_fwd_def),
    &TX_NAME(ff_tx_mdct_naive_inv_def),
    &TX_NAME(ff_tx_mdct_inv_full_def),
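/* For orientation, a minimal sketch (illustrative only, error handling
 * trimmed) of how the codelet tables above are reached through the public
 * libavutil API:
 *
 *     #include "libavutil/tx.h"
 *
 *     AVTXContext *ctx = NULL;
 *     av_tx_fn fn;
 *     AVComplexFloat in[1024], out[1024];
 *
 *     // av_tx_init() picks the best matching codelet chain for the
 *     // requested type/length; scale may be NULL for plain FFTs.
 *     if (av_tx_init(&ctx, &fn, AV_TX_FLOAT_FFT, 0, 1024, NULL, 0) >= 0) {
 *         fn(ctx, out, in, sizeof(AVComplexFloat)); // stride is in bytes
 *         av_tx_uninit(&ctx);
 *     }
 */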