   29 #define TABLE_DEF(name, size) \ 
   30     DECLARE_ALIGNED(32, TXSample, TX_TAB(ff_tx_tab_ ##name))[size] 
   32 #define SR_POW2_TABLES \ 
   53 #define SR_TABLE(len) \ 
   54     TABLE_DEF(len, len/4 + 1); 
   69 #define SR_TABLE(len)                                              \ 
   70 static av_cold void TX_TAB(ff_tx_init_tab_ ##len)(void)            \ 
   72     double freq = 2*M_PI/len;                                      \ 
   73     TXSample *tab = TX_TAB(ff_tx_tab_ ##len);                      \ 
   75     for (int i = 0; i < len/4; i++)                                \ 
   76         *tab++ = RESCALE(cos(i*freq));                             \ 
   84 #define SR_TABLE(len) TX_TAB(ff_tx_init_tab_ ##len), 
   90 #define SR_TABLE(len) AV_ONCE_INIT, 
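The three SR_TABLE definitions above (lines 53, 84 and 90) are the X-macro halves of SR_POW2_TABLES: the same list of power-of-two lengths is expanded once per definition to emit the twiddle tables, the per-length init functions, and matching one-time-init data. A rough sketch of how the pattern is typically consumed (length list and array names assumed, not copied from the elided lines):

    /* Illustrative only -- not the file's exact expansion. */
    #define SR_POW2_TABLES SR_TABLE(8) SR_TABLE(16) SR_TABLE(32) /* ... */

    #define SR_TABLE(len) TX_TAB(ff_tx_init_tab_ ##len),
    static av_cold void (*const sr_tabs_init_funcs[])(void) = { SR_POW2_TABLES };
    #undef SR_TABLE

    #define SR_TABLE(len) AV_ONCE_INIT,
    static AVOnce sr_tabs_init_once[] = { SR_POW2_TABLES };
    #undef SR_TABLE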
   98     TX_TAB(ff_tx_tab_53)[0] = RESCALE(cos(2 * M_PI /  5));
   99     TX_TAB(ff_tx_tab_53)[1] = RESCALE(cos(2 * M_PI /  5));
  100     TX_TAB(ff_tx_tab_53)[2] = RESCALE(cos(2 * M_PI / 10));
  101     TX_TAB(ff_tx_tab_53)[3] = RESCALE(cos(2 * M_PI / 10));
  102     TX_TAB(ff_tx_tab_53)[4] = RESCALE(sin(2 * M_PI /  5));
  103     TX_TAB(ff_tx_tab_53)[5] = RESCALE(sin(2 * M_PI /  5));
  104     TX_TAB(ff_tx_tab_53)[6] = RESCALE(sin(2 * M_PI / 10));
  105     TX_TAB(ff_tx_tab_53)[7] = RESCALE(sin(2 * M_PI / 10));
  108     TX_TAB(ff_tx_tab_53)[ 8] = RESCALE(cos(2 * M_PI / 12));
  109     TX_TAB(ff_tx_tab_53)[ 9] = RESCALE(cos(2 * M_PI / 12));
  110     TX_TAB(ff_tx_tab_53)[10] = RESCALE(cos(2 * M_PI /  6));
  111     TX_TAB(ff_tx_tab_53)[11] = RESCALE(cos(8 * M_PI /  6));
  116     TX_TAB(ff_tx_tab_7)[0] = RESCALE(cos(2 * M_PI /  7));
  117     TX_TAB(ff_tx_tab_7)[1] = RESCALE(sin(2 * M_PI /  7));
  118     TX_TAB(ff_tx_tab_7)[2] = RESCALE(sin(2 * M_PI / 28));
  119     TX_TAB(ff_tx_tab_7)[3] = RESCALE(cos(2 * M_PI / 28));
  120     TX_TAB(ff_tx_tab_7)[4] = RESCALE(cos(2 * M_PI / 14));
  121     TX_TAB(ff_tx_tab_7)[5] = RESCALE(sin(2 * M_PI / 14));
  126     TX_TAB(ff_tx_tab_9)[0] = RESCALE(cos(2 * M_PI /  3));
  127     TX_TAB(ff_tx_tab_9)[1] = RESCALE(sin(2 * M_PI /  3));
  128     TX_TAB(ff_tx_tab_9)[2] = RESCALE(cos(2 * M_PI /  9));
  129     TX_TAB(ff_tx_tab_9)[3] = RESCALE(sin(2 * M_PI /  9));
  130     TX_TAB(ff_tx_tab_9)[4] = RESCALE(cos(2 * M_PI / 36));
  131     TX_TAB(ff_tx_tab_9)[5] = RESCALE(sin(2 * M_PI / 36));
  132     TX_TAB(ff_tx_tab_9)[6] = TX_TAB(ff_tx_tab_9)[2] + TX_TAB(ff_tx_tab_9)[5];
 
  133     TX_TAB(ff_tx_tab_9)[7] = TX_TAB(ff_tx_tab_9)[3] - TX_TAB(ff_tx_tab_9)[4];
 
  152         int idx = factor_2 - 3;
 
  153         for (int i = 0; i <= idx; i++)
  181     const TXSample *tab = TX_TAB(ff_tx_tab_53);
  187     BF(tmp[1].re, tmp[2].im, in[1].im, in[2].im);
  188     BF(tmp[1].im, tmp[2].re, in[1].re, in[2].re);
  197     out[1*stride].re = tmp[0].re - (mtmp[2] + mtmp[0] + 0x40000000 >> 31);
  198     out[1*stride].im = tmp[0].im - (mtmp[3] - mtmp[1] + 0x40000000 >> 31);
  199     out[2*stride].re = tmp[0].re - (mtmp[2] - mtmp[0] + 0x40000000 >> 31);
  200     out[2*stride].im = tmp[0].im - (mtmp[3] + mtmp[1] + 0x40000000 >> 31);
  215 #define DECL_FFT5(NAME, D0, D1, D2, D3, D4)                         \ 
  216 static av_always_inline void NAME(TXComplex *out, TXComplex *in,    \ 
  219     TXComplex dc, z0[4], t[6];                                      \ 
  220     const TXSample *tab = TX_TAB(ff_tx_tab_53);                     \ 
  223     BF(t[1].im, t[0].re, in[1].re, in[4].re);                       \ 
  224     BF(t[1].re, t[0].im, in[1].im, in[4].im);                       \ 
  225     BF(t[3].im, t[2].re, in[2].re, in[3].re);                       \ 
  226     BF(t[3].re, t[2].im, in[2].im, in[3].im);                       \ 
  228     out[D0*stride].re = dc.re + (TXUSample)t[0].re + t[2].re;        \ 
  229     out[D0*stride].im = dc.im + (TXUSample)t[0].im + t[2].im;        \ 
  231     SMUL(t[4].re, t[0].re, tab[0], tab[2], t[2].re, t[0].re);       \ 
  232     SMUL(t[4].im, t[0].im, tab[0], tab[2], t[2].im, t[0].im);       \ 
  233     CMUL(t[5].re, t[1].re, tab[4], tab[6], t[3].re, t[1].re);       \ 
  234     CMUL(t[5].im, t[1].im, tab[4], tab[6], t[3].im, t[1].im);       \ 
  236     BF(z0[0].re, z0[3].re, t[0].re, t[1].re);                       \ 
  237     BF(z0[0].im, z0[3].im, t[0].im, t[1].im);                       \ 
  238     BF(z0[2].re, z0[1].re, t[4].re, t[5].re);                       \ 
  239     BF(z0[2].im, z0[1].im, t[4].im, t[5].im);                       \ 
  241     out[D1*stride].re = dc.re + (TXUSample)z0[3].re;                 \ 
  242     out[D1*stride].im = dc.im + (TXUSample)z0[0].im;                 \ 
  243     out[D2*stride].re = dc.re + (TXUSample)z0[2].re;                 \ 
  244     out[D2*stride].im = dc.im + (TXUSample)z0[1].im;                 \ 
  245     out[D3*stride].re = dc.re + (TXUSample)z0[1].re;                 \ 
  246     out[D3*stride].im = dc.im + (TXUSample)z0[2].im;                 \ 
  247     out[D4*stride].re = dc.re + (TXUSample)z0[0].re;                 \ 
  248     out[D4*stride].im = dc.im + (TXUSample)z0[3].im;                 \ 
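DECL_FFT5 bakes the output index permutation (D0..D4) into the generated function, so the same 5-point kernel can be emitted both in natural order and in the rotated orders needed when it is nested inside a larger odd-length transform such as the 15-point one. A hedged example of an instantiation; the natural-order variant presumably looks like:

    /* Illustrative instantiation: plain 5-point FFT, outputs in natural order. */
    DECL_FFT5(fft5, 0, 1, 2, 3, 4)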
  266     BF(t[1].re, t[0].re, in[1].re, in[6].re);
 
  267     BF(t[1].im, t[0].im, in[1].im, in[6].im);
 
  268     BF(t[3].re, t[2].re, in[2].re, in[5].re);
 
  269     BF(t[3].im, t[2].im, in[2].im, in[5].im);
 
  270     BF(t[5].re, t[4].re, in[3].re, in[4].re);
 
  271     BF(t[5].im, t[4].im, in[3].im, in[4].im);
 
  273     out[0*stride].re = dc.re + t[0].re + t[2].re + t[4].re;
  274     out[0*stride].im = dc.im + t[0].im + t[2].im + t[4].im;
  305     z[0].re = tab[0].re*t[0].re - tab[2].re*t[4].re - tab[1].re*t[2].re;
  306     z[1].re = tab[0].re*t[4].re - tab[1].re*t[0].re - tab[2].re*t[2].re;
  307     z[2].re = tab[0].re*t[2].re - tab[2].re*t[0].re - tab[1].re*t[4].re;
  308     z[0].im = tab[0].re*t[0].im - tab[1].re*t[2].im - tab[2].re*t[4].im;
  309     z[1].im = tab[0].re*t[4].im - tab[1].re*t[0].im - tab[2].re*t[2].im;
  310     z[2].im = tab[0].re*t[2].im - tab[2].re*t[0].im - tab[1].re*t[4].im;
  315     t[0].re = tab[2].im*t[1].im + tab[1].im*t[5].im - tab[0].im*t[3].im;
  316     t[2].re = tab[0].im*t[5].im + tab[2].im*t[3].im - tab[1].im*t[1].im;
  317     t[4].re = tab[2].im*t[5].im + tab[1].im*t[3].im + tab[0].im*t[1].im;
  318     t[0].im = tab[0].im*t[1].re + tab[1].im*t[3].re + tab[2].im*t[5].re;
  319     t[2].im = tab[2].im*t[3].re + tab[0].im*t[5].re - tab[1].im*t[1].re;
  320     t[4].im = tab[2].im*t[1].re + tab[1].im*t[5].re - tab[0].im*t[3].re;
  323     BF(t[1].re, z[0].re, z[0].re, t[4].re);
 
  324     BF(t[3].re, z[1].re, z[1].re, t[2].re);
 
  325     BF(t[5].re, z[2].re, z[2].re, t[0].re);
 
  326     BF(t[1].im, z[0].im, z[0].im, t[0].im);
 
  327     BF(t[3].im, z[1].im, z[1].im, t[2].im);
 
  328     BF(t[5].im, z[2].im, z[2].im, t[4].im);
 
  354     BF(t[1].re, t[0].re, in[1].re, in[8].re);
 
  355     BF(t[1].im, t[0].im, in[1].im, in[8].im);
 
  356     BF(t[3].re, t[2].re, in[2].re, in[7].re);
 
  357     BF(t[3].im, t[2].im, in[2].im, in[7].im);
 
  358     BF(t[5].re, t[4].re, in[3].re, in[6].re);
 
  359     BF(t[5].im, t[4].im, in[3].im, in[6].im);
 
  360     BF(t[7].re, t[6].re, in[4].re, in[5].re);
 
  361     BF(t[7].im, t[6].im, in[4].im, in[5].im);
 
  363     w[0].re = t[0].re - t[6].re;
 
  364     w[0].im = t[0].im - t[6].im;
 
  365     w[1].re = t[2].re - t[6].re;
 
  366     w[1].im = t[2].im - t[6].im;
 
  367     w[2].re = t[1].re - t[7].re;
 
  368     w[2].im = t[1].im - t[7].im;
 
  369     w[3].re = t[3].re + t[7].re;
 
  370     w[3].im = t[3].im + t[7].im;
 
  372     z[0].re = dc.re + t[4].re;
  373     z[0].im = dc.im + t[4].im;
  375     z[1].re = t[0].re + t[2].re + t[6].re;
 
  376     z[1].im = t[0].im + t[2].im + t[6].im;
 
  382     mtmp[0] = t[1].re - t[3].re + t[7].re;
 
  383     mtmp[1] = t[1].im - t[3].im + t[7].im;
 
  393     x[3].re = z[0].re  + (int32_t)mtmp[0];
  394     x[3].im = z[0].im  + (int32_t)mtmp[1];
  395     z[0].re = in[0].re + (int32_t)mtmp[2];
  396     z[0].im = in[0].im + (int32_t)mtmp[3];
  420     y[3].re = tab[0].im*(t[1].re - t[3].re + t[7].re);
  421     y[3].im = tab[0].im*(t[1].im - t[3].im + t[7].im);
  423     x[3].re = z[0].re  + tab[0].re*z[1].re;
  424     x[3].im = z[0].im  + tab[0].re*z[1].im;
  425     z[0].re = dc.re + tab[0].re*t[4].re;
  426     z[0].im = dc.im + tab[0].re*t[4].im;
  428     x[1].re = tab[1].re*w[0].re + tab[2].im*w[1].re;
  429     x[1].im = tab[1].re*w[0].im + tab[2].im*w[1].im;
  430     x[2].re = tab[2].im*w[0].re - tab[3].re*w[1].re;
  431     x[2].im = tab[2].im*w[0].im - tab[3].re*w[1].im;
  432     y[1].re = tab[1].im*w[2].re + tab[2].re*w[3].re;
  433     y[1].im = tab[1].im*w[2].im + tab[2].re*w[3].im;
  434     y[2].re = tab[2].re*w[2].re - tab[3].im*w[3].re;
  435     y[2].im = tab[2].re*w[2].im - tab[3].im*w[3].im;
  437     y[0].re = tab[0].im*t[5].re;
  438     y[0].im = tab[0].im*t[5].im;
  441     x[4].re = x[1].re + x[2].re;
 
  442     x[4].im = x[1].im + x[2].im;
 
  444     y[4].re = y[1].re - y[2].re;
 
  445     y[4].im = y[1].im - y[2].im;
 
  446     x[1].re = z[0].re + x[1].re;
 
  447     x[1].im = z[0].im + x[1].im;
 
  448     y[1].re = y[0].re + y[1].re;
 
  449     y[1].im = y[0].im + y[1].im;
 
  450     x[2].re = z[0].re + x[2].re;
 
  451     x[2].im = z[0].im + x[2].im;
 
  452     y[2].re = y[2].re - y[0].re;
 
  453     y[2].im = y[2].im - y[0].im;
 
  454     x[4].re = z[0].re - x[4].re;
 
  455     x[4].im = z[0].im - x[4].im;
 
  456     y[4].re = y[0].re - y[4].re;
 
  457     y[4].im = y[0].im - y[4].im;
 
  474     for (int i = 0; i < 5; i++)
  500 #define DECL_FACTOR_S(n)                                                       \ 
  501 static void TX_NAME(ff_tx_fft##n)(AVTXContext *s, void *dst,                   \ 
  502                                   void *src, ptrdiff_t stride)                 \ 
  504     fft##n((TXComplex *)dst, (TXComplex *)src, stride / sizeof(TXComplex));    \ 
  506 static const FFTXCodelet TX_NAME(ff_tx_fft##n##_ns_def) = {                    \ 
  507     .name       = TX_NAME_STR("fft" #n "_ns"),                                 \ 
  508     .function   = TX_NAME(ff_tx_fft##n),                                       \ 
  509     .type       = TX_TYPE(FFT),                                                \ 
  510     .flags      = AV_TX_INPLACE | FF_TX_OUT_OF_PLACE |                         \ 
  511                   AV_TX_UNALIGNED | FF_TX_PRESHUFFLE,                          \ 
  516     .init       = TX_NAME(ff_tx_fft_factor_init),                              \ 
  517     .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                                         \ 
  518     .prio       = FF_TX_PRIO_BASE,                                             \ 
  521 #define DECL_FACTOR_F(n)                                                       \ 
  523 static const FFTXCodelet TX_NAME(ff_tx_fft##n##_fwd_def) = {                   \ 
  524     .name       = TX_NAME_STR("fft" #n "_fwd"),                                \ 
  525     .function   = TX_NAME(ff_tx_fft##n),                                       \ 
  526     .type       = TX_TYPE(FFT),                                                \ 
  527     .flags      = AV_TX_INPLACE | FF_TX_OUT_OF_PLACE |                         \ 
  528                   AV_TX_UNALIGNED | FF_TX_FORWARD_ONLY,                        \ 
  533     .init       = TX_NAME(ff_tx_fft_factor_init),                              \ 
  534     .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                                         \ 
  535     .prio       = FF_TX_PRIO_BASE,                                             \ 
  544 #define BUTTERFLIES(a0, a1, a2, a3)            \ 
  550         BF(t3, t5, t5, t1);                    \ 
  551         BF(a2.re, a0.re, r0, t5);              \ 
  552         BF(a3.im, a1.im, i1, t3);              \ 
  553         BF(t4, t6, t2, t6);                    \ 
  554         BF(a3.re, a1.re, r1, t4);              \ 
  555         BF(a2.im, a0.im, i0, t6);              \ 
  558 #define TRANSFORM(a0, a1, a2, a3, wre, wim)    \ 
  560         CMUL(t1, t2, a2.re, a2.im, wre, -wim); \ 
  561         CMUL(t5, t6, a3.re, a3.im, wre,  wim); \ 
  562         BUTTERFLIES(a0, a1, a2, a3);           \ 
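TRANSFORM is one split-radix butterfly pair: the two upper inputs are rotated by a conjugate pair of twiddles and then merged with the lower inputs. BF and CMUL are helpers defined outside this template by the including translation unit; their assumed semantics, for orientation only, are:

    /* Assumed helper semantics (paraphrased, not copied from this file):
     *   BF(x, y, a, b)                      x = a - b;  y = a + b;
     *   CMUL(dre, dim, are, aim, bre, bim)  dre + i*dim = (are + i*aim)*(bre + i*bim)
     * so TRANSFORM(a0, a1, a2, a3, wre, wim) multiplies a2 by (wre - i*wim),
     * a3 by (wre + i*wim), and feeds the results into BUTTERFLIES(). */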
  567                                                  const TXSample *cos, int len)
  572     const TXSample *wim = cos + o1 - 7;
 
  573     TXUSample t1, t2, t3, t4, t5, t6, r0, i0, r1, i1;
 
  575     for (int i = 0; i < len; i += 4) {
  576         TRANSFORM(z[0], z[o1 + 0], z[o2 + 0], z[o3 + 0], cos[0], wim[7]);
 
  577         TRANSFORM(z[2], z[o1 + 2], z[o2 + 2], z[o3 + 2], cos[2], wim[5]);
 
  578         TRANSFORM(z[4], z[o1 + 4], z[o2 + 4], z[o3 + 4], cos[4], wim[3]);
 
  579         TRANSFORM(z[6], z[o1 + 6], z[o2 + 6], z[o3 + 6], cos[6], wim[1]);
 
  581         TRANSFORM(z[1], z[o1 + 1], z[o2 + 1], z[o3 + 1], cos[1], wim[6]);
 
  582         TRANSFORM(z[3], z[o1 + 3], z[o2 + 3], z[o3 + 3], cos[3], wim[4]);
 
  583         TRANSFORM(z[5], z[o1 + 5], z[o2 + 5], z[o3 + 5], cos[5], wim[2]);
 
  584         TRANSFORM(z[7], z[o1 + 7], z[o2 + 7], z[o3 + 7], cos[7], wim[0]);
 
  603 #define DECL_SR_CODELET_DEF(n)                              \ 
  604 static const FFTXCodelet TX_NAME(ff_tx_fft##n##_ns_def) = { \ 
  605     .name       = TX_NAME_STR("fft" #n "_ns"),              \ 
  606     .function   = TX_NAME(ff_tx_fft##n##_ns),               \ 
  607     .type       = TX_TYPE(FFT),                             \ 
  608     .flags      = FF_TX_OUT_OF_PLACE | AV_TX_INPLACE |      \ 
  609                   AV_TX_UNALIGNED | FF_TX_PRESHUFFLE,       \ 
  614     .init       = TX_NAME(ff_tx_fft_sr_codelet_init),       \ 
  615     .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                      \ 
  616     .prio       = FF_TX_PRIO_BASE,                          \ 
  619 #define DECL_SR_CODELET(n, n2, n4)                                    \ 
  620 static void TX_NAME(ff_tx_fft##n##_ns)(AVTXContext *s, void *_dst,    \ 
  621                                         void *_src, ptrdiff_t stride) \ 
  623     TXComplex *src = _src;                                            \ 
  624     TXComplex *dst = _dst;                                            \ 
  625     const TXSample *cos = TX_TAB(ff_tx_tab_##n);                      \ 
  627     TX_NAME(ff_tx_fft##n2##_ns)(s, dst,        src,        stride);   \ 
  628     TX_NAME(ff_tx_fft##n4##_ns)(s, dst + n4*2, src + n4*2, stride);   \ 
  629     TX_NAME(ff_tx_fft##n4##_ns)(s, dst + n4*3, src + n4*3, stride);   \ 
  630     TX_NAME(ff_tx_fft_sr_combine)(dst, cos, n4 >> 1);                 \ 
  633 DECL_SR_CODELET_DEF(n) 
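Each DECL_SR_CODELET(n, n2, n4) expansion recurses into one half-length and two quarter-length sub-codelets and then calls ff_tx_fft_sr_combine, so the power-of-two codelets are built up pairwise. A hedged example of how the chain is typically instantiated (lengths illustrative):

    DECL_SR_CODELET(32,   16,   8)   /* fft32_ns = fft16_ns + 2x fft8_ns  + combine */
    DECL_SR_CODELET(64,   32,  16)   /* fft64_ns = fft32_ns + 2x fft16_ns + combine */
    DECL_SR_CODELET(128,  64,  32)   /* ...and so on up to the largest table length */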
  652     TXSample t1, t2, t3, t4, t5, t6, t7, t8;
 
  669     TXUSample t1, t2, t3, t4, t5, t6, r0, i0, r1, i1;
 
  670     const TXSample cos = TX_TAB(ff_tx_tab_8)[1];
 
  688     const TXSample *cos = TX_TAB(ff_tx_tab_16);
 
  690     TXUSample t1, t2, t3, t4, t5, t6, r0, i0, r1, i1;
 
  691     TXSample cos_16_1 = cos[1];
 
  692     TXSample cos_16_2 = cos[2];
 
  693     TXSample cos_16_3 = cos[3];
 
  777     int *map = s->sub[0].map;
  782     for (int i = 0; i < len; i++)
  785     s->fn[0](&s->sub[0], dst2, dst1, stride);
  794     const int *map = s->sub->map;
  795     const int *inplace_idx = s->map;
  796     int src_idx, dst_idx;
  798     src_idx = *inplace_idx++;
  801         dst_idx = map[src_idx];
  804             dst_idx = map[dst_idx];
  805         } while (dst_idx != src_idx);
  807     } while ((src_idx = *inplace_idx++));
  813     .name       = TX_NAME_STR("fft"),
  827     .name       = TX_NAME_STR("fft_inplace_small"),
  841     .name       = TX_NAME_STR("fft_inplace"),
  866     for (int i = 0; i < len; i++) {
  867         for (int j = 0; j < len; j++) {
  868             const double factor = phase*i*j;
  884     const int n = s->len;
  885     double phase = s->inv ? 2.0*M_PI/n : -2.0*M_PI/n;
  889     for (int i = 0; i < n; i++) {
  891         for (int j = 0; j < n; j++) {
  892             const double factor = phase*i*j;
  911     const int n = s->len;
  915     for (int i = 0; i < n; i++) {
  917         for (int j = 0; j < n; j++) {
  929     .name       = TX_NAME_STR("fft_naive_small"),
  943     .name       = TX_NAME_STR("fft_naive"),
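ff_tx_fft_naive and ff_tx_fft_naive_small above are direct O(n^2) DFTs used as a fallback for lengths no specialized codelet covers: every output bin accumulates each input rotated by phase*i*j, with the sign of phase selecting forward or inverse. A self-contained sketch of the same loop structure in plain C (double precision rather than the template's TXComplex/TXSample types):

    #include <complex.h>
    #include <math.h>

    /* Naive forward DFT: out[i] = sum_j in[j] * e^(-2*pi*I*i*j/n).
     * Mirrors the i/j loops above; use +2*pi/n for the inverse transform. */
    static void naive_dft(complex double *out, const complex double *in, int n)
    {
        const double phase = -2.0 * M_PI / n;
        for (int i = 0; i < n; i++) {
            complex double sum = 0;
            for (int j = 0; j < n; j++) {
                const double factor = phase * i * j;
                sum += in[j] * (cos(factor) + I * sin(factor));
            }
            out[i] = sum;
        }
    }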
  965     size_t extra_tmp_len = 0;
 
  972     for (int i = 0; i < ret; i++) {
  973         int len1 = len_list[i];
  974         int len2 = len / len1;
  977         if (len2 & (len2 - 1))
  992         } else if (ret < 0) {
 1013         } else if (ret < 0) {
 1020             } else if (ret < 0) {
 1040                                           s->sub[0].len, s->sub[1].len)))
 1047     tmp = (int *)s->tmp;
 1048     for (int k = 0; k < len; k += s->sub[0].len) {
 1049         memcpy(tmp, &s->map[k], s->sub[0].len*sizeof(*tmp));
 1050         for (int i = 0; i < s->sub[0].len; i++)
 1051             s->map[k + i] = tmp[s->sub[0].map[i]];
 1056         extra_tmp_len = len;
 1058         extra_tmp_len = s->sub[0].len;
 1060     if (extra_tmp_len && !(s->exp = av_malloc(extra_tmp_len*sizeof(*s->exp))))
 1067                                    void *_in, ptrdiff_t stride)
 1069     const int n = s->sub[0].len, m = s->sub[1].len, l = s->len;
 1070     const int *in_map = s->map, *out_map = in_map + l;
 1071     const int *sub_map = s->sub[1].map;
 1077     for (int i = 0; i < m; i++) {
 1078         for (int j = 0; j < n; j++)
 1079             s->exp[j] = in[in_map[i*n + j]];
 1080         s->fn[0](&s->sub[0], &s->tmp[sub_map[i]], s->exp, m*sizeof(TXComplex));
 1083     for (int i = 0; i < n; i++)
 1084         s->fn[1](&s->sub[1], &tmp1[m*i], &s->tmp[m*i], sizeof(TXComplex));
 1086     for (int i = 0; i < l; i++)
 1091                                       void *_in, ptrdiff_t stride)
 1093     const int n = s->sub[0].len, m = s->sub[1].len, l = s->len;
 1094     const int *in_map = s->map, *out_map = in_map + l;
 1095     const int *sub_map = s->sub[1].map;
 1101     for (int i = 0; i < m; i++)
 1102         s->fn[0](&s->sub[0], &s->tmp[sub_map[i]], &in[i*n], m*sizeof(TXComplex));
 1104     for (int i = 0; i < n; i++)
 1105         s->fn[1](&s->sub[1], &tmp1[m*i], &s->tmp[m*i], sizeof(TXComplex));
 1107     for (int i = 0; i < l; i++)
 1112     .name       = TX_NAME_STR("fft_pfa"),
 1126     .name       = TX_NAME_STR("fft_pfa_ns"),
 
 1147     s->scale_d = *((SCALE_TYPE *)scale);
 1148     s->scale_f = s->scale_d;
 1157     double scale = s->scale_d;
 1159     const double phase = M_PI/(4.0*len);
 1163     for (int i = 0; i < len; i++) {
 1165         for (int j = 0; j < len*2; j++) {
 1166             int a = (2*j + 1 + len) * (2*i + 1);
 1167             sum += UNSCALE(src[j]) * cos(a * phase);
 1178     double scale = s->scale_d;
 1179     int len = s->len >> 1;
 1181     const double phase = M_PI/(4.0*len2);
 1185     for (int i = 0; i < len; i++) {
 1188         double i_d = phase * (4*len  - 2*i - 1);
 1189         double i_u = phase * (3*len2 + 2*i + 1);
 1190         for (int j = 0; j < len2; j++) {
 1191             double a = (2 * j + 1);
 1192             double a_d = cos(a * i_d);
 1193             double a_u = cos(a * i_u);
 1204     .name       = TX_NAME_STR("mdct_naive_fwd"),
 
 1218     .name       = TX_NAME_STR("mdct_naive_inv"),
 1243     s->scale_d = *((SCALE_TYPE *)scale);
 1244     s->scale_f = s->scale_d;
 1264         memcpy(s->map, s->sub->map, (len >> 1)*sizeof(*s->map));
 1266         for (int i = 0; i < len >> 1; i++)
 1275         for (int i = 0; i < (s->len >> 1); i++)
 1286     const int len2 = s->len >> 1;
 1287     const int len4 = s->len >> 2;
 1288     const int len3 = len2 * 3;
 1289     const int *sub_map = s->map;
 1293     for (int i = 0; i < len2; i++) {
 1295         const int idx = sub_map[i];
 1297             tmp.re = FOLD(-src[ len2 + k],  src[1*len2 - 1 - k]);
 1298             tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]);
 1300             tmp.re = FOLD(-src[ len2 + k], -src[5*len2 - 1 - k]);
 1301             tmp.im = FOLD( src[-len2 + k], -src[1*len3 - 1 - k]);
 1303         CMUL(z[idx].im, z[idx].re, tmp.re, tmp.im, exp[i].re, exp[i].im);
 1308     for (int i = 0; i < len4; i++) {
 1309         const int i0 = len4 + i, i1 = len4 - i - 1;
 
 1324     const TXSample *src = _src, *in1, *in2;
 1325     const int len2 = s->len >> 1;
 1326     const int len4 = s->len >> 2;
 1327     const int *sub_map = s->map;
 1333     for (int i = 0; i < len2; i++) {
 1342     for (int i = 0; i < len4; i++) {
 1343         const int i0 = len4 + i, i1 = len4 - i - 1;
 1347         CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re);
 1348         CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re);
 1353     .name       = TX_NAME_STR("mdct_fwd"),
 1367     .name       = TX_NAME_STR("mdct_inv"),
 1389     s->scale_d = *((SCALE_TYPE *)scale);
 1390     s->scale_f = s->scale_d;
 1403     int len  = s->len << 1;
 1404     int len2 = len >> 1;
 1405     int len4 = len >> 2;
 1412     for (int i = 0; i < len4; i++) {
 1419     .name       = TX_NAME_STR("mdct_inv_full"),
 
 1444     sub_len = len / cd->factors[0];
 1446     s->scale_d = *((SCALE_TYPE *)scale);
 1447     s->scale_f = s->scale_d;
 1454                                 sub_len, inv, scale)))
 1461     if (cd->factors[0] == 15)
 1468     for (int i = 0; i < len; i++)
 
 1479 #define DECL_COMP_IMDCT(N)                                                     \ 
 1480 static void TX_NAME(ff_tx_mdct_pfa_##N##xM_inv)(AVTXContext *s, void *_dst,    \ 
 1481                                                 void *_src, ptrdiff_t stride)  \ 
 1483     TXComplex fft##N##in[N];                                                   \ 
 1484     TXComplex *z = _dst, *exp = s->exp;                                        \ 
 1485     const TXSample *src = _src, *in1, *in2;                                    \ 
 1486     const int len4 = s->len >> 2;                                              \ 
 1487     const int len2 = s->len >> 1;                                              \ 
 1488     const int m = s->sub->len;                                                 \ 
 1489     const int *in_map = s->map, *out_map = in_map + N*m;                       \ 
 1490     const int *sub_map = s->sub->map;                                          \ 
 1492     stride /= sizeof(*src);                      \ 
 1494     in2 = src + ((N*m*2) - 1) * stride;                                        \ 
 1496     for (int i = 0; i < len2; i += N) {                                        \ 
 1497         for (int j = 0; j < N; j++) {                                          \ 
 1498             const int k = in_map[j];                                           \ 
 1499             TXComplex tmp = { in2[-k*stride], in1[k*stride] };                 \ 
 1500             CMUL3(fft##N##in[j], tmp, exp[j]);                                 \ 
 1502         fft##N(s->tmp + *(sub_map++), fft##N##in, m);                          \ 
 1507     for (int i = 0; i < N; i++)                                                \ 
 1508         s->fn[0](&s->sub[0], s->tmp + m*i, s->tmp + m*i, sizeof(TXComplex));   \ 
 1510     for (int i = 0; i < len4; i++) {                                           \ 
 1511         const int i0 = len4 + i, i1 = len4 - i - 1;                            \ 
 1512         const int s0 = out_map[i0], s1 = out_map[i1];                          \ 
 1513         TXComplex src1 = { s->tmp[s1].im, s->tmp[s1].re };                     \ 
 1514         TXComplex src0 = { s->tmp[s0].im, s->tmp[s0].re };                     \ 
 1516         CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re);    \ 
 1517         CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re);    \ 
 1521 static const FFTXCodelet TX_NAME(ff_tx_mdct_pfa_##N##xM_inv_def) = {           \ 
 1522     .name       = TX_NAME_STR("mdct_pfa_" #N "xM_inv"),                        \ 
 1523     .function   = TX_NAME(ff_tx_mdct_pfa_##N##xM_inv),                         \ 
 1524     .type       = TX_TYPE(MDCT),                                               \ 
 1525     .flags      = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,   \ 
 1526     .factors    = { N, TX_FACTOR_ANY },                                        \ 
 1529     .max_len    = TX_LEN_UNLIMITED,                                            \ 
 1530     .init       = TX_NAME(ff_tx_mdct_pfa_init),                                \ 
 1531     .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                                         \ 
 1532     .prio       = FF_TX_PRIO_BASE,                                             \ 
 1541 #define DECL_COMP_MDCT(N)                                                      \ 
 1542 static void TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd)(AVTXContext *s, void *_dst,    \ 
 1543                                                 void *_src, ptrdiff_t stride)  \ 
 1545     TXComplex fft##N##in[N];                                                   \ 
 1546     TXSample *src = _src, *dst = _dst;                                         \ 
 1547     TXComplex *exp = s->exp, tmp;                                              \ 
 1548     const int m = s->sub->len;                                                 \ 
 1549     const int len4 = N*m;                                                      \ 
 1550     const int len3 = len4 * 3;                                                 \ 
 1551     const int len8 = s->len >> 2;                                              \ 
 1552     const int *in_map = s->map, *out_map = in_map + N*m;                       \ 
 1553     const int *sub_map = s->sub->map;                                          \ 
 1555     stride /= sizeof(*dst);                                                    \ 
 1557     for (int i = 0; i < m; i++) {              \ 
 1558         for (int j = 0; j < N; j++) {                                          \ 
 1559             const int k = in_map[i*N + j];                                     \ 
 1561                 tmp.re = FOLD(-src[ len4 + k],  src[1*len4 - 1 - k]);          \ 
 1562                 tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]);          \ 
 1564                 tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]);          \ 
 1565                 tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]);          \ 
 1567             CMUL(fft##N##in[j].im, fft##N##in[j].re, tmp.re, tmp.im,           \ 
 1568                  exp[k >> 1].re, exp[k >> 1].im);                              \ 
 1570         fft##N(s->tmp + sub_map[i], fft##N##in, m);                            \ 
 1573     for (int i = 0; i < N; i++)                                                \ 
 1574         s->fn[0](&s->sub[0], s->tmp + m*i, s->tmp + m*i, sizeof(TXComplex));   \ 
 1576     for (int i = 0; i < len8; i++) {                                           \ 
 1577         const int i0 = len8 + i, i1 = len8 - i - 1;                            \ 
 1578         const int s0 = out_map[i0], s1 = out_map[i1];                          \ 
 1579         TXComplex src1 = { s->tmp[s1].re, s->tmp[s1].im };                     \ 
 1580         TXComplex src0 = { s->tmp[s0].re, s->tmp[s0].im };                     \ 
 1582         CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im,    \ 
 1583              exp[i0].im, exp[i0].re);                                          \ 
 1584         CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im,    \ 
 1585              exp[i1].im, exp[i1].re);                                          \ 
 1589 static const FFTXCodelet TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd_def) = {           \ 
 1590     .name       = TX_NAME_STR("mdct_pfa_" #N "xM_fwd"),                        \ 
 1591     .function   = TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd),                         \ 
 1592     .type       = TX_TYPE(MDCT),                                               \ 
 1593     .flags      = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,   \ 
 1594     .factors    = { N, TX_FACTOR_ANY },                                        \ 
 1597     .max_len    = TX_LEN_UNLIMITED,                                            \ 
 1598     .init       = TX_NAME(ff_tx_mdct_pfa_init),                                \ 
 1599     .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                                         \ 
 1600     .prio       = FF_TX_PRIO_BASE,                                             \ 
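DECL_COMP_IMDCT(N) and DECL_COMP_MDCT(N) each stamp out one prime-factor (I)MDCT codelet whose inner FFT handles N points and whose outer sub-transform handles the remaining M points. Judging by the codelet table near the end of the file, the expansions presumably cover the small odd factors:

    /* Illustrative -- the registered mdct_pfa_{3,5,7,9,15}xM codelets imply: */
    DECL_COMP_MDCT(3)
    DECL_COMP_MDCT(5)
    DECL_COMP_MDCT(7)
    DECL_COMP_MDCT(9)
    DECL_COMP_MDCT(15)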
 1622     s->scale_d = *((SCALE_TYPE *)scale);
 1623     s->scale_f = s->scale_d;
 1630     if (!(s->exp = av_mallocz((8 + 2*len4)*sizeof(*s->exp))))
 1633     tab = (TXSample *)s->exp;
 1637     m = (inv ? 2*s->scale_d : s->scale_d);
 1639     *tab++ = RESCALE((inv ? 0.5 : 1.0) * m);
 1640     *tab++ = RESCALE(inv ? 0.5*m : 1.0*m);
 1641     *tab++ = RESCALE( m);
 1642     *tab++ = RESCALE(-m);
 1644     *tab++ = RESCALE( (0.5 - 0.0) * m);
 1646         *tab++ = 1 / s->scale_f;
 1648         *tab++ = RESCALE( (0.0 - 0.5) * m);
 1649     *tab++ = RESCALE( (0.5 - inv) * m);
 1650     *tab++ = RESCALE(-(0.5 - inv) * m);
 1652     for (int i = 0; i < len4; i++)
 1653         *tab++ = RESCALE(cos(i*f));
 1655     tab = ((TXSample *)s->exp) + len4 + 8;
 1657     for (int i = 0; i < len4; i++)
 1658         *tab++ = RESCALE(cos(((len - i*4)/4.0)*f)) * (inv ? 1 : -1);
 
 1663 #define DECL_RDFT(n, inv)                                                      \ 
 1664 static void TX_NAME(ff_tx_rdft_ ##n)(AVTXContext *s, void *_dst,               \ 
 1665                                      void *_src, ptrdiff_t stride)             \ 
 1667     const int len2 = s->len >> 1;                                              \ 
 1668     const int len4 = s->len >> 2;                                              \ 
 1669     const TXSample *fact = (void *)s->exp;                                     \ 
 1670     const TXSample *tcos = fact + 8;                                           \ 
 1671     const TXSample *tsin = tcos + len4;                                        \ 
 1672     TXComplex *data = inv ? _src : _dst;                                       \ 
 1676         s->fn[0](&s->sub[0], data, _src, sizeof(TXComplex));                   \ 
 1678         data[0].im = data[len2].re;                                            \ 
 1683     t[0].re = data[0].re;                                                      \ 
 1684     data[0].re = t[0].re + data[0].im;                                         \ 
 1685     data[0].im = t[0].re - data[0].im;                                         \ 
 1686     data[   0].re = MULT(fact[0], data[   0].re);                              \ 
 1687     data[   0].im = MULT(fact[1], data[   0].im);                              \ 
 1688     data[len4].re = MULT(fact[2], data[len4].re);                              \ 
 1689     data[len4].im = MULT(fact[3], data[len4].im);                              \ 
 1691     for (int i = 1; i < len4; i++) {                                           \ 
 1693         t[0].re = MULT(fact[4], (data[i].re + data[len2 - i].re));             \ 
 1694         t[0].im = MULT(fact[5], (data[i].im - data[len2 - i].im));             \ 
 1695         t[1].re = MULT(fact[6], (data[i].im + data[len2 - i].im));             \ 
 1696         t[1].im = MULT(fact[7], (data[i].re - data[len2 - i].re));             \ 
 1699         CMUL(t[2].re, t[2].im, t[1].re, t[1].im, tcos[i], tsin[i]);            \ 
 1701         data[       i].re = t[0].re + t[2].re;                                 \ 
 1702         data[       i].im = t[2].im - t[0].im;                                 \ 
 1703         data[len2 - i].re = t[0].re - t[2].re;                                 \ 
 1704         data[len2 - i].im = t[2].im + t[0].im;                                 \ 
 1708         s->fn[0](&s->sub[0], _dst, data, sizeof(TXComplex));                   \ 
 1711         data[len2].re = data[0].im;                                            \ 
 1712         data[   0].im = data[len2].im = 0;                                     \ 
 1716 static const FFTXCodelet TX_NAME(ff_tx_rdft_ ##n## _def) = {                   \ 
 1717     .name       = TX_NAME_STR("rdft_" #n),                                     \ 
 1718     .function   = TX_NAME(ff_tx_rdft_ ##n),                                    \ 
 1719     .type       = TX_TYPE(RDFT),                                               \ 
 1720     .flags      = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE |       \ 
 1721                   (inv ? FF_TX_INVERSE_ONLY : FF_TX_FORWARD_ONLY),             \ 
 1722     .factors    = { 4, TX_FACTOR_ANY },                                        \ 
 1725     .max_len    = TX_LEN_UNLIMITED,                                            \ 
 1726     .init       = TX_NAME(ff_tx_rdft_init),                                    \ 
 1727     .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                                         \ 
 1728     .prio       = FF_TX_PRIO_BASE,                                             \ 
 1734 #define DECL_RDFT_HALF(n, mode, mod2)                                          \ 
 1735 static void TX_NAME(ff_tx_rdft_ ##n)(AVTXContext *s, void *_dst,               \ 
 1736                                         void *_src, ptrdiff_t stride)          \ 
 1738     const int len = s->len;                                                    \ 
 1739     const int len2 = len >> 1;                                                 \ 
 1740     const int len4 = len >> 2;                                                 \ 
 1741     const int aligned_len4 = FFALIGN(len, 4)/4;                                \ 
 1742     const TXSample *fact = (void *)s->exp;                                     \ 
 1743     const TXSample *tcos = fact + 8;                                           \ 
 1744     const TXSample *tsin = tcos + aligned_len4;                                \ 
 1745     TXComplex *data = _dst;                                                    \ 
 1746     TXSample *out = _dst;                    \ 
 1748     av_unused TXSample tmp_mid;                                                \ 
 1752     s->fn[0](&s->sub[0], _dst, _src, sizeof(TXComplex));                       \ 
 1754     tmp_dc = data[0].re;                                                       \ 
 1755     data[   0].re = tmp_dc + data[0].im;                                       \ 
 1756     tmp_dc        = tmp_dc - data[0].im;                                       \ 
 1758     data[   0].re = MULT(fact[0], data[   0].re);                              \ 
 1759     tmp_dc        = MULT(fact[1],        tmp_dc);                              \ 
 1760     data[len4].re = MULT(fact[2], data[len4].re);                              \ 
 1763         data[len4].im = MULT(fact[3], data[len4].im);                          \ 
 1766         sl = data[len4 + 1];                                                   \ 
 1767         if (mode == AV_TX_REAL_TO_REAL)                                        \ 
 1768             tmp[0] = MULT(fact[4], (sf.re + sl.re));                           \ 
 1770             tmp[0] = MULT(fact[5], (sf.im - sl.im));                           \ 
 1771         tmp[1] = MULT(fact[6], (sf.im + sl.im));                               \ 
 1772         tmp[2] = MULT(fact[7], (sf.re - sl.re));                               \ 
 1774         if (mode == AV_TX_REAL_TO_REAL) {                                      \ 
 1775             tmp[3]  = tmp[1]*tcos[len4] - tmp[2]*tsin[len4];                   \ 
 1776             tmp_mid = (tmp[0] - tmp[3]);                                       \ 
 1778             tmp[3]  = tmp[1]*tsin[len4] + tmp[2]*tcos[len4];                   \ 
 1779             tmp_mid = (tmp[0] + tmp[3]);                                       \ 
 1784     for (int i = 1; i <= len4; i++) {                                          \ 
 1786         TXComplex sf = data[i];                                                \ 
 1787         TXComplex sl = data[len2 - i];                                         \ 
 1789         if (mode == AV_TX_REAL_TO_REAL)                                        \ 
 1790             tmp[0] = MULT(fact[4], (sf.re + sl.re));                           \ 
 1792             tmp[0] = MULT(fact[5], (sf.im - sl.im));                           \ 
 1794         tmp[1] = MULT(fact[6], (sf.im + sl.im));                               \ 
 1795         tmp[2] = MULT(fact[7], (sf.re - sl.re));                               \ 
 1797         if (mode == AV_TX_REAL_TO_REAL) {                                      \ 
 1798             tmp[3]           = tmp[1]*tcos[i] - tmp[2]*tsin[i];                \ 
 1799             out[i]           = (tmp[0] + tmp[3]);                              \ 
 1800             out[len - i]     = (tmp[0] - tmp[3]);                              \ 
 1802             tmp[3]           = tmp[1]*tsin[i] + tmp[2]*tcos[i];                \ 
 1803             out[i - 1]       = (tmp[3] - tmp[0]);                              \ 
 1804             out[len - i - 1] = (tmp[0] + tmp[3]);                              \ 
 1808     for (int i = 1; i < (len4 + (mode == AV_TX_REAL_TO_IMAGINARY)); i++)       \ 
 1809         out[len2 - i] = out[len - i];                                          \ 
 1811     if (mode == AV_TX_REAL_TO_REAL) {                                          \ 
 1812         out[len2] = tmp_dc;                                                    \ 
 1814             out[len4 + 1] = tmp_mid * fact[5];                                 \ 
 1815     } else if (mod2) {                                                         \ 
 1816         out[len4] = tmp_mid;                                                   \ 
 1820 static const FFTXCodelet TX_NAME(ff_tx_rdft_ ##n## _def) = {                   \ 
 1821     .name       = TX_NAME_STR("rdft_" #n),                                     \ 
 1822     .function   = TX_NAME(ff_tx_rdft_ ##n),                                    \ 
 1823     .type       = TX_TYPE(RDFT),                                               \ 
 1824     .flags      = AV_TX_UNALIGNED | AV_TX_INPLACE | mode |                     \ 
 1825                   FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,                     \ 
 1826     .factors    = { 2 + 2*(!mod2), TX_FACTOR_ANY },                            \ 
 1828     .min_len    = 2 + 2*(!mod2),                                               \ 
 1829     .max_len    = TX_LEN_UNLIMITED,                                            \ 
 1830     .init       = TX_NAME(ff_tx_rdft_init),                                    \ 
 1831     .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                                         \ 
 1832     .prio       = FF_TX_PRIO_BASE,                                             \ 
 1850     SCALE_TYPE rsc = *((SCALE_TYPE *)scale);
 1865     tab = (TXSample *)s->exp;
 1869     for (int i = 0; i < len; i++)
 1870         tab[i] = RESCALE(cos(i*freq)*(!inv + 1));
 1873         for (int i = 0; i < len/2; i++)
 1874             tab[len + i] = RESCALE(0.5 / sin((2*i + 1)*freq));
 1876         for (int i = 0; i < len/2; i++)
 1877             tab[len + i] = RESCALE(cos((len - 2*i - 1)*freq));
 
 1888     const int len = s->len;
 1889     const int len2 = len >> 1;
 1890     const TXSample *exp = (void *)s->exp;
 1895     TXSample tmp1, tmp2;
 1898     for (int i = 0; i < len2; i++) {
 1899         TXSample in1 = src[i];
 1900         TXSample in2 = src[len - i - 1];
 1910         tmp2 = (tmp2 + 0x40000000) >> 31;
 1912         tmp1 = (in1 + in2)*0.5;
 1913         tmp2 = (in1 - in2)*s;
 1916         src[i]           = tmp1 + tmp2;
 1917         src[len - i - 1] = tmp1 - tmp2;
 
 1924     for (int i = len - 2; i > 0; i -= 2) {
 1936     dst[0] = (tmp1 + 0x40000000) >> 31;
 1948     const int len = s->len;
 1949     const int len2 = len >> 1;
 1950     const TXSample *exp = (void *)s->exp;
 1953     tmp2 = (2*tmp2 + 0x40000000) >> 31;
 1955     TXSample tmp1, tmp2 = 2*src[len - 1];
 1960     for (int i = len - 2; i >= 2; i -= 2) {
 1961         TXSample val1 = src[i - 0];
 1962         TXSample val2 = src[i - 1] - src[i + 1];
 1969     for (int i = 0; i < len2; i++) {
 1970         TXSample in1 = dst[i];
 1971         TXSample in2 = dst[len - i - 1];
 1978         tmp2 = (tmp2 + 0x40000000) >> 31;
 1981         dst[i]            = tmp1 + tmp2;
 1982         dst[len - i - 1]  = tmp1 - tmp2;
 
 1987     .name       = TX_NAME_STR("dctII"),
 2001     .name       = TX_NAME_STR("dctIII"),
 
 2022     SCALE_TYPE rsc = *((SCALE_TYPE *)scale);
 2051     const int len = s->len - 1;
 2052     TXSample *tmp = (TXSample *)s->tmp;
 2054     stride /= sizeof(TXSample);
 2056     for (int i = 0; i < len; i++)
 2061     s->fn[0](&s->sub[0], dst, tmp, sizeof(TXSample));
 2069     const int len = s->len + 1;
 2070     TXSample *tmp = (void *)s->tmp;
 2072     stride /= sizeof(TXSample);
 2076     for (int i = 1; i < len; i++) {
 2088     .name       = TX_NAME_STR("dctI"),
 2102     .name       = TX_NAME_STR("dstI"),
 2118     int len4 = s->len >> 1;
 2119     double scale = s->scale_d;
 2120     const double theta = (scale < 0 ? len4 : 0) + 1.0/8.0;
 
 2121     size_t alloc = pre_tab ? 2*len4 : len4;
 
 2131     for (int i = 0; i < len4; i++) {
 2138         for (int i = 0; i < len4; i++)
 2139             s->exp[i] = s->exp[len4 + pre_tab[i]];
 
 2152     &TX_NAME(ff_tx_fft128_ns_def),
 2153     &TX_NAME(ff_tx_fft256_ns_def),
 2154     &TX_NAME(ff_tx_fft512_ns_def),
 2155     &TX_NAME(ff_tx_fft1024_ns_def),
 2156     &TX_NAME(ff_tx_fft2048_ns_def),
 2157     &TX_NAME(ff_tx_fft4096_ns_def),
 2158     &TX_NAME(ff_tx_fft8192_ns_def),
 2159     &TX_NAME(ff_tx_fft16384_ns_def),
 2160     &TX_NAME(ff_tx_fft32768_ns_def),
 2161     &TX_NAME(ff_tx_fft65536_ns_def),
 2162     &TX_NAME(ff_tx_fft131072_ns_def),
 2163     &TX_NAME(ff_tx_fft262144_ns_def),
 2164     &TX_NAME(ff_tx_fft524288_ns_def),
 2165     &TX_NAME(ff_tx_fft1048576_ns_def),
 2166     &TX_NAME(ff_tx_fft2097152_ns_def),
 2183     &TX_NAME(ff_tx_fft_inplace_def),
 2184     &TX_NAME(ff_tx_fft_inplace_small_def),
 2186     &TX_NAME(ff_tx_fft_pfa_ns_def),
 2187     &TX_NAME(ff_tx_fft_naive_def),
 2188     &TX_NAME(ff_tx_fft_naive_small_def),
 2191     &TX_NAME(ff_tx_mdct_pfa_3xM_fwd_def),
 2192     &TX_NAME(ff_tx_mdct_pfa_5xM_fwd_def),
 2193     &TX_NAME(ff_tx_mdct_pfa_7xM_fwd_def),
 2194     &TX_NAME(ff_tx_mdct_pfa_9xM_fwd_def),
 2195     &TX_NAME(ff_tx_mdct_pfa_15xM_fwd_def),
 2196     &TX_NAME(ff_tx_mdct_pfa_3xM_inv_def),
 2197     &TX_NAME(ff_tx_mdct_pfa_5xM_inv_def),
 2198     &TX_NAME(ff_tx_mdct_pfa_7xM_inv_def),
 2199     &TX_NAME(ff_tx_mdct_pfa_9xM_inv_def),
 2200     &TX_NAME(ff_tx_mdct_pfa_15xM_inv_def),
 2201     &TX_NAME(ff_tx_mdct_naive_fwd_def),
 2202     &TX_NAME(ff_tx_mdct_naive_inv_def),
 2203     &TX_NAME(ff_tx_mdct_inv_full_def),
 2206     &TX_NAME(ff_tx_rdft_r2r_mod2_def),
 2208     &TX_NAME(ff_tx_rdft_r2i_mod2_def),
 
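None of these codelets is called directly; they are registered in the table above and selected at runtime through the public libavutil/tx.h API. A minimal usage sketch (single-precision forward FFT of length 1024; error handling trimmed, scale handling simplified):

    #include <libavutil/tx.h>

    static int run_fft1024(AVComplexFloat *out, AVComplexFloat *in)
    {
        AVTXContext *ctx = NULL;
        av_tx_fn fn;
        float scale = 1.0f;
        /* av_tx_init() picks the best matching codelet, e.g. one of the
         * fftN_ns split-radix codelets registered above. */
        int ret = av_tx_init(&ctx, &fn, AV_TX_FLOAT_FFT, 0 /* forward */, 1024, &scale, 0);
        if (ret < 0)
            return ret;
        fn(ctx, out, in, sizeof(AVComplexFloat));  /* stride is in bytes */
        av_tx_uninit(&ctx);
        return 0;
    }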
  