FFmpeg
mdct15.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013-2014 Mozilla Corporation
3  * Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * Celt non-power of 2 iMDCT
25  */
26 
27 #include <float.h>
28 #include <math.h>
29 #include <stddef.h>
30 
31 #include "config.h"
32 
33 #include "libavutil/attributes.h"
34 #include "libavutil/common.h"
35 
36 #include "mdct15.h"
37 
38 #define FFT_FLOAT 1
39 #include "fft-internal.h"
40 
41 #define CMUL3(c, a, b) CMUL((c).re, (c).im, (a).re, (a).im, (b).re, (b).im)
42 
44 {
45  MDCT15Context *s = *ps;
46 
47  if (!s)
48  return;
49 
50  ff_fft_end(&s->ptwo_fft);
51 
52  av_freep(&s->pfa_prereindex);
53  av_freep(&s->pfa_postreindex);
54  av_freep(&s->twiddle_exptab);
55  av_freep(&s->tmp);
56 
57  av_freep(ps);
58 }
59 
61 {
62  int i, j;
63  const int b_ptwo = s->ptwo_fft.nbits; /* Bits for the power of two FFTs */
64  const int l_ptwo = 1 << b_ptwo; /* Total length for the power of two FFTs */
65  const int inv_1 = l_ptwo << ((4 - b_ptwo) & 3); /* (2^b_ptwo)^-1 mod 15 */
66  const int inv_2 = 0xeeeeeeef & ((1U << b_ptwo) - 1); /* 15^-1 mod 2^b_ptwo */
67 
68  s->pfa_prereindex = av_malloc_array(15 * l_ptwo, sizeof(*s->pfa_prereindex));
69  if (!s->pfa_prereindex)
70  return 1;
71 
72  s->pfa_postreindex = av_malloc_array(15 * l_ptwo, sizeof(*s->pfa_postreindex));
73  if (!s->pfa_postreindex)
74  return 1;
75 
76  /* Pre/Post-reindex */
77  for (i = 0; i < l_ptwo; i++) {
78  for (j = 0; j < 15; j++) {
79  const int q_pre = ((l_ptwo * j)/15 + i) >> b_ptwo;
80  const int q_post = (((j*inv_1)/15) + (i*inv_2)) >> b_ptwo;
81  const int k_pre = 15*i + (j - q_pre*15)*(1 << b_ptwo);
82  const int k_post = i*inv_2*15 + j*inv_1 - 15*q_post*l_ptwo;
83  s->pfa_prereindex[i*15 + j] = k_pre << 1;
84  s->pfa_postreindex[k_post] = l_ptwo*j + i;
85  }
86  }
87 
88  return 0;
89 }
90 
91 /* Stride is hardcoded to 3 */
92 static inline void fft5(FFTComplex *out, FFTComplex *in, FFTComplex exptab[2])
93 {
94  FFTComplex z0[4], t[6];
95 
96  t[0].re = in[3].re + in[12].re;
97  t[0].im = in[3].im + in[12].im;
98  t[1].im = in[3].re - in[12].re;
99  t[1].re = in[3].im - in[12].im;
100  t[2].re = in[6].re + in[ 9].re;
101  t[2].im = in[6].im + in[ 9].im;
102  t[3].im = in[6].re - in[ 9].re;
103  t[3].re = in[6].im - in[ 9].im;
104 
105  out[0].re = in[0].re + in[3].re + in[6].re + in[9].re + in[12].re;
106  out[0].im = in[0].im + in[3].im + in[6].im + in[9].im + in[12].im;
107 
108  t[4].re = exptab[0].re * t[2].re - exptab[1].re * t[0].re;
109  t[4].im = exptab[0].re * t[2].im - exptab[1].re * t[0].im;
110  t[0].re = exptab[0].re * t[0].re - exptab[1].re * t[2].re;
111  t[0].im = exptab[0].re * t[0].im - exptab[1].re * t[2].im;
112  t[5].re = exptab[0].im * t[3].re - exptab[1].im * t[1].re;
113  t[5].im = exptab[0].im * t[3].im - exptab[1].im * t[1].im;
114  t[1].re = exptab[0].im * t[1].re + exptab[1].im * t[3].re;
115  t[1].im = exptab[0].im * t[1].im + exptab[1].im * t[3].im;
116 
117  z0[0].re = t[0].re - t[1].re;
118  z0[0].im = t[0].im - t[1].im;
119  z0[1].re = t[4].re + t[5].re;
120  z0[1].im = t[4].im + t[5].im;
121 
122  z0[2].re = t[4].re - t[5].re;
123  z0[2].im = t[4].im - t[5].im;
124  z0[3].re = t[0].re + t[1].re;
125  z0[3].im = t[0].im + t[1].im;
126 
127  out[1].re = in[0].re + z0[3].re;
128  out[1].im = in[0].im + z0[0].im;
129  out[2].re = in[0].re + z0[2].re;
130  out[2].im = in[0].im + z0[1].im;
131  out[3].re = in[0].re + z0[1].re;
132  out[3].im = in[0].im + z0[2].im;
133  out[4].re = in[0].re + z0[0].re;
134  out[4].im = in[0].im + z0[3].im;
135 }
136 
137 static void fft15_c(FFTComplex *out, FFTComplex *in, FFTComplex *exptab, ptrdiff_t stride)
138 {
139  int k;
140  FFTComplex tmp1[5], tmp2[5], tmp3[5];
141 
142  fft5(tmp1, in + 0, exptab + 19);
143  fft5(tmp2, in + 1, exptab + 19);
144  fft5(tmp3, in + 2, exptab + 19);
145 
146  for (k = 0; k < 5; k++) {
147  FFTComplex t[2];
148 
149  CMUL3(t[0], tmp2[k], exptab[k]);
150  CMUL3(t[1], tmp3[k], exptab[2 * k]);
151  out[stride*k].re = tmp1[k].re + t[0].re + t[1].re;
152  out[stride*k].im = tmp1[k].im + t[0].im + t[1].im;
153 
154  CMUL3(t[0], tmp2[k], exptab[k + 5]);
155  CMUL3(t[1], tmp3[k], exptab[2 * (k + 5)]);
156  out[stride*(k + 5)].re = tmp1[k].re + t[0].re + t[1].re;
157  out[stride*(k + 5)].im = tmp1[k].im + t[0].im + t[1].im;
158 
159  CMUL3(t[0], tmp2[k], exptab[k + 10]);
160  CMUL3(t[1], tmp3[k], exptab[2 * k + 5]);
161  out[stride*(k + 10)].re = tmp1[k].re + t[0].re + t[1].re;
162  out[stride*(k + 10)].im = tmp1[k].im + t[0].im + t[1].im;
163  }
164 }
165 
166 static void mdct15(MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride)
167 {
168  int i, j;
169  const int len4 = s->len4, len3 = len4 * 3, len8 = len4 >> 1;
170  const int l_ptwo = 1 << s->ptwo_fft.nbits;
171  FFTComplex fft15in[15];
172 
173  /* Folding and pre-reindexing */
174  for (i = 0; i < l_ptwo; i++) {
175  for (j = 0; j < 15; j++) {
176  const int k = s->pfa_prereindex[i*15 + j];
177  FFTComplex tmp, exp = s->twiddle_exptab[k >> 1];
178  if (k < len4) {
179  tmp.re = -src[ len4 + k] + src[1*len4 - 1 - k];
180  tmp.im = -src[ len3 + k] - src[1*len3 - 1 - k];
181  } else {
182  tmp.re = -src[ len4 + k] - src[5*len4 - 1 - k];
183  tmp.im = src[-len4 + k] - src[1*len3 - 1 - k];
184  }
185  CMUL(fft15in[j].im, fft15in[j].re, tmp.re, tmp.im, exp.re, exp.im);
186  }
187  s->fft15(s->tmp + s->ptwo_fft.revtab[i], fft15in, s->exptab, l_ptwo);
188  }
189 
190  /* Then a 15xN FFT (where N is a power of two) */
191  for (i = 0; i < 15; i++)
192  s->ptwo_fft.fft_calc(&s->ptwo_fft, s->tmp + l_ptwo*i);
193 
194  /* Reindex again, apply twiddles and output */
195  for (i = 0; i < len8; i++) {
196  const int i0 = len8 + i, i1 = len8 - i - 1;
197  const int s0 = s->pfa_postreindex[i0], s1 = s->pfa_postreindex[i1];
198 
199  CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], s->tmp[s0].re, s->tmp[s0].im,
200  s->twiddle_exptab[i0].im, s->twiddle_exptab[i0].re);
201  CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], s->tmp[s1].re, s->tmp[s1].im,
202  s->twiddle_exptab[i1].im, s->twiddle_exptab[i1].re);
203  }
204 }
205 
206 static void imdct15_half(MDCT15Context *s, float *dst, const float *src,
207  ptrdiff_t stride)
208 {
209  FFTComplex fft15in[15];
210  FFTComplex *z = (FFTComplex *)dst;
211  int i, j, len8 = s->len4 >> 1, l_ptwo = 1 << s->ptwo_fft.nbits;
212  const float *in1 = src, *in2 = src + (s->len2 - 1) * stride;
213 
214  /* Reindex input, putting it into a buffer and doing an Nx15 FFT */
215  for (i = 0; i < l_ptwo; i++) {
216  for (j = 0; j < 15; j++) {
217  const int k = s->pfa_prereindex[i*15 + j];
218  FFTComplex tmp = { in2[-k*stride], in1[k*stride] };
219  CMUL3(fft15in[j], tmp, s->twiddle_exptab[k >> 1]);
220  }
221  s->fft15(s->tmp + s->ptwo_fft.revtab[i], fft15in, s->exptab, l_ptwo);
222  }
223 
224  /* Then a 15xN FFT (where N is a power of two) */
225  for (i = 0; i < 15; i++)
226  s->ptwo_fft.fft_calc(&s->ptwo_fft, s->tmp + l_ptwo*i);
227 
228  /* Reindex again, apply twiddles and output */
229  s->postreindex(z, s->tmp, s->twiddle_exptab, s->pfa_postreindex, len8);
230 }
231 
233  int *lut, ptrdiff_t len8)
234 {
235  int i;
236 
237  /* Reindex again, apply twiddles and output */
238  for (i = 0; i < len8; i++) {
239  const int i0 = len8 + i, i1 = len8 - i - 1;
240  const int s0 = lut[i0], s1 = lut[i1];
241 
242  CMUL(out[i1].re, out[i0].im, in[s1].im, in[s1].re, exp[i1].im, exp[i1].re);
243  CMUL(out[i0].re, out[i1].im, in[s0].im, in[s0].re, exp[i0].im, exp[i0].re);
244  }
245 }
246 
247 av_cold int ff_mdct15_init(MDCT15Context **ps, int inverse, int N, double scale)
248 {
249  MDCT15Context *s;
250  double alpha, theta;
251  int len2 = 15 * (1 << N);
252  int len = 2 * len2;
253  int i;
254 
255  /* Tested and verified to work on everything in between */
256  if ((N < 2) || (N > 13))
257  return AVERROR(EINVAL);
258 
259  s = av_mallocz(sizeof(*s));
260  if (!s)
261  return AVERROR(ENOMEM);
262 
263  s->fft_n = N - 1;
264  s->len4 = len2 / 2;
265  s->len2 = len2;
266  s->inverse = inverse;
267  s->fft15 = fft15_c;
268  s->mdct = mdct15;
269  s->imdct_half = imdct15_half;
270  s->postreindex = postrotate_c;
271 
272  if (ff_fft_init(&s->ptwo_fft, N - 1, s->inverse) < 0)
273  goto fail;
274 
276  goto fail;
277 
278  s->tmp = av_malloc_array(len, 2 * sizeof(*s->tmp));
279  if (!s->tmp)
280  goto fail;
281 
282  s->twiddle_exptab = av_malloc_array(s->len4, sizeof(*s->twiddle_exptab));
283  if (!s->twiddle_exptab)
284  goto fail;
285 
286  theta = 0.125f + (scale < 0 ? s->len4 : 0);
287  scale = sqrt(fabs(scale));
288  for (i = 0; i < s->len4; i++) {
289  alpha = 2 * M_PI * (i + theta) / len;
290  s->twiddle_exptab[i].re = cosf(alpha) * scale;
291  s->twiddle_exptab[i].im = sinf(alpha) * scale;
292  }
293 
294  /* 15-point FFT exptab */
295  for (i = 0; i < 19; i++) {
296  if (i < 15) {
297  double theta = (2.0f * M_PI * i) / 15.0f;
298  if (!s->inverse)
299  theta *= -1;
300  s->exptab[i].re = cosf(theta);
301  s->exptab[i].im = sinf(theta);
302  } else { /* Wrap around to simplify fft15 */
303  s->exptab[i] = s->exptab[i - 15];
304  }
305  }
306 
307  /* 5-point FFT exptab */
308  s->exptab[19].re = cosf(2.0f * M_PI / 5.0f);
309  s->exptab[19].im = sinf(2.0f * M_PI / 5.0f);
310  s->exptab[20].re = cosf(1.0f * M_PI / 5.0f);
311  s->exptab[20].im = sinf(1.0f * M_PI / 5.0f);
312 
313  /* Invert the phase for an inverse transform, do nothing for a forward transform */
314  if (s->inverse) {
315  s->exptab[19].im *= -1;
316  s->exptab[20].im *= -1;
317  }
318 
319  if (ARCH_X86)
321 
322  *ps = s;
323 
324  return 0;
325 
326 fail:
328  return AVERROR(ENOMEM);
329 }
exptab
static struct @131 * exptab
ff_fft_init
#define ff_fft_init
Definition: fft.h:135
stride
int stride
Definition: mace.c:144
imdct15_half
static void imdct15_half(MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride)
Definition: mdct15.c:206
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
out
FILE * out
Definition: movenc.c:54
fft15_c
static void fft15_c(FFTComplex *out, FFTComplex *in, FFTComplex *exptab, ptrdiff_t stride)
Definition: mdct15.c:137
im
float im
Definition: fft.c:78
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:26
CMUL3
#define CMUL3(c, a, b)
Definition: mdct15.c:41
mdct15.h
float.h
ff_mdct15_init
av_cold int ff_mdct15_init(MDCT15Context **ps, int inverse, int N, double scale)
Definition: mdct15.c:247
fft5
static void fft5(FFTComplex *out, FFTComplex *in, FFTComplex exptab[2])
Definition: mdct15.c:92
cosf
#define cosf(x)
Definition: libm.h:78
U
#define U(x)
Definition: vp56_arith.h:37
fail
#define fail()
Definition: checkasm.h:127
scale
static av_always_inline float scale(float x, float s)
Definition: vf_v360.c:1388
av_cold
#define av_cold
Definition: attributes.h:90
init_pfa_reindex_tabs
static int init_pfa_reindex_tabs(MDCT15Context *s)
Definition: mdct15.c:60
s
#define s(width, name)
Definition: cbs_vp9.c:257
s1
#define s1
Definition: regdef.h:38
ff_fft_end
#define ff_fft_end
Definition: fft.h:136
MDCT15Context
Definition: mdct15.h:30
f
#define f(width, name)
Definition: cbs_vp9.c:255
fabs
static __device__ float fabs(float a)
Definition: cuda_runtime.h:182
fft-internal.h
src
#define src
Definition: vp8dsp.c:255
postrotate_c
static void postrotate_c(FFTComplex *out, FFTComplex *in, FFTComplex *exp, int *lut, ptrdiff_t len8)
Definition: mdct15.c:232
sinf
#define sinf(x)
Definition: libm.h:419
exp
int8_t exp
Definition: eval.c:72
mdct15
static void mdct15(MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride)
Definition: mdct15.c:166
FFTComplex::im
FFTSample im
Definition: avfft.h:38
FFTComplex::re
FFTSample re
Definition: avfft.h:38
attributes.h
N
#define N
Definition: af_mcompand.c:53
M_PI
#define M_PI
Definition: mathematics.h:52
ff_mdct15_init_x86
void ff_mdct15_init_x86(MDCT15Context *s)
Definition: mdct15_init.c:82
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:271
av_malloc_array
#define av_malloc_array(a, b)
Definition: tableprint_vlc.h:32
common.h
av_mallocz
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:263
ff_mdct15_uninit
av_cold void ff_mdct15_uninit(MDCT15Context **ps)
Definition: mdct15.c:43
len
int len
Definition: vorbis_enc_data.h:426
inverse
static int inverse(AudioFWTDNContext *s, double **in, int *in_length, double *out, int out_length, int ch, uint64_t sn)
Definition: af_afwtdn.c:762
s0
#define s0
Definition: regdef.h:37
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
CMUL
#define CMUL(dre, dim, are, aim, bre, bim)
Definition: fft-internal.h:42
FFTComplex
Definition: avfft.h:37
re
float re
Definition: fft.c:78