FFmpeg
mdct15.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013-2014 Mozilla Corporation
3  * Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * Celt non-power of 2 iMDCT
25  */
26 
27 #include <float.h>
28 #include <math.h>
29 #include <stddef.h>
30 #include <stdint.h>
31 
32 #include "config.h"
33 
34 #include "libavutil/attributes.h"
35 #include "libavutil/error.h"
36 
37 #include "mdct15.h"
38 
39 #define FFT_FLOAT 1
40 #include "fft-internal.h"
41 
42 #define CMUL3(c, a, b) CMUL((c).re, (c).im, (a).re, (a).im, (b).re, (b).im)
43 
45 {
46  MDCT15Context *s = *ps;
47 
48  if (!s)
49  return;
50 
51  ff_fft_end(&s->ptwo_fft);
52 
53  av_freep(&s->pfa_prereindex);
54  av_freep(&s->pfa_postreindex);
55  av_freep(&s->twiddle_exptab);
56  av_freep(&s->tmp);
57 
58  av_freep(ps);
59 }
60 
62 {
63  int i, j;
64  const int b_ptwo = s->ptwo_fft.nbits; /* Bits for the power of two FFTs */
65  const int l_ptwo = 1 << b_ptwo; /* Total length for the power of two FFTs */
66  const int inv_1 = l_ptwo << ((4 - b_ptwo) & 3); /* (2^b_ptwo)^-1 mod 15 */
67  const int inv_2 = 0xeeeeeeef & ((1U << b_ptwo) - 1); /* 15^-1 mod 2^b_ptwo */
68 
69  s->pfa_prereindex = av_malloc_array(15 * l_ptwo, sizeof(*s->pfa_prereindex));
70  if (!s->pfa_prereindex)
71  return 1;
72 
73  s->pfa_postreindex = av_malloc_array(15 * l_ptwo, sizeof(*s->pfa_postreindex));
74  if (!s->pfa_postreindex)
75  return 1;
76 
77  /* Pre/Post-reindex */
78  for (i = 0; i < l_ptwo; i++) {
79  for (j = 0; j < 15; j++) {
80  const int q_pre = ((l_ptwo * j)/15 + i) >> b_ptwo;
81  const int q_post = (((j*inv_1)/15) + (i*inv_2)) >> b_ptwo;
82  const int k_pre = 15*i + (j - q_pre*15)*(1 << b_ptwo);
83  const int k_post = i*inv_2*15 + j*inv_1 - 15*q_post*l_ptwo;
84  s->pfa_prereindex[i*15 + j] = k_pre << 1;
85  s->pfa_postreindex[k_post] = l_ptwo*j + i;
86  }
87  }
88 
89  return 0;
90 }
91 
92 /* Stride is hardcoded to 3 */
93 static inline void fft5(FFTComplex *out, FFTComplex *in, FFTComplex exptab[2])
94 {
95  FFTComplex z0[4], t[6];
96 
97  t[0].re = in[3].re + in[12].re;
98  t[0].im = in[3].im + in[12].im;
99  t[1].im = in[3].re - in[12].re;
100  t[1].re = in[3].im - in[12].im;
101  t[2].re = in[6].re + in[ 9].re;
102  t[2].im = in[6].im + in[ 9].im;
103  t[3].im = in[6].re - in[ 9].re;
104  t[3].re = in[6].im - in[ 9].im;
105 
106  out[0].re = in[0].re + in[3].re + in[6].re + in[9].re + in[12].re;
107  out[0].im = in[0].im + in[3].im + in[6].im + in[9].im + in[12].im;
108 
109  t[4].re = exptab[0].re * t[2].re - exptab[1].re * t[0].re;
110  t[4].im = exptab[0].re * t[2].im - exptab[1].re * t[0].im;
111  t[0].re = exptab[0].re * t[0].re - exptab[1].re * t[2].re;
112  t[0].im = exptab[0].re * t[0].im - exptab[1].re * t[2].im;
113  t[5].re = exptab[0].im * t[3].re - exptab[1].im * t[1].re;
114  t[5].im = exptab[0].im * t[3].im - exptab[1].im * t[1].im;
115  t[1].re = exptab[0].im * t[1].re + exptab[1].im * t[3].re;
116  t[1].im = exptab[0].im * t[1].im + exptab[1].im * t[3].im;
117 
118  z0[0].re = t[0].re - t[1].re;
119  z0[0].im = t[0].im - t[1].im;
120  z0[1].re = t[4].re + t[5].re;
121  z0[1].im = t[4].im + t[5].im;
122 
123  z0[2].re = t[4].re - t[5].re;
124  z0[2].im = t[4].im - t[5].im;
125  z0[3].re = t[0].re + t[1].re;
126  z0[3].im = t[0].im + t[1].im;
127 
128  out[1].re = in[0].re + z0[3].re;
129  out[1].im = in[0].im + z0[0].im;
130  out[2].re = in[0].re + z0[2].re;
131  out[2].im = in[0].im + z0[1].im;
132  out[3].re = in[0].re + z0[1].re;
133  out[3].im = in[0].im + z0[2].im;
134  out[4].re = in[0].re + z0[0].re;
135  out[4].im = in[0].im + z0[3].im;
136 }
137 
138 static void fft15_c(FFTComplex *out, FFTComplex *in, FFTComplex *exptab, ptrdiff_t stride)
139 {
140  int k;
141  FFTComplex tmp1[5], tmp2[5], tmp3[5];
142 
143  fft5(tmp1, in + 0, exptab + 19);
144  fft5(tmp2, in + 1, exptab + 19);
145  fft5(tmp3, in + 2, exptab + 19);
146 
147  for (k = 0; k < 5; k++) {
148  FFTComplex t[2];
149 
150  CMUL3(t[0], tmp2[k], exptab[k]);
151  CMUL3(t[1], tmp3[k], exptab[2 * k]);
152  out[stride*k].re = tmp1[k].re + t[0].re + t[1].re;
153  out[stride*k].im = tmp1[k].im + t[0].im + t[1].im;
154 
155  CMUL3(t[0], tmp2[k], exptab[k + 5]);
156  CMUL3(t[1], tmp3[k], exptab[2 * (k + 5)]);
157  out[stride*(k + 5)].re = tmp1[k].re + t[0].re + t[1].re;
158  out[stride*(k + 5)].im = tmp1[k].im + t[0].im + t[1].im;
159 
160  CMUL3(t[0], tmp2[k], exptab[k + 10]);
161  CMUL3(t[1], tmp3[k], exptab[2 * k + 5]);
162  out[stride*(k + 10)].re = tmp1[k].re + t[0].re + t[1].re;
163  out[stride*(k + 10)].im = tmp1[k].im + t[0].im + t[1].im;
164  }
165 }
166 
167 static void mdct15(MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride)
168 {
169  int i, j;
170  const int len4 = s->len4, len3 = len4 * 3, len8 = len4 >> 1;
171  const int l_ptwo = 1 << s->ptwo_fft.nbits;
172  FFTComplex fft15in[15];
173 
174  /* Folding and pre-reindexing */
175  for (i = 0; i < l_ptwo; i++) {
176  for (j = 0; j < 15; j++) {
177  const int k = s->pfa_prereindex[i*15 + j];
178  FFTComplex tmp, exp = s->twiddle_exptab[k >> 1];
179  if (k < len4) {
180  tmp.re = -src[ len4 + k] + src[1*len4 - 1 - k];
181  tmp.im = -src[ len3 + k] - src[1*len3 - 1 - k];
182  } else {
183  tmp.re = -src[ len4 + k] - src[5*len4 - 1 - k];
184  tmp.im = src[-len4 + k] - src[1*len3 - 1 - k];
185  }
186  CMUL(fft15in[j].im, fft15in[j].re, tmp.re, tmp.im, exp.re, exp.im);
187  }
188  s->fft15(s->tmp + s->ptwo_fft.revtab[i], fft15in, s->exptab, l_ptwo);
189  }
190 
191  /* Then a 15xN FFT (where N is a power of two) */
192  for (i = 0; i < 15; i++)
193  s->ptwo_fft.fft_calc(&s->ptwo_fft, s->tmp + l_ptwo*i);
194 
195  /* Reindex again, apply twiddles and output */
196  for (i = 0; i < len8; i++) {
197  const int i0 = len8 + i, i1 = len8 - i - 1;
198  const int s0 = s->pfa_postreindex[i0], s1 = s->pfa_postreindex[i1];
199 
200  CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], s->tmp[s0].re, s->tmp[s0].im,
201  s->twiddle_exptab[i0].im, s->twiddle_exptab[i0].re);
202  CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], s->tmp[s1].re, s->tmp[s1].im,
203  s->twiddle_exptab[i1].im, s->twiddle_exptab[i1].re);
204  }
205 }
206 
207 static void imdct15_half(MDCT15Context *s, float *dst, const float *src,
208  ptrdiff_t stride)
209 {
210  FFTComplex fft15in[15];
211  FFTComplex *z = (FFTComplex *)dst;
212  int i, j, len8 = s->len4 >> 1, l_ptwo = 1 << s->ptwo_fft.nbits;
213  const float *in1 = src, *in2 = src + (s->len2 - 1) * stride;
214 
215  /* Reindex input, putting it into a buffer and doing an Nx15 FFT */
216  for (i = 0; i < l_ptwo; i++) {
217  for (j = 0; j < 15; j++) {
218  const int k = s->pfa_prereindex[i*15 + j];
219  FFTComplex tmp = { in2[-k*stride], in1[k*stride] };
220  CMUL3(fft15in[j], tmp, s->twiddle_exptab[k >> 1]);
221  }
222  s->fft15(s->tmp + s->ptwo_fft.revtab[i], fft15in, s->exptab, l_ptwo);
223  }
224 
225  /* Then a 15xN FFT (where N is a power of two) */
226  for (i = 0; i < 15; i++)
227  s->ptwo_fft.fft_calc(&s->ptwo_fft, s->tmp + l_ptwo*i);
228 
229  /* Reindex again, apply twiddles and output */
230  s->postreindex(z, s->tmp, s->twiddle_exptab, s->pfa_postreindex, len8);
231 }
232 
234  int *lut, ptrdiff_t len8)
235 {
236  int i;
237 
238  /* Reindex again, apply twiddles and output */
239  for (i = 0; i < len8; i++) {
240  const int i0 = len8 + i, i1 = len8 - i - 1;
241  const int s0 = lut[i0], s1 = lut[i1];
242 
243  CMUL(out[i1].re, out[i0].im, in[s1].im, in[s1].re, exp[i1].im, exp[i1].re);
244  CMUL(out[i0].re, out[i1].im, in[s0].im, in[s0].re, exp[i0].im, exp[i0].re);
245  }
246 }
247 
248 av_cold int ff_mdct15_init(MDCT15Context **ps, int inverse, int N, double scale)
249 {
250  MDCT15Context *s;
251  double alpha, theta;
252  int len2 = 15 * (1 << N);
253  int len = 2 * len2;
254  int i;
255 
256  /* Tested and verified to work on everything in between */
257  if ((N < 2) || (N > 13))
258  return AVERROR(EINVAL);
259 
260  s = av_mallocz(sizeof(*s));
261  if (!s)
262  return AVERROR(ENOMEM);
263 
264  s->fft_n = N - 1;
265  s->len4 = len2 / 2;
266  s->len2 = len2;
267  s->inverse = inverse;
268  s->fft15 = fft15_c;
269  s->mdct = mdct15;
270  s->imdct_half = imdct15_half;
271  s->postreindex = postrotate_c;
272 
273  if (ff_fft_init(&s->ptwo_fft, N - 1, s->inverse) < 0)
274  goto fail;
275 
277  goto fail;
278 
279  s->tmp = av_malloc_array(len, 2 * sizeof(*s->tmp));
280  if (!s->tmp)
281  goto fail;
282 
283  s->twiddle_exptab = av_malloc_array(s->len4, sizeof(*s->twiddle_exptab));
284  if (!s->twiddle_exptab)
285  goto fail;
286 
287  theta = 0.125f + (scale < 0 ? s->len4 : 0);
288  scale = sqrt(fabs(scale));
289  for (i = 0; i < s->len4; i++) {
290  alpha = 2 * M_PI * (i + theta) / len;
291  s->twiddle_exptab[i].re = cosf(alpha) * scale;
292  s->twiddle_exptab[i].im = sinf(alpha) * scale;
293  }
294 
295  /* 15-point FFT exptab */
296  for (i = 0; i < 19; i++) {
297  if (i < 15) {
298  double theta = (2.0f * M_PI * i) / 15.0f;
299  if (!s->inverse)
300  theta *= -1;
301  s->exptab[i].re = cosf(theta);
302  s->exptab[i].im = sinf(theta);
303  } else { /* Wrap around to simplify fft15 */
304  s->exptab[i] = s->exptab[i - 15];
305  }
306  }
307 
308  /* 5-point FFT exptab */
309  s->exptab[19].re = cosf(2.0f * M_PI / 5.0f);
310  s->exptab[19].im = sinf(2.0f * M_PI / 5.0f);
311  s->exptab[20].re = cosf(1.0f * M_PI / 5.0f);
312  s->exptab[20].im = sinf(1.0f * M_PI / 5.0f);
313 
314  /* Invert the phase for an inverse transform, do nothing for a forward transform */
315  if (s->inverse) {
316  s->exptab[19].im *= -1;
317  s->exptab[20].im *= -1;
318  }
319 
320 #if ARCH_X86
322 #endif
323 
324  *ps = s;
325 
326  return 0;
327 
328 fail:
330  return AVERROR(ENOMEM);
331 }
ff_fft_init
#define ff_fft_init
Definition: fft.h:135
imdct15_half
static void imdct15_half(MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride)
Definition: mdct15.c:207
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
out
FILE * out
Definition: movenc.c:54
inverse
inverse
Definition: af_crystalizer.c:122
fft15_c
static void fft15_c(FFTComplex *out, FFTComplex *in, FFTComplex *exptab, ptrdiff_t stride)
Definition: mdct15.c:138
im
float im
Definition: fft.c:79
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
CMUL3
#define CMUL3(c, a, b)
Definition: mdct15.c:42
mdct15.h
float.h
ff_mdct15_init
av_cold int ff_mdct15_init(MDCT15Context **ps, int inverse, int N, double scale)
Definition: mdct15.c:248
fft5
static void fft5(FFTComplex *out, FFTComplex *in, FFTComplex exptab[2])
Definition: mdct15.c:93
cosf
#define cosf(x)
Definition: libm.h:78
fail
#define fail()
Definition: checkasm.h:131
scale
static av_always_inline float scale(float x, float s)
Definition: vf_v360.c:1389
av_cold
#define av_cold
Definition: attributes.h:90
init_pfa_reindex_tabs
static int init_pfa_reindex_tabs(MDCT15Context *s)
Definition: mdct15.c:61
s
#define s(width, name)
Definition: cbs_vp9.c:256
s1
#define s1
Definition: regdef.h:38
ff_fft_end
#define ff_fft_end
Definition: fft.h:136
MDCT15Context
Definition: mdct15.h:30
fabs
static __device__ float fabs(float a)
Definition: cuda_runtime.h:182
fft-internal.h
postrotate_c
static void postrotate_c(FFTComplex *out, FFTComplex *in, FFTComplex *exp, int *lut, ptrdiff_t len8)
Definition: mdct15.c:233
sinf
#define sinf(x)
Definition: libm.h:419
exp
int8_t exp
Definition: eval.c:72
error.h
f
f
Definition: af_crystalizer.c:122
mdct15
static void mdct15(MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride)
Definition: mdct15.c:167
exptab
static struct @140 * exptab
FFTComplex::im
FFTSample im
Definition: avfft.h:38
FFTComplex::re
FFTSample re
Definition: avfft.h:38
attributes.h
N
#define N
Definition: af_mcompand.c:53
M_PI
#define M_PI
Definition: mathematics.h:52
ff_mdct15_init_x86
void ff_mdct15_init_x86(MDCT15Context *s)
Definition: mdct15_init.c:84
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
av_malloc_array
#define av_malloc_array(a, b)
Definition: tableprint_vlc.h:31
av_mallocz
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:264
ff_mdct15_uninit
av_cold void ff_mdct15_uninit(MDCT15Context **ps)
Definition: mdct15.c:44
len
int len
Definition: vorbis_enc_data.h:426
stride
#define stride
Definition: h264pred_template.c:537
U
#define U(x)
Definition: vpx_arith.h:37
s0
#define s0
Definition: regdef.h:37
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
CMUL
#define CMUL(dre, dim, are, aim, bre, bim)
Definition: fft-internal.h:42
FFTComplex
Definition: avfft.h:37
re
float re
Definition: fft.c:79