FFmpeg
mdct15.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013-2014 Mozilla Corporation
3  * Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * Celt non-power of 2 iMDCT
25  */
26 
27 #include <float.h>
28 #include <math.h>
29 #include <stddef.h>
30 
31 #include "config.h"
32 
33 #include "libavutil/attributes.h"
34 #include "libavutil/common.h"
35 
36 #include "mdct15.h"
37 
38 #define FFT_FLOAT 1
39 #include "fft-internal.h"
40 
41 #define CMUL3(c, a, b) CMUL((c).re, (c).im, (a).re, (a).im, (b).re, (b).im)
42 
44 {
45  MDCT15Context *s = *ps;
46 
47  if (!s)
48  return;
49 
50  ff_fft_end(&s->ptwo_fft);
51 
55  av_freep(&s->tmp);
56 
57  av_freep(ps);
58 }
59 
61 {
62  int i, j;
63  const int b_ptwo = s->ptwo_fft.nbits; /* Bits for the power of two FFTs */
64  const int l_ptwo = 1 << b_ptwo; /* Total length for the power of two FFTs */
65  const int inv_1 = l_ptwo << ((4 - b_ptwo) & 3); /* (2^b_ptwo)^-1 mod 15 */
66  const int inv_2 = 0xeeeeeeef & ((1U << b_ptwo) - 1); /* 15^-1 mod 2^b_ptwo */
67 
68  s->pfa_prereindex = av_malloc_array(15 * l_ptwo, sizeof(*s->pfa_prereindex));
69  if (!s->pfa_prereindex)
70  return 1;
71 
72  s->pfa_postreindex = av_malloc_array(15 * l_ptwo, sizeof(*s->pfa_postreindex));
73  if (!s->pfa_postreindex)
74  return 1;
75 
76  /* Pre/Post-reindex */
77  for (i = 0; i < l_ptwo; i++) {
78  for (j = 0; j < 15; j++) {
79  const int q_pre = ((l_ptwo * j)/15 + i) >> b_ptwo;
80  const int q_post = (((j*inv_1)/15) + (i*inv_2)) >> b_ptwo;
81  const int k_pre = 15*i + (j - q_pre*15)*(1 << b_ptwo);
82  const int k_post = i*inv_2*15 + j*inv_1 - 15*q_post*l_ptwo;
83  s->pfa_prereindex[i*15 + j] = k_pre << 1;
84  s->pfa_postreindex[k_post] = l_ptwo*j + i;
85  }
86  }
87 
88  return 0;
89 }
90 
91 /* Stride is hardcoded to 3 */
92 static inline void fft5(FFTComplex *out, FFTComplex *in, FFTComplex exptab[2])
93 {
94  FFTComplex z0[4], t[6];
95 
96  t[0].re = in[3].re + in[12].re;
97  t[0].im = in[3].im + in[12].im;
98  t[1].im = in[3].re - in[12].re;
99  t[1].re = in[3].im - in[12].im;
100  t[2].re = in[6].re + in[ 9].re;
101  t[2].im = in[6].im + in[ 9].im;
102  t[3].im = in[6].re - in[ 9].re;
103  t[3].re = in[6].im - in[ 9].im;
104 
105  out[0].re = in[0].re + in[3].re + in[6].re + in[9].re + in[12].re;
106  out[0].im = in[0].im + in[3].im + in[6].im + in[9].im + in[12].im;
107 
108  t[4].re = exptab[0].re * t[2].re - exptab[1].re * t[0].re;
109  t[4].im = exptab[0].re * t[2].im - exptab[1].re * t[0].im;
110  t[0].re = exptab[0].re * t[0].re - exptab[1].re * t[2].re;
111  t[0].im = exptab[0].re * t[0].im - exptab[1].re * t[2].im;
112  t[5].re = exptab[0].im * t[3].re - exptab[1].im * t[1].re;
113  t[5].im = exptab[0].im * t[3].im - exptab[1].im * t[1].im;
114  t[1].re = exptab[0].im * t[1].re + exptab[1].im * t[3].re;
115  t[1].im = exptab[0].im * t[1].im + exptab[1].im * t[3].im;
116 
117  z0[0].re = t[0].re - t[1].re;
118  z0[0].im = t[0].im - t[1].im;
119  z0[1].re = t[4].re + t[5].re;
120  z0[1].im = t[4].im + t[5].im;
121 
122  z0[2].re = t[4].re - t[5].re;
123  z0[2].im = t[4].im - t[5].im;
124  z0[3].re = t[0].re + t[1].re;
125  z0[3].im = t[0].im + t[1].im;
126 
127  out[1].re = in[0].re + z0[3].re;
128  out[1].im = in[0].im + z0[0].im;
129  out[2].re = in[0].re + z0[2].re;
130  out[2].im = in[0].im + z0[1].im;
131  out[3].re = in[0].re + z0[1].re;
132  out[3].im = in[0].im + z0[2].im;
133  out[4].re = in[0].re + z0[0].re;
134  out[4].im = in[0].im + z0[3].im;
135 }
136 
138 {
139  int k;
140  FFTComplex tmp1[5], tmp2[5], tmp3[5];
141 
142  fft5(tmp1, in + 0, exptab + 19);
143  fft5(tmp2, in + 1, exptab + 19);
144  fft5(tmp3, in + 2, exptab + 19);
145 
146  for (k = 0; k < 5; k++) {
147  FFTComplex t[2];
148 
149  CMUL3(t[0], tmp2[k], exptab[k]);
150  CMUL3(t[1], tmp3[k], exptab[2 * k]);
151  out[stride*k].re = tmp1[k].re + t[0].re + t[1].re;
152  out[stride*k].im = tmp1[k].im + t[0].im + t[1].im;
153 
154  CMUL3(t[0], tmp2[k], exptab[k + 5]);
155  CMUL3(t[1], tmp3[k], exptab[2 * (k + 5)]);
156  out[stride*(k + 5)].re = tmp1[k].re + t[0].re + t[1].re;
157  out[stride*(k + 5)].im = tmp1[k].im + t[0].im + t[1].im;
158 
159  CMUL3(t[0], tmp2[k], exptab[k + 10]);
160  CMUL3(t[1], tmp3[k], exptab[2 * k + 5]);
161  out[stride*(k + 10)].re = tmp1[k].re + t[0].re + t[1].re;
162  out[stride*(k + 10)].im = tmp1[k].im + t[0].im + t[1].im;
163  }
164 }
165 
166 static void mdct15(MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride)
167 {
168  int i, j;
169  const int len4 = s->len4, len3 = len4 * 3, len8 = len4 >> 1;
170  const int l_ptwo = 1 << s->ptwo_fft.nbits;
171  FFTComplex fft15in[15];
172 
173  /* Folding and pre-reindexing */
174  for (i = 0; i < l_ptwo; i++) {
175  for (j = 0; j < 15; j++) {
176  const int k = s->pfa_prereindex[i*15 + j];
177  FFTComplex tmp, exp = s->twiddle_exptab[k >> 1];
178  if (k < len4) {
179  tmp.re = -src[ len4 + k] + src[1*len4 - 1 - k];
180  tmp.im = -src[ len3 + k] - src[1*len3 - 1 - k];
181  } else {
182  tmp.re = -src[ len4 + k] - src[5*len4 - 1 - k];
183  tmp.im = src[-len4 + k] - src[1*len3 - 1 - k];
184  }
185  CMUL(fft15in[j].im, fft15in[j].re, tmp.re, tmp.im, exp.re, exp.im);
186  }
187  s->fft15(s->tmp + s->ptwo_fft.revtab[i], fft15in, s->exptab, l_ptwo);
188  }
189 
190  /* Then a 15xN FFT (where N is a power of two) */
191  for (i = 0; i < 15; i++)
192  s->ptwo_fft.fft_calc(&s->ptwo_fft, s->tmp + l_ptwo*i);
193 
194  /* Reindex again, apply twiddles and output */
195  for (i = 0; i < len8; i++) {
196  const int i0 = len8 + i, i1 = len8 - i - 1;
197  const int s0 = s->pfa_postreindex[i0], s1 = s->pfa_postreindex[i1];
198 
199  CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], s->tmp[s0].re, s->tmp[s0].im,
200  s->twiddle_exptab[i0].im, s->twiddle_exptab[i0].re);
201  CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], s->tmp[s1].re, s->tmp[s1].im,
202  s->twiddle_exptab[i1].im, s->twiddle_exptab[i1].re);
203  }
204 }
205 
206 static void imdct15_half(MDCT15Context *s, float *dst, const float *src,
207  ptrdiff_t stride)
208 {
209  FFTComplex fft15in[15];
210  FFTComplex *z = (FFTComplex *)dst;
211  int i, j, len8 = s->len4 >> 1, l_ptwo = 1 << s->ptwo_fft.nbits;
212  const float *in1 = src, *in2 = src + (s->len2 - 1) * stride;
213 
214  /* Reindex input, putting it into a buffer and doing an Nx15 FFT */
215  for (i = 0; i < l_ptwo; i++) {
216  for (j = 0; j < 15; j++) {
217  const int k = s->pfa_prereindex[i*15 + j];
218  FFTComplex tmp = { in2[-k*stride], in1[k*stride] };
219  CMUL3(fft15in[j], tmp, s->twiddle_exptab[k >> 1]);
220  }
221  s->fft15(s->tmp + s->ptwo_fft.revtab[i], fft15in, s->exptab, l_ptwo);
222  }
223 
224  /* Then a 15xN FFT (where N is a power of two) */
225  for (i = 0; i < 15; i++)
226  s->ptwo_fft.fft_calc(&s->ptwo_fft, s->tmp + l_ptwo*i);
227 
228  /* Reindex again, apply twiddles and output */
229  s->postreindex(z, s->tmp, s->twiddle_exptab, s->pfa_postreindex, len8);
230 }
231 
233  int *lut, ptrdiff_t len8)
234 {
235  int i;
236 
237  /* Reindex again, apply twiddles and output */
238  for (i = 0; i < len8; i++) {
239  const int i0 = len8 + i, i1 = len8 - i - 1;
240  const int s0 = lut[i0], s1 = lut[i1];
241 
242  CMUL(out[i1].re, out[i0].im, in[s1].im, in[s1].re, exp[i1].im, exp[i1].re);
243  CMUL(out[i0].re, out[i1].im, in[s0].im, in[s0].re, exp[i0].im, exp[i0].re);
244  }
245 }
246 
247 av_cold int ff_mdct15_init(MDCT15Context **ps, int inverse, int N, double scale)
248 {
249  MDCT15Context *s;
250  double alpha, theta;
251  int len2 = 15 * (1 << N);
252  int len = 2 * len2;
253  int i;
254 
255  /* Tested and verified to work on everything in between */
256  if ((N < 2) || (N > 13))
257  return AVERROR(EINVAL);
258 
259  s = av_mallocz(sizeof(*s));
260  if (!s)
261  return AVERROR(ENOMEM);
262 
263  s->fft_n = N - 1;
264  s->len4 = len2 / 2;
265  s->len2 = len2;
266  s->inverse = inverse;
267  s->fft15 = fft15_c;
268  s->mdct = mdct15;
271 
272  if (ff_fft_init(&s->ptwo_fft, N - 1, s->inverse) < 0)
273  goto fail;
274 
275  if (init_pfa_reindex_tabs(s))
276  goto fail;
277 
278  s->tmp = av_malloc_array(len, 2 * sizeof(*s->tmp));
279  if (!s->tmp)
280  goto fail;
281 
282  s->twiddle_exptab = av_malloc_array(s->len4, sizeof(*s->twiddle_exptab));
283  if (!s->twiddle_exptab)
284  goto fail;
285 
286  theta = 0.125f + (scale < 0 ? s->len4 : 0);
287  scale = sqrt(fabs(scale));
288  for (i = 0; i < s->len4; i++) {
289  alpha = 2 * M_PI * (i + theta) / len;
290  s->twiddle_exptab[i].re = cosf(alpha) * scale;
291  s->twiddle_exptab[i].im = sinf(alpha) * scale;
292  }
293 
294  /* 15-point FFT exptab */
295  for (i = 0; i < 19; i++) {
296  if (i < 15) {
297  double theta = (2.0f * M_PI * i) / 15.0f;
298  if (!s->inverse)
299  theta *= -1;
300  s->exptab[i].re = cosf(theta);
301  s->exptab[i].im = sinf(theta);
302  } else { /* Wrap around to simplify fft15 */
303  s->exptab[i] = s->exptab[i - 15];
304  }
305  }
306 
307  /* 5-point FFT exptab */
308  s->exptab[19].re = cosf(2.0f * M_PI / 5.0f);
309  s->exptab[19].im = sinf(2.0f * M_PI / 5.0f);
310  s->exptab[20].re = cosf(1.0f * M_PI / 5.0f);
311  s->exptab[20].im = sinf(1.0f * M_PI / 5.0f);
312 
313  /* Invert the phase for an inverse transform, do nothing for a forward transform */
314  if (s->inverse) {
315  s->exptab[19].im *= -1;
316  s->exptab[20].im *= -1;
317  }
318 
319  if (ARCH_X86)
321 
322  *ps = s;
323 
324  return 0;
325 
326 fail:
327  ff_mdct15_uninit(&s);
328  return AVERROR(ENOMEM);
329 }
float re
Definition: fft.c:82
FFTSample re
Definition: avfft.h:38
static void mdct15(MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride)
Definition: mdct15.c:166
static void fft5(FFTComplex *out, FFTComplex *in, FFTComplex exptab[2])
Definition: mdct15.c:92
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:236
#define src
Definition: vp8dsp.c:254
#define N
Definition: af_mcompand.c:54
Macro definitions for various function/variable attributes.
#define av_cold
Definition: attributes.h:82
#define f(width, name)
Definition: cbs_vp9.c:255
static int init_pfa_reindex_tabs(MDCT15Context *s)
Definition: mdct15.c:60
void(* mdct)(struct MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride)
Definition: mdct15.h:49
#define CMUL(dre, dim, are, aim, bre, bim)
Definition: fft-internal.h:76
int fft_n
Definition: mdct15.h:29
static void postrotate_c(FFTComplex *out, FFTComplex *in, FFTComplex *exp, int *lut, ptrdiff_t len8)
Definition: mdct15.c:232
#define cosf(x)
Definition: libm.h:78
av_cold int ff_mdct15_init(MDCT15Context **ps, int inverse, int N, double scale)
Definition: mdct15.c:247
#define U(x)
Definition: vp56_arith.h:37
FFTComplex * twiddle_exptab
Definition: mdct15.h:38
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
#define s0
Definition: regdef.h:37
void ff_mdct15_init_x86(MDCT15Context *s)
Definition: mdct15_init.c:81
#define fail()
Definition: checkasm.h:121
int8_t exp
Definition: eval.c:72
int len2
Definition: mdct15.h:30
int inverse
Definition: mdct15.h:32
FFTContext ptwo_fft
Definition: mdct15.h:36
int * pfa_prereindex
Definition: mdct15.h:33
int nbits
Definition: fft.h:89
#define ff_fft_init
Definition: fft.h:149
#define s(width, name)
Definition: cbs_vp9.c:257
FFTComplex * tmp
Definition: mdct15.h:37
static void imdct15_half(MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride)
Definition: mdct15.c:206
int * pfa_postreindex
Definition: mdct15.h:34
void(* postreindex)(FFTComplex *out, FFTComplex *in, FFTComplex *exp, int *lut, ptrdiff_t len8)
Definition: mdct15.h:46
#define sinf(x)
Definition: libm.h:419
void(* imdct_half)(struct MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride)
Definition: mdct15.h:52
static const int16_t alpha[]
Definition: ilbcdata.h:55
#define CMUL3(c, a, b)
Definition: mdct15.c:41
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31))))#define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac){}void ff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map){AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);return NULL;}return ac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;}int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){int use_generic=1;int len=in->nb_samples;int p;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
float im
Definition: fft.c:82
int len4
Definition: mdct15.h:31
static void fft15_c(FFTComplex *out, FFTComplex *in, FFTComplex *exptab, ptrdiff_t stride)
Definition: mdct15.c:137
#define s1
Definition: regdef.h:38
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:104
FFTComplex exptab[64]
Definition: mdct15.h:40
FFTSample im
Definition: avfft.h:38
common internal and external API header
#define ff_fft_end
Definition: fft.h:150
void(* fft_calc)(struct FFTContext *s, FFTComplex *z)
Do a complex FFT with the parameters defined in ff_fft_init().
Definition: fft.h:106
av_cold void ff_mdct15_uninit(MDCT15Context **ps)
Definition: mdct15.c:43
void(* fft15)(FFTComplex *out, FFTComplex *in, FFTComplex *exptab, ptrdiff_t stride)
Definition: mdct15.h:43
int len
static struct @149 * exptab
FILE * out
Definition: movenc.c:54
#define av_freep(p)
uint16_t * revtab
Definition: fft.h:91
#define M_PI
Definition: mathematics.h:52
static uint32_t inverse(uint32_t v)
find multiplicative inverse modulo 2 ^ 32
Definition: asfcrypt.c:35
#define av_malloc_array(a, b)
#define stride
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later.That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another.Frame references ownership and permissions
static uint8_t tmp[11]
Definition: aes_ctr.c:26