FFmpeg
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
nellymoserenc.c
Go to the documentation of this file.
1 /*
2  * Nellymoser encoder
3  * This code is developed as part of Google Summer of Code 2008 Program.
4  *
5  * Copyright (c) 2008 Bartlomiej Wolowiec
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 /**
25  * @file
26  * Nellymoser encoder
27  * by Bartlomiej Wolowiec
28  *
29  * Generic codec information: libavcodec/nellymoserdec.c
30  *
31  * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32  * (Copyright Joseph Artsimovich and UAB "DKD")
33  *
34  * for more information about nellymoser format, visit:
35  * http://wiki.multimedia.cx/index.php?title=Nellymoser
36  */
37 
38 #include "libavutil/float_dsp.h"
39 #include "libavutil/mathematics.h"
40 #include "nellymoser.h"
41 #include "avcodec.h"
42 #include "audio_frame_queue.h"
43 #include "dsputil.h"
44 #include "fft.h"
45 #include "internal.h"
46 #include "sinewin.h"
47 
48 #define BITSTREAM_WRITER_LE
49 #include "put_bits.h"
50 
/* Number of entries in pow_table; encode_block indexes it with power_idx & 0x7FF. */
51 #define POW_TABLE_SIZE (1<<11)
/* Exponent bias added when pow_table is filled and compensated by the
 * (1 << (... + POW_TABLE_OFFSET)) divisor in encode_block. */
52 #define POW_TABLE_OFFSET 3
/* Row width of the trellis cost/path tables (s->opt, s->path). */
53 #define OPT_SIZE ((1<<15) + 3000)
54 
55 typedef struct NellyMoserEncodeContext {
64  DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN]; ///< sample buffer
65  float (*opt )[OPT_SIZE];
68 
69 static float pow_table[POW_TABLE_SIZE]; ///< pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET)
70 
/*
 * Coarse lookup for the band-0 exponent search in get_exponent_greedy().
 * find_best() indexes it with clip((lrintf(val) >> 8) - 20, 0, 95) and uses
 * the result as a first-guess index into ff_nelly_init_table, then compares
 * that entry with the following one. The largest value here is 62, so
 * "index + 1" never exceeds 63.
 */
71 static const uint8_t sf_lut[96] = {
72  0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
73  5, 5, 5, 6, 7, 7, 8, 8, 9, 10, 11, 11, 12, 13, 13, 14,
74  15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
75  27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
76  41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
77  54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
78 };
79 
/*
 * Coarse lookup for the per-band delta exponent search in
 * get_exponent_greedy(). find_best() indexes it with
 * clip((lrintf(val) >> 8) + 37, 0, 77) and uses the result as a first-guess
 * index into ff_nelly_delta_table, then compares that entry with the
 * following one. The largest value here is 30, so "index + 1" never
 * exceeds 31.
 */
80 static const uint8_t sf_delta_lut[78] = {
81  0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
82  4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12,
83  13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
84  23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
85  28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
86 };
87 
/*
 * Concatenated first-guess tables for coefficient quantization, one segment
 * per bit depth. For a coefficient coded with `bits` bits, encode_block()
 * reads entry
 *   clip(coeff * quant_lut_mul[bits] + quant_lut_add[bits],
 *        quant_lut_offset[bits], quant_lut_offset[bits + 1] - 1)
 * and then compares the selected dequantization value with the next one.
 * Segment boundaries below follow quant_lut_offset[].
 */
88 static const uint8_t quant_lut[230] = {
/* bits == 1: entries [0, 1) */
89  0,
90 
/* bits == 2: entries [1, 4) */
91  0, 1, 2,
92 
/* bits == 3: entries [4, 11) */
93  0, 1, 2, 3, 4, 5, 6,
94 
/* bits == 4: entries [11, 32) */
95  0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11,
96  12, 13, 13, 13, 14,
97 
/* bits == 5: entries [32, 81) */
98  0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8,
99  8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
100  22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
101  30,
102 
/* bits == 6: entries [81, 230) */
103  0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3,
104  4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9,
105  10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
106  15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
107  21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
108  33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
109  46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
110  53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
111  58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
112  61, 61, 61, 61, 62,
113 };
114 
/*
 * Per-bit-depth parameters for indexing quant_lut, all indexed by the number
 * of bits assigned to a coefficient. encode_block() only quantizes when
 * bits > 0, so entry 0 of each table is never used for a lookup.
 */
/* Scale applied to the coefficient before the lookup. */
115 static const float quant_lut_mul[7] = { 0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6 };
/* Bias added after scaling; the sum is clipped into the segment below. */
116 static const float quant_lut_add[7] = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 };
/* Start of each bit depth's segment in quant_lut; [bits + 1] is one past its end. */
117 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
118 
120 {
121  float *in0 = s->buf;
122  float *in1 = s->buf + NELLY_BUF_LEN;
123  float *in2 = s->buf + 2 * NELLY_BUF_LEN;
124 
125  s->fdsp.vector_fmul (s->in_buff, in0, ff_sine_128, NELLY_BUF_LEN);
126  s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
127  s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
128 
129  s->fdsp.vector_fmul (s->in_buff, in1, ff_sine_128, NELLY_BUF_LEN);
130  s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
132 }
133 
135 {
137 
138  ff_mdct_end(&s->mdct_ctx);
139 
140  if (s->avctx->trellis) {
141  av_free(s->opt);
142  av_free(s->path);
143  }
144  ff_af_queue_close(&s->afq);
145 #if FF_API_OLD_ENCODE_AUDIO
146  av_freep(&avctx->coded_frame);
147 #endif
148 
149  return 0;
150 }
151 
153 {
155  int i, ret;
156 
157  if (avctx->channels != 1) {
158  av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
159  return AVERROR(EINVAL);
160  }
161 
162  if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
163  avctx->sample_rate != 11025 &&
164  avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
166  av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
167  return AVERROR(EINVAL);
168  }
169 
170  avctx->frame_size = NELLY_SAMPLES;
171  avctx->delay = NELLY_BUF_LEN;
172  ff_af_queue_init(avctx, &s->afq);
173  s->avctx = avctx;
174  if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
175  goto error;
176  ff_dsputil_init(&s->dsp, avctx);
178 
179  /* Generate overlap window */
181  for (i = 0; i < POW_TABLE_SIZE; i++)
182  pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
183 
184  if (s->avctx->trellis) {
185  s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float ));
186  s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
187  if (!s->opt || !s->path) {
188  ret = AVERROR(ENOMEM);
189  goto error;
190  }
191  }
192 
193 #if FF_API_OLD_ENCODE_AUDIO
194  avctx->coded_frame = avcodec_alloc_frame();
195  if (!avctx->coded_frame) {
196  ret = AVERROR(ENOMEM);
197  goto error;
198  }
199 #endif
200 
201  return 0;
202 error:
203  encode_end(avctx);
204  return ret;
205 }
206 
207 #define find_best(val, table, LUT, LUT_add, LUT_size) \
208  best_idx = \
209  LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
210  if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
211  best_idx++;
212 
213 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
214 {
215  int band, best_idx, power_idx = 0;
216  float power_candidate;
217 
218  //base exponent
219  find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
220  idx_table[0] = best_idx;
221  power_idx = ff_nelly_init_table[best_idx];
222 
223  for (band = 1; band < NELLY_BANDS; band++) {
224  power_candidate = cand[band] - power_idx;
225  find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
226  idx_table[band] = best_idx;
227  power_idx += ff_nelly_delta_table[best_idx];
228  }
229 }
230 
/**
 * Squared difference between two values.
 *
 * @param x    first value
 * @param y    second value
 * @param band band number; accepted but not used in the computation
 * @return (x - y) * (x - y)
 */
static inline float distance(float x, float y, int band)
{
    const float diff = x - y;

    return diff * diff;
}
237 
238 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
239 {
240  int i, j, band, best_idx;
241  float power_candidate, best_val;
242 
243  float (*opt )[OPT_SIZE] = s->opt ;
244  uint8_t(*path)[OPT_SIZE] = s->path;
245 
246  for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
247  opt[0][i] = INFINITY;
248  }
249 
250  for (i = 0; i < 64; i++) {
251  opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
252  path[0][ff_nelly_init_table[i]] = i;
253  }
254 
255  for (band = 1; band < NELLY_BANDS; band++) {
256  int q, c = 0;
257  float tmp;
258  int idx_min, idx_max, idx;
259  power_candidate = cand[band];
260  for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
261  idx_min = FFMAX(0, cand[band] - q);
262  idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
263  for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
264  if ( isinf(opt[band - 1][i]) )
265  continue;
266  for (j = 0; j < 32; j++) {
267  idx = i + ff_nelly_delta_table[j];
268  if (idx > idx_max)
269  break;
270  if (idx >= idx_min) {
271  tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
272  if (opt[band][idx] > tmp) {
273  opt[band][idx] = tmp;
274  path[band][idx] = j;
275  c = 1;
276  }
277  }
278  }
279  }
280  }
281  assert(c); //FIXME
282  }
283 
284  best_val = INFINITY;
285  best_idx = -1;
286  band = NELLY_BANDS - 1;
287  for (i = 0; i < OPT_SIZE; i++) {
288  if (best_val > opt[band][i]) {
289  best_val = opt[band][i];
290  best_idx = i;
291  }
292  }
293  for (band = NELLY_BANDS - 1; band >= 0; band--) {
294  idx_table[band] = path[band][best_idx];
295  if (band) {
296  best_idx -= ff_nelly_delta_table[path[band][best_idx]];
297  }
298  }
299 }
300 
301 /**
302  * Encode one block from the sample buffer into a packet payload.
 *
 * Runs the MDCT via apply_mdct(), derives one exponent per band, writes the
 * exponent indices, scales and quantizes the coefficients of both MDCT
 * halves, and zero-fills whatever is left of the output buffer.
 * s->buf is expected to hold 3 * NELLY_BUF_LEN samples.
 *
303  * @param s encoder context
304  * @param output output buffer
305  * @param output_size size of output buffer in bytes
306  */
307 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
308 {
309  PutBitContext pb;
310  int i, j, band, block, best_idx, power_idx = 0;
311  float power_val, coeff, coeff_sum;
312  float pows[NELLY_FILL_LEN];
313  int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
314  float cand[NELLY_BANDS];
315 
316  apply_mdct(s);
317 
318  init_put_bits(&pb, output, output_size * 8);
319 
 /* Target exponent per band: 1024 * log2 of the band energy, summed over
  * both MDCT halves and divided by (band size << 7); the FFMAX keeps the
  * result from going below 0. */
320  i = 0;
321  for (band = 0; band < NELLY_BANDS; band++) {
322  coeff_sum = 0;
323  for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
324  coeff_sum += s->mdct_out[i ] * s->mdct_out[i ]
325  + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
326  }
327  cand[band] =
328  log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
329  }
330 
 /* Pick the exponent indices: trellis search when requested, greedy otherwise. */
331  if (s->avctx->trellis) {
332  get_exponent_dynamic(s, cand, idx_table);
333  } else {
334  get_exponent_greedy(s, cand, idx_table);
335  }
336 
 /* Write the indices (6 bits for band 0, 5 bits for each delta), rebuild
  * the running exponent from them, and scale the band's coefficients in
  * both halves by the matching factor. */
337  i = 0;
338  for (band = 0; band < NELLY_BANDS; band++) {
339  if (band) {
340  power_idx += ff_nelly_delta_table[idx_table[band]];
341  put_bits(&pb, 5, idx_table[band]);
342  } else {
343  power_idx = ff_nelly_init_table[idx_table[0]];
344  put_bits(&pb, 6, idx_table[0]);
345  }
 /* low 11 bits select the pow_table entry, the remaining bits become a
  * power-of-two divisor */
346  power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
347  for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
348  s->mdct_out[i] *= power_val;
349  s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
350  pows[i] = power_idx;
351  }
352  }
353 
 /* Fills bits[] from pows[]; presumably the per-coefficient bit
  * allocation -- defined elsewhere, TODO confirm. */
354  ff_nelly_get_sample_bits(pows, bits);
355 
 /* Quantize both MDCT halves; coefficients with no bits assigned are skipped. */
356  for (block = 0; block < 2; block++) {
357  for (i = 0; i < NELLY_FILL_LEN; i++) {
358  if (bits[i] > 0) {
 /* start of the dequantization values for this bit depth */
359  const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
360  coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
 /* first guess from quant_lut, then choose the nearer of the guess and
  * the entry after it */
361  best_idx =
362  quant_lut[av_clip (
363  coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
364  quant_lut_offset[bits[i]],
365  quant_lut_offset[bits[i]+1] - 1
366  )];
367  if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
368  best_idx++;
369 
370  put_bits(&pb, bits[i], best_idx);
371  }
372  }
 /* NOTE(review): the statement controlled by this `if` (listing line 374)
  * is missing from this listing. */
373  if (!block)
375  }
376 
 /* Flush pending bits and zero the unused tail of the output buffer. */
377  flush_put_bits(&pb);
378  memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
379 }
380 
/**
 * Encode one input frame, or flush when `frame` is NULL, into `avpkt`.
 *
 * @param avctx          codec context
 * @param avpkt          packet that receives NELLY_BLOCK_LEN bytes of output
 * @param frame          input samples, or NULL to flush
 * @param got_packet_ptr set to 1 when a packet was produced
 * @return 0 on success, otherwise the error returned by ff_af_queue_add()
 *         or ff_alloc_packet2()
 *
 * NOTE(review): the declaration of `s` (listing line 384) is missing from
 * this listing.
 */
381 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
382  const AVFrame *frame, int *got_packet_ptr)
383 {
385  int ret;
386 
 /* The final block was already emitted: return without producing a packet. */
387  if (s->last_frame)
388  return 0;
389 
 /* Move NELLY_BUF_LEN samples from offset NELLY_SAMPLES to the front of
  * the buffer before the new samples are appended. */
390  memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
391  if (frame) {
 /* Samples are copied in units of sizeof(*s->buf). */
392  memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
393  frame->nb_samples * sizeof(*s->buf));
394  if (frame->nb_samples < NELLY_SAMPLES) {
 /* Short frame: zero-pad up to NELLY_SAMPLES. */
395  memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
396  (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
 /* A short frame with at least NELLY_BUF_LEN samples is treated as the
  * last one. */
397  if (frame->nb_samples >= NELLY_BUF_LEN)
398  s->last_frame = 1;
399  }
400  if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
401  return ret;
402  } else {
 /* Flush: feed silence and mark this as the last block. */
403  memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
404  s->last_frame = 1;
405  }
406 
407  if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN)))
408  return ret;
409  encode_block(s, avpkt->data, avpkt->size);
410 
411  /* Get the next frame pts/duration */
412  ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
413  &avpkt->duration);
414 
415  *got_packet_ptr = 1;
416  return 0;
417 }
418 
420  .name = "nellymoser",
421  .type = AVMEDIA_TYPE_AUDIO,
423  .priv_data_size = sizeof(NellyMoserEncodeContext),
424  .init = encode_init,
425  .encode2 = encode_frame,
426  .close = encode_end,
428  .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
429  .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
431 };