FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
nellymoserenc.c
Go to the documentation of this file.
1 /*
2  * Nellymoser encoder
3  * This code is developed as part of Google Summer of Code 2008 Program.
4  *
5  * Copyright (c) 2008 Bartlomiej Wolowiec
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 /**
25  * @file
26  * Nellymoser encoder
27  * by Bartlomiej Wolowiec
28  *
29  * Generic codec information: libavcodec/nellymoserdec.c
30  *
31  * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32  * (Copyright Joseph Artsimovich and UAB "DKD")
33  *
34  * for more information about nellymoser format, visit:
35  * http://wiki.multimedia.cx/index.php?title=Nellymoser
36  */
37 
38 #include "libavutil/common.h"
39 #include "libavutil/float_dsp.h"
40 #include "libavutil/mathematics.h"
41 
42 #include "audio_frame_queue.h"
43 #include "avcodec.h"
44 #include "fft.h"
45 #include "internal.h"
46 #include "nellymoser.h"
47 #include "sinewin.h"
48 
49 #define BITSTREAM_WRITER_LE
50 #include "put_bits.h"
51 
/* Number of entries in pow_table; encode_block() indexes it with power_idx & 0x7FF. */
52 #define POW_TABLE_SIZE (1<<11)
/* Bias added to the exponent when pow_table is filled and to the shift count when it is read. */
53 #define POW_TABLE_OFFSET 3
/* Length of one per-band row of the opt/path tables used by get_exponent_dynamic(). */
54 #define OPT_SIZE ((1<<15) + 3000)
55 
56 typedef struct NellyMoserEncodeContext {
64  DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN]; ///< sample buffer
65  float (*opt )[OPT_SIZE];
68 
69 static float pow_table[POW_TABLE_SIZE]; ///< pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET)
70 
/*
 * First-guess lookup for the exponent of band 0.
 * get_exponent_greedy() passes it to find_best(), which reads
 * sf_lut[clip((lrintf(cand[0]) >> 8) - 20, 0, 95)] to obtain a starting
 * index into ff_nelly_init_table.
 */
71 static const uint8_t sf_lut[96] = {
72  0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
73  5, 5, 5, 6, 7, 7, 8, 8, 9, 10, 11, 11, 12, 13, 13, 14,
74  15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
75  27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
76  41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
77  54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
78 };
79 
/*
 * First-guess lookup for the exponent delta of bands 1 and up.
 * get_exponent_greedy() passes it to find_best(), which reads
 * sf_delta_lut[clip((lrintf(delta) >> 8) + 37, 0, 77)] to obtain a starting
 * index into ff_nelly_delta_table.
 */
80 static const uint8_t sf_delta_lut[78] = {
81  0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
82  4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12,
83  13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
84  23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
85  28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
86 };
87 
/*
 * First-guess lookup for coefficient quantization, read by encode_block() as
 * quant_lut[clip(coeff * quant_lut_mul[bits] + quant_lut_add[bits],
 *                quant_lut_offset[bits], quant_lut_offset[bits + 1] - 1)].
 * The six groups below hold 1, 3, 7, 21, 49 and 149 entries; they are the
 * sub-ranges delimited by quant_lut_offset[1] .. quant_lut_offset[7], i.e.
 * one group per bit count from 1 to 6.
 */
88 static const uint8_t quant_lut[230] = {
89  0,
90 
91  0, 1, 2,
92 
93  0, 1, 2, 3, 4, 5, 6,
94 
95  0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11,
96  12, 13, 13, 13, 14,
97 
98  0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8,
99  8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
100  22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
101  30,
102 
103  0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3,
104  4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9,
105  10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
106  15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
107  21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
108  33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
109  46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
110  53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
111  58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
112  61, 61, 61, 61, 62,
113 };
114 
/*
 * Per-bit-count parameters for the quant_lut lookup in encode_block(); all
 * three are indexed by the number of bits assigned to a coefficient.
 * The lookup position is coeff * quant_lut_mul[bits] + quant_lut_add[bits],
 * clipped to [quant_lut_offset[bits], quant_lut_offset[bits + 1] - 1].
 */
115 static const float quant_lut_mul[7] = { 0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6 };
116 static const float quant_lut_add[7] = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 };
117 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
118 
120 {
121  float *in0 = s->buf;
122  float *in1 = s->buf + NELLY_BUF_LEN;
123  float *in2 = s->buf + 2 * NELLY_BUF_LEN;
124 
125  s->fdsp.vector_fmul (s->in_buff, in0, ff_sine_128, NELLY_BUF_LEN);
126  s->fdsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
127  s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
128 
129  s->fdsp.vector_fmul (s->in_buff, in1, ff_sine_128, NELLY_BUF_LEN);
130  s->fdsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
132 }
133 
135 {
137 
138  ff_mdct_end(&s->mdct_ctx);
139 
140  if (s->avctx->trellis) {
141  av_freep(&s->opt);
142  av_freep(&s->path);
143  }
144  ff_af_queue_close(&s->afq);
145 
146  return 0;
147 }
148 
150 {
152  int i, ret;
153 
154  if (avctx->channels != 1) {
155  av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
156  return AVERROR(EINVAL);
157  }
158 
159  if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
160  avctx->sample_rate != 11025 &&
161  avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
163  av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
164  return AVERROR(EINVAL);
165  }
166 
167  avctx->frame_size = NELLY_SAMPLES;
169  ff_af_queue_init(avctx, &s->afq);
170  s->avctx = avctx;
171  if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
172  goto error;
174 
175  /* Generate overlap window */
177  for (i = 0; i < POW_TABLE_SIZE; i++)
178  pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
179 
180  if (s->avctx->trellis) {
181  s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float ));
182  s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
183  if (!s->opt || !s->path) {
184  ret = AVERROR(ENOMEM);
185  goto error;
186  }
187  }
188 
189  return 0;
190 error:
191  encode_end(avctx);
192  return ret;
193 }
194 
/**
 * Pick an index into `table` whose entry is near `val`.
 *
 * A first guess is read from LUT[clip((lrintf(val) >> 8) + LUT_add, 0,
 * LUT_size - 1)]. The guess is then moved up by exactly one position when
 * table[guess + 1] is closer to val than table[guess]; no further refinement
 * is done.
 *
 * The result is stored in a variable named `best_idx` that the caller must
 * have in scope, and table[best_idx + 1] must be a readable element.
 * `val` is converted to float and evaluated only once.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a single
 * statement and stays correct inside an unbraced if/else.
 */
#define find_best(val, table, LUT, LUT_add, LUT_size)                         \
    do {                                                                      \
        const float find_best_val_ = (val);                                   \
        best_idx = (LUT)[av_clip((lrintf(find_best_val_) >> 8) + (LUT_add),   \
                                 0, (LUT_size) - 1)];                         \
        if (fabsf(find_best_val_ - (table)[best_idx]) >                       \
            fabsf(find_best_val_ - (table)[best_idx + 1]))                    \
            best_idx++;                                                       \
    } while (0)
200 
201 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
202 {
203  int band, best_idx, power_idx = 0;
204  float power_candidate;
205 
206  //base exponent
207  find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
208  idx_table[0] = best_idx;
209  power_idx = ff_nelly_init_table[best_idx];
210 
211  for (band = 1; band < NELLY_BANDS; band++) {
212  power_candidate = cand[band] - power_idx;
213  find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
214  idx_table[band] = best_idx;
215  power_idx += ff_nelly_delta_table[best_idx];
216  }
217 }
218 
/**
 * Squared difference between two values.
 *
 * @param x    first value
 * @param y    second value
 * @param band band index; not used by the computation
 * @return (x - y) * (x - y)
 */
static inline float distance(float x, float y, int band)
{
    const float delta = x - y;

    return delta * delta;
}
225 
226 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
227 {
228  int i, j, band, best_idx;
229  float power_candidate, best_val;
230 
231  float (*opt )[OPT_SIZE] = s->opt ;
232  uint8_t(*path)[OPT_SIZE] = s->path;
233 
234  for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
235  opt[0][i] = INFINITY;
236  }
237 
238  for (i = 0; i < 64; i++) {
239  opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
240  path[0][ff_nelly_init_table[i]] = i;
241  }
242 
243  for (band = 1; band < NELLY_BANDS; band++) {
244  int q, c = 0;
245  float tmp;
246  int idx_min, idx_max, idx;
247  power_candidate = cand[band];
248  for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
249  idx_min = FFMAX(0, cand[band] - q);
250  idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
251  for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
252  if ( isinf(opt[band - 1][i]) )
253  continue;
254  for (j = 0; j < 32; j++) {
255  idx = i + ff_nelly_delta_table[j];
256  if (idx > idx_max)
257  break;
258  if (idx >= idx_min) {
259  tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
260  if (opt[band][idx] > tmp) {
261  opt[band][idx] = tmp;
262  path[band][idx] = j;
263  c = 1;
264  }
265  }
266  }
267  }
268  }
269  assert(c); //FIXME
270  }
271 
272  best_val = INFINITY;
273  best_idx = -1;
274  band = NELLY_BANDS - 1;
275  for (i = 0; i < OPT_SIZE; i++) {
276  if (best_val > opt[band][i]) {
277  best_val = opt[band][i];
278  best_idx = i;
279  }
280  }
281  for (band = NELLY_BANDS - 1; band >= 0; band--) {
282  idx_table[band] = path[band][best_idx];
283  if (band) {
284  best_idx -= ff_nelly_delta_table[path[band][best_idx]];
285  }
286  }
287 }
288 
289 /**
290  * Encode NELLY_SAMPLES samples. It assumes that the sample buffer s->buf contains 3 * NELLY_BUF_LEN values.
 *
 * Steps: run apply_mdct(), derive one energy candidate per band, choose the
 * exponent indices (get_exponent_dynamic() when avctx->trellis is set,
 * get_exponent_greedy() otherwise), write them, scale the MDCT coefficients,
 * then quantize and write the coefficients of both blocks. Whatever remains
 * of the output buffer after the written bits is zero-filled.
 *
291  * @param s encoder context
292  * @param output output buffer
293  * @param output_size size of output buffer, in bytes
294  */
295 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
296 {
297  PutBitContext pb;
298  int i, j, band, block, best_idx, power_idx = 0;
299  float power_val, coeff, coeff_sum;
300  float pows[NELLY_FILL_LEN];
301  int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
302  float cand[NELLY_BANDS];
303 
304  apply_mdct(s);
305 
306  init_put_bits(&pb, output, output_size * 8);
307 
/* Per band: sum the squared coefficients of both blocks (the second block
 * starts at offset NELLY_BUF_LEN in mdct_out) and convert the scaled sum to
 * 1024 * log2(), floored at 0 by the FFMAX(1.0, ...). */
308  i = 0;
309  for (band = 0; band < NELLY_BANDS; band++) {
310  coeff_sum = 0;
311  for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
312  coeff_sum += s->mdct_out[i ] * s->mdct_out[i ]
313  + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
314  }
315  cand[band] =
316  log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
317  }
318 
319  if (s->avctx->trellis) {
320  get_exponent_dynamic(s, cand, idx_table);
321  } else {
322  get_exponent_greedy(s, cand, idx_table);
323  }
324 
/* Write the exponent indices (6 bits for band 0, 5 bits for the others),
 * rebuild the running power from them and scale every coefficient of the
 * band in both blocks by the matching factor. */
325  i = 0;
326  for (band = 0; band < NELLY_BANDS; band++) {
327  if (band) {
328  power_idx += ff_nelly_delta_table[idx_table[band]];
329  put_bits(&pb, 5, idx_table[band]);
330  } else {
331  power_idx = ff_nelly_init_table[idx_table[0]];
332  put_bits(&pb, 6, idx_table[0]);
333  }
/* The low 11 bits of power_idx select the pow_table entry; the remaining
 * high bits, plus POW_TABLE_OFFSET, give a power-of-two divisor. */
334  power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
335  for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
336  s->mdct_out[i] *= power_val;
337  s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
338  pows[i] = power_idx;
339  }
340  }
341 
/* Presumably fills bits[] with the bit count of each coefficient from the
 * per-coefficient powers - confirm against ff_nelly_get_sample_bits(). */
342  ff_nelly_get_sample_bits(pows, bits);
343 
/* Quantize both blocks with the same bits[] allocation. */
344  for (block = 0; block < 2; block++) {
345  for (i = 0; i < NELLY_FILL_LEN; i++) {
346  if (bits[i] > 0) {
/* Start of the dequantization levels used for this bit count. */
347  const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
348  coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
/* First guess from quant_lut, then move up one level if that level is closer. */
349  best_idx =
350  quant_lut[av_clip (
351  coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
352  quant_lut_offset[bits[i]],
353  quant_lut_offset[bits[i]+1] - 1
354  )];
355  if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
356  best_idx++;
357 
358  put_bits(&pb, bits[i], best_idx);
359  }
360  }
/* NOTE(review): the statement controlled by this `if` (listing line 362) is
 * not present in this copy of the file. */
361  if (!block)
363  }
364 
365  flush_put_bits(&pb);
/* Zero everything between the current write position and the end of the buffer. */
366  memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
367 }
368 
/**
 * Encode one frame of input samples into a packet.
 *
 * Moves the NELLY_BUF_LEN samples that start at s->buf + NELLY_SAMPLES to the
 * front of s->buf, stores the new input after them (zero-padded when the
 * frame is short, all zeros when frame is NULL), then calls encode_block().
 * Once s->last_frame is set, later calls return 0 without writing a packet
 * or touching *got_packet_ptr.
 *
 * @param avctx          codec context
 * @param avpkt          packet that receives NELLY_BLOCK_LEN bytes of output
 * @param frame          input samples, or NULL
 * @param got_packet_ptr set to 1 when a packet was written
 * @return 0 on success, or the negative value returned by ff_af_queue_add()
 *         or ff_alloc_packet2()
 */
369 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
370  const AVFrame *frame, int *got_packet_ptr)
371 {
373  int ret;
374 
375  if (s->last_frame)
376  return 0;
377 
/* Keep the tail of the previous input as the head of the new buffer. */
378  memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
379  if (frame) {
380  memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
381  frame->nb_samples * sizeof(*s->buf));
382  if (frame->nb_samples < NELLY_SAMPLES) {
/* Short frame: zero-pad up to NELLY_SAMPLES. */
383  memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
384  (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
/* A short frame holding at least NELLY_BUF_LEN samples ends the stream here;
 * a shorter one leaves last_frame unset. */
385  if (frame->nb_samples >= NELLY_BUF_LEN)
386  s->last_frame = 1;
387  }
388  if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
389  return ret;
390  } else {
/* No input frame: encode silence after the kept tail and mark the stream as finished. */
391  memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
392  s->last_frame = 1;
393  }
394 
395  if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN)) < 0)
396  return ret;
397  encode_block(s, avpkt->data, avpkt->size);
398 
399  /* Get the next frame pts/duration */
400  ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
401  &avpkt->duration);
402 
403  *got_packet_ptr = 1;
404  return 0;
405 }
406 
408  .name = "nellymoser",
409  .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
410  .type = AVMEDIA_TYPE_AUDIO,
412  .priv_data_size = sizeof(NellyMoserEncodeContext),
413  .init = encode_init,
414  .encode2 = encode_frame,
415  .close = encode_end,
417  .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
419 };