FFmpeg: libavcodec/nellymoserenc.c Source File

00001 /*
00002  * Nellymoser encoder
00003  * This code is developed as part of Google Summer of Code 2008 Program.
00004  *
00005  * Copyright (c) 2008 Bartlomiej Wolowiec
00006  *
00007  * This file is part of FFmpeg.
00008  *
00009  * FFmpeg is free software; you can redistribute it and/or
00010  * modify it under the terms of the GNU Lesser General Public
00011  * License as published by the Free Software Foundation; either
00012  * version 2.1 of the License, or (at your option) any later version.
00013  *
00014  * FFmpeg is distributed in the hope that it will be useful,
00015  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017  * Lesser General Public License for more details.
00018  *
00019  * You should have received a copy of the GNU Lesser General Public
00020  * License along with FFmpeg; if not, write to the Free Software
00021  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00022  */
00023 
00038 #include "nellymoser.h"
00039 #include "avcodec.h"
00040 #include "dsputil.h"
00041 #include "fft.h"
00042 #include "sinewin.h"
00043 
00044 #define BITSTREAM_WRITER_LE
00045 #include "put_bits.h"
00046 
/* Number of entries in pow_table; encode_block() indexes it with the low
 * 11 bits of a power index (power_idx & 0x7FF). */
00047 #define POW_TABLE_SIZE (1<<11)
/* Exponent bias added when pow_table is filled in encode_init() and added
 * again to the shift count when encode_block() divides by a power of two. */
00048 #define POW_TABLE_OFFSET 3
/* Number of entries per band in the trellis tables (opt/path), which are
 * allocated with NELLY_BANDS * OPT_SIZE elements each. */
00049 #define OPT_SIZE ((1<<15) + 3000)
00050 
/* Private encoder state; obtained from AVCodecContext.priv_data by every
 * callback in this file. */
00051 typedef struct NellyMoserEncodeContext {
      /* codec context, stored by encode_init() */
00052     AVCodecContext  *avctx;
      /* set by encode_frame() once the flush call (data == NULL) was handled;
       * later calls return 0 immediately */
00053     int             last_frame;
      /* index (0 or 1) into buf[]; toggled by encode_frame() after each call
       * that stores or clears samples */
00054     int             bufsel;
      /* set after the first frame has been stored; no block is encoded
       * before that */
00055     int             have_saved;
      /* DSP helpers (vector_fmul, vector_fmul_reverse), set up in encode_init() */
00056     DSPContext      dsp;
      /* MDCT context, set up in encode_init() and released in encode_end() */
00057     FFTContext      mdct_ctx;
      /* output of the two MDCTs run by apply_mdct(): second transform starts
       * at offset NELLY_BUF_LEN */
00058     DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
      /* windowed input of the first MDCT in apply_mdct() */
00059     DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
      /* two sample buffers selected through bufsel */
00060     DECLARE_ALIGNED(32, float, buf)[2][3 * NELLY_BUF_LEN];     
      /* trellis cost table, allocated in encode_init() with
       * NELLY_BANDS * OPT_SIZE floats when avctx->trellis is set */
00061     float           (*opt )[NELLY_BANDS];
      /* trellis back-pointer table, allocated in encode_init() with
       * NELLY_BANDS * OPT_SIZE bytes when avctx->trellis is set */
00062     uint8_t         (*path)[NELLY_BANDS];
00063 } NellyMoserEncodeContext;
00064 
/* Filled by encode_init() with -2^(-i/2048 - 3 + POW_TABLE_OFFSET) for
 * i in [0, POW_TABLE_SIZE); read by encode_block() to scale coefficients. */
00065 static float pow_table[POW_TABLE_SIZE];     
00066 
/* Lookup used by find_best() in get_exponent_greedy() for the first band:
 * maps av_clip((lrintf(val) >> 8) - 20, 0, 95) to a starting index into
 * ff_nelly_init_table, which find_best() may then advance by one. */
00067 static const uint8_t sf_lut[96] = {
00068      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
00069      5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
00070     15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
00071     27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
00072     41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
00073     54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
00074 };
00075 
/* Lookup used by find_best() in get_exponent_greedy() for bands >= 1:
 * maps av_clip((lrintf(val) >> 8) + 37, 0, 77) to a starting index into
 * ff_nelly_delta_table, which find_best() may then advance by one. */
00076 static const uint8_t sf_delta_lut[78] = {
00077      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
00078      4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
00079     13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
00080     23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
00081     28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
00082 };
00083 
/* Concatenated quantiser lookup tables used by encode_block(), one segment
 * per bit width. Segment n occupies indices
 * [quant_lut_offset[n], quant_lut_offset[n + 1]), i.e. 1, 3, 7, 21, 49 and
 * 149 entries for 1..6 bits. Each entry is a starting index into the
 * dequantisation table for that width; encode_block() may advance it by one. */
00084 static const uint8_t quant_lut[230] = {
00085      0,
00086 
00087      0,  1,  2,
00088 
00089      0,  1,  2,  3,  4,  5,  6,
00090 
00091      0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
00092     12, 13, 13, 13, 14,
00093 
00094      0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
00095      8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
00096     22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
00097     30,
00098 
00099      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
00100      4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
00101     10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
00102     15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
00103     21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
00104     33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
00105     46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
00106     53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
00107     58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
00108     61, 61, 61, 61, 62,
00109 };
00110 
/* Per-bit-width parameters for the quant_lut lookup in encode_block(); all
 * three arrays are indexed by the number of bits assigned to a coefficient.
 * The lookup position is coeff * quant_lut_mul[bits] + quant_lut_add[bits],
 * clipped to [quant_lut_offset[bits], quant_lut_offset[bits + 1] - 1]. */
00111 static const float quant_lut_mul[7] = { 0.0,  0.0,  2.0,  2.0,  5.0, 12.0,  36.6 };
00112 static const float quant_lut_add[7] = { 0.0,  0.0,  2.0,  7.0, 21.0, 56.0, 157.0 };
/* Start of each per-width segment inside quant_lut; the final entry (230)
 * is the total table size and serves as the end of the 6-bit segment. */
00113 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
00114 
00115 static void apply_mdct(NellyMoserEncodeContext *s)
00116 {
00117     s->dsp.vector_fmul(s->in_buff, s->buf[s->bufsel], ff_sine_128, NELLY_BUF_LEN);
00118     s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128,
00119                                NELLY_BUF_LEN);
00120     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
00121 
00122     s->dsp.vector_fmul(s->buf[s->bufsel] + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN,
00123                        ff_sine_128, NELLY_BUF_LEN);
00124     s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128,
00125                                NELLY_BUF_LEN);
00126     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN);
00127 }
00128 
00129 static av_cold int encode_init(AVCodecContext *avctx)
00130 {
00131     NellyMoserEncodeContext *s = avctx->priv_data;
00132     int i;
00133 
00134     if (avctx->channels != 1) {
00135         av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
00136         return -1;
00137     }
00138 
00139     if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
00140         avctx->sample_rate != 11025 &&
00141         avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
00142         avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
00143         av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
00144         return -1;
00145     }
00146 
00147     avctx->frame_size = NELLY_SAMPLES;
00148     s->avctx = avctx;
00149     ff_mdct_init(&s->mdct_ctx, 8, 0, 1.0);
00150     dsputil_init(&s->dsp, avctx);
00151 
00152     /* Generate overlap window */
00153     ff_sine_window_init(ff_sine_128, 128);
00154     for (i = 0; i < POW_TABLE_SIZE; i++)
00155         pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
00156 
00157     if (s->avctx->trellis) {
00158         s->opt  = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float  ));
00159         s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
00160     }
00161 
00162     return 0;
00163 }
00164 
00165 static av_cold int encode_end(AVCodecContext *avctx)
00166 {
00167     NellyMoserEncodeContext *s = avctx->priv_data;
00168 
00169     ff_mdct_end(&s->mdct_ctx);
00170 
00171     if (s->avctx->trellis) {
00172         av_free(s->opt);
00173         av_free(s->path);
00174     }
00175 
00176     return 0;
00177 }
00178 
/**
 * Pick the entry of `table` closest to `val` and store its index in the
 * caller's local variable `best_idx`.
 *
 * A first guess is read from LUT at position (lrintf(val) >> 8) + LUT_add,
 * clipped to [0, LUT_size - 1]; the guess is then advanced by one if the
 * following table entry is nearer to val.
 *
 * The expansion is a single statement (do { } while (0)), so it is safe in
 * unbraced if/else bodies, and every argument is parenthesised. `val` is
 * evaluated more than once and must therefore be free of side effects.
 */
#define find_best(val, table, LUT, LUT_add, LUT_size) \
    do { \
        best_idx = \
            (LUT)[av_clip((lrintf(val) >> 8) + (LUT_add), 0, (LUT_size) - 1)]; \
        if (fabs((val) - (table)[best_idx]) > fabs((val) - (table)[best_idx + 1])) \
            best_idx++; \
    } while (0)
00184 
00185 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
00186 {
00187     int band, best_idx, power_idx = 0;
00188     float power_candidate;
00189 
00190     //base exponent
00191     find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
00192     idx_table[0] = best_idx;
00193     power_idx = ff_nelly_init_table[best_idx];
00194 
00195     for (band = 1; band < NELLY_BANDS; band++) {
00196         power_candidate = cand[band] - power_idx;
00197         find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
00198         idx_table[band] = best_idx;
00199         power_idx += ff_nelly_delta_table[best_idx];
00200     }
00201 }
00202 
/**
 * Squared difference between two values.
 *
 * @param x    first value
 * @param y    second value
 * @param band band number (accepted but not used)
 * @return (x - y) * (x - y)
 */
static inline float distance(float x, float y, int band)
{
    const float diff = x - y;

    return diff * diff;
}
00209 
00210 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
00211 {
00212     int i, j, band, best_idx;
00213     float power_candidate, best_val;
00214 
00215     float  (*opt )[NELLY_BANDS] = s->opt ;
00216     uint8_t(*path)[NELLY_BANDS] = s->path;
00217 
00218     for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
00219         opt[0][i] = INFINITY;
00220     }
00221 
00222     for (i = 0; i < 64; i++) {
00223         opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
00224         path[0][ff_nelly_init_table[i]] = i;
00225     }
00226 
00227     for (band = 1; band < NELLY_BANDS; band++) {
00228         int q, c = 0;
00229         float tmp;
00230         int idx_min, idx_max, idx;
00231         power_candidate = cand[band];
00232         for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
00233             idx_min = FFMAX(0, cand[band] - q);
00234             idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
00235             for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
00236                 if ( isinf(opt[band - 1][i]) )
00237                     continue;
00238                 for (j = 0; j < 32; j++) {
00239                     idx = i + ff_nelly_delta_table[j];
00240                     if (idx > idx_max)
00241                         break;
00242                     if (idx >= idx_min) {
00243                         tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
00244                         if (opt[band][idx] > tmp) {
00245                             opt[band][idx] = tmp;
00246                             path[band][idx] = j;
00247                             c = 1;
00248                         }
00249                     }
00250                 }
00251             }
00252         }
00253         assert(c); //FIXME
00254     }
00255 
00256     best_val = INFINITY;
00257     best_idx = -1;
00258     band = NELLY_BANDS - 1;
00259     for (i = 0; i < OPT_SIZE; i++) {
00260         if (best_val > opt[band][i]) {
00261             best_val = opt[band][i];
00262             best_idx = i;
00263         }
00264     }
00265     for (band = NELLY_BANDS - 1; band >= 0; band--) {
00266         idx_table[band] = path[band][best_idx];
00267         if (band) {
00268             best_idx -= ff_nelly_delta_table[path[band][best_idx]];
00269         }
00270     }
00271 }
00272 
00279 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
00280 {
00281     PutBitContext pb;
00282     int i, j, band, block, best_idx, power_idx = 0;
00283     float power_val, coeff, coeff_sum;
00284     float pows[NELLY_FILL_LEN];
00285     int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
00286     float cand[NELLY_BANDS];
00287 
00288     apply_mdct(s);
00289 
00290     init_put_bits(&pb, output, output_size * 8);
00291 
00292     i = 0;
00293     for (band = 0; band < NELLY_BANDS; band++) {
00294         coeff_sum = 0;
00295         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
00296             coeff_sum += s->mdct_out[i                ] * s->mdct_out[i                ]
00297                        + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
00298         }
00299         cand[band] =
00300             log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
00301     }
00302 
00303     if (s->avctx->trellis) {
00304         get_exponent_dynamic(s, cand, idx_table);
00305     } else {
00306         get_exponent_greedy(s, cand, idx_table);
00307     }
00308 
00309     i = 0;
00310     for (band = 0; band < NELLY_BANDS; band++) {
00311         if (band) {
00312             power_idx += ff_nelly_delta_table[idx_table[band]];
00313             put_bits(&pb, 5, idx_table[band]);
00314         } else {
00315             power_idx = ff_nelly_init_table[idx_table[0]];
00316             put_bits(&pb, 6, idx_table[0]);
00317         }
00318         power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
00319         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
00320             s->mdct_out[i] *= power_val;
00321             s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
00322             pows[i] = power_idx;
00323         }
00324     }
00325 
00326     ff_nelly_get_sample_bits(pows, bits);
00327 
00328     for (block = 0; block < 2; block++) {
00329         for (i = 0; i < NELLY_FILL_LEN; i++) {
00330             if (bits[i] > 0) {
00331                 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
00332                 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
00333                 best_idx =
00334                     quant_lut[av_clip (
00335                             coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
00336                             quant_lut_offset[bits[i]],
00337                             quant_lut_offset[bits[i]+1] - 1
00338                             )];
00339                 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
00340                     best_idx++;
00341 
00342                 put_bits(&pb, bits[i], best_idx);
00343             }
00344         }
00345         if (!block)
00346             put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
00347     }
00348 
00349     flush_put_bits(&pb);
00350 }
00351 
00352 static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data)
00353 {
00354     NellyMoserEncodeContext *s = avctx->priv_data;
00355     const int16_t *samples = data;
00356     int i;
00357 
00358     if (s->last_frame)
00359         return 0;
00360 
00361     if (data) {
00362         for (i = 0; i < avctx->frame_size; i++) {
00363             s->buf[s->bufsel][i] = samples[i];
00364         }
00365         for (; i < NELLY_SAMPLES; i++) {
00366             s->buf[s->bufsel][i] = 0;
00367         }
00368         s->bufsel = 1 - s->bufsel;
00369         if (!s->have_saved) {
00370             s->have_saved = 1;
00371             return 0;
00372         }
00373     } else {
00374         memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN);
00375         s->bufsel = 1 - s->bufsel;
00376         s->last_frame = 1;
00377     }
00378 
00379     if (s->have_saved) {
00380         encode_block(s, frame, buf_size);
00381         return NELLY_BLOCK_LEN;
00382     }
00383     return 0;
00384 }
00385 
/* Codec registration entry for the Nellymoser encoder: wires encode_init(),
 * encode_frame() and encode_end() to the codec callbacks, sizes the private
 * context as NellyMoserEncodeContext and lists signed 16-bit as the only
 * accepted sample format. */
00386 AVCodec ff_nellymoser_encoder = {
00387     .name = "nellymoser",
00388     .type = AVMEDIA_TYPE_AUDIO,
00389     .id = CODEC_ID_NELLYMOSER,
00390     .priv_data_size = sizeof(NellyMoserEncodeContext),
00391     .init = encode_init,
00392     .encode = encode_frame,
00393     .close = encode_end,
      /* encode_frame() zero-pads short frames and emits its last block on a
       * flush call with data == NULL */
00394     .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
00395     .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
00396     .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE},
00397 };