00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00038 #include "nellymoser.h"
00039 #include "avcodec.h"
00040 #include "dsputil.h"
00041 #include "fft.h"
00042 #include "sinewin.h"
00043
00044 #define BITSTREAM_WRITER_LE
00045 #include "put_bits.h"
00046
/* Number of entries in pow_table; encode_block() indexes the table with (power_idx & 0x7FF). */
00047 #define POW_TABLE_SIZE (1<<11)
/* Exponent bias added when pow_table is filled in encode_init() and added again to the
 * shift count that encode_block() divides by. */
00048 #define POW_TABLE_OFFSET 3
/* Upper bound of the per-band index range scanned by get_exponent_dynamic(); also used
 * (times NELLY_BANDS) to size the opt/path trellis buffers in encode_init(). */
00049 #define OPT_SIZE ((1<<15) + 3000)
00050
/* Private encoder state, stored in AVCodecContext.priv_data. */
00051 typedef struct NellyMoserEncodeContext {
00052 AVCodecContext *avctx; /* owning codec context, set in encode_init() */
00053 int last_frame; /* set by encode_frame() once it has been called with data == NULL */
00054 int bufsel; /* index (0 or 1) into buf[]; toggled by encode_frame() on every call that stores samples */
00055 int have_saved; /* set by encode_frame() after the first input frame has been buffered */
00056 DSPContext dsp; /* provides vector_fmul / vector_fmul_reverse used for windowing */
00057 FFTContext mdct_ctx; /* MDCT context initialised in encode_init() */
00058 DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES]; /* output of the two MDCTs run by apply_mdct() */
00059 DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES]; /* windowed input of the first MDCT in apply_mdct() */
00060 DECLARE_ALIGNED(32, float, buf)[2][3 * NELLY_BUF_LEN]; /* two alternating sample buffers filled by encode_frame() */
/* Trellis search tables; each is allocated in encode_init() with NELLY_BANDS * OPT_SIZE
 * elements, and only when avctx->trellis is non-zero.
 * NOTE(review): the row type is NELLY_BANDS wide, while get_exponent_dynamic() uses the
 * second index for values up to OPT_SIZE -- confirm the intended layout. */
00061 float (*opt )[NELLY_BANDS];
00062 uint8_t (*path)[NELLY_BANDS];
00063 } NellyMoserEncodeContext;
00064
/* Scaling table filled by encode_init() with -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
 * encode_block() reads it to derive the factor applied to the MDCT coefficients of a band. */
00065 static float pow_table[POW_TABLE_SIZE];
00066
/* First-guess lookup for the initial band exponent: get_exponent_greedy() passes it to
 * find_best(), which indexes it with (lrintf(cand[0]) >> 8) - 20 clipped to 0..95 and then
 * refines the result against ff_nelly_init_table. */
00067 static const uint8_t sf_lut[96] = {
00068 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
00069 5, 5, 5, 6, 7, 7, 8, 8, 9, 10, 11, 11, 12, 13, 13, 14,
00070 15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
00071 27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
00072 41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
00073 54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
00074 };
00075
/* First-guess lookup for the exponent delta of bands 1..NELLY_BANDS-1:
 * get_exponent_greedy() passes it to find_best(), which indexes it with
 * (lrintf(delta) >> 8) + 37 clipped to 0..77 and then refines the result against
 * ff_nelly_delta_table. */
00076 static const uint8_t sf_delta_lut[78] = {
00077 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
00078 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12,
00079 13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
00080 23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
00081 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
00082 };
00083
/* Concatenated first-guess quantiser lookups, one sub-table per sample bit width.
 * The sub-table for a width of `bits` occupies indices
 * quant_lut_offset[bits] .. quant_lut_offset[bits + 1] - 1; encode_block() clips
 * coeff * quant_lut_mul[bits] + quant_lut_add[bits] into that range and uses the entry
 * as the starting index into the dequantization table. */
00084 static const uint8_t quant_lut[230] = {
00085 0,
00086
00087 0, 1, 2,
00088
00089 0, 1, 2, 3, 4, 5, 6,
00090
00091 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11,
00092 12, 13, 13, 13, 14,
00093
00094 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8,
00095 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
00096 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
00097 30,
00098
00099 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3,
00100 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9,
00101 10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
00102 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
00103 21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
00104 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
00105 46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
00106 53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
00107 58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
00108 61, 61, 61, 61, 62,
00109 };
00110
/* Per-bit-width parameters used by encode_block() to index quant_lut: the scaled
 * coefficient is multiplied by quant_lut_mul[bits], offset by quant_lut_add[bits] and
 * clipped to [quant_lut_offset[bits], quant_lut_offset[bits + 1] - 1]. */
00111 static const float quant_lut_mul[7] = { 0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6 };
00112 static const float quant_lut_add[7] = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 };
/* Start index of each sub-table inside quant_lut; the last entry equals the table size. */
00113 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
00114
00115 static void apply_mdct(NellyMoserEncodeContext *s)
00116 {
00117 s->dsp.vector_fmul(s->in_buff, s->buf[s->bufsel], ff_sine_128, NELLY_BUF_LEN);
00118 s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128,
00119 NELLY_BUF_LEN);
00120 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
00121
00122 s->dsp.vector_fmul(s->buf[s->bufsel] + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN,
00123 ff_sine_128, NELLY_BUF_LEN);
00124 s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128,
00125 NELLY_BUF_LEN);
00126 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN);
00127 }
00128
00129 static av_cold int encode_init(AVCodecContext *avctx)
00130 {
00131 NellyMoserEncodeContext *s = avctx->priv_data;
00132 int i;
00133
00134 if (avctx->channels != 1) {
00135 av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
00136 return -1;
00137 }
00138
00139 if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
00140 avctx->sample_rate != 11025 &&
00141 avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
00142 avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
00143 av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
00144 return -1;
00145 }
00146
00147 avctx->frame_size = NELLY_SAMPLES;
00148 s->avctx = avctx;
00149 ff_mdct_init(&s->mdct_ctx, 8, 0, 1.0);
00150 dsputil_init(&s->dsp, avctx);
00151
00152
00153 ff_sine_window_init(ff_sine_128, 128);
00154 for (i = 0; i < POW_TABLE_SIZE; i++)
00155 pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
00156
00157 if (s->avctx->trellis) {
00158 s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float ));
00159 s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
00160 }
00161
00162 return 0;
00163 }
00164
00165 static av_cold int encode_end(AVCodecContext *avctx)
00166 {
00167 NellyMoserEncodeContext *s = avctx->priv_data;
00168
00169 ff_mdct_end(&s->mdct_ctx);
00170
00171 if (s->avctx->trellis) {
00172 av_free(s->opt);
00173 av_free(s->path);
00174 }
00175
00176 return 0;
00177 }
00178
/**
 * Find the entry of `table` closest to `val` and store its index in the
 * variable `best_idx`, which must be declared in the invoking scope.
 *
 * A first guess is read from LUT at (lrintf(val) >> 8) + LUT_add, clipped to
 * 0 .. LUT_size - 1; the guess is then bumped by one when the following table
 * entry is closer to `val`. `val` is evaluated more than once, so it must not
 * have side effects.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (e.g. inside an unbraced if/else), and every argument is
 * parenthesized so expression arguments expand safely.
 */
#define find_best(val, table, LUT, LUT_add, LUT_size) \
    do { \
        best_idx = \
            (LUT)[av_clip((lrintf(val) >> 8) + (LUT_add), 0, (LUT_size) - 1)]; \
        if (fabs((val) - (table)[best_idx]) > fabs((val) - (table)[best_idx + 1])) \
            best_idx++; \
    } while (0)
00184
00185 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
00186 {
00187 int band, best_idx, power_idx = 0;
00188 float power_candidate;
00189
00190
00191 find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
00192 idx_table[0] = best_idx;
00193 power_idx = ff_nelly_init_table[best_idx];
00194
00195 for (band = 1; band < NELLY_BANDS; band++) {
00196 power_candidate = cand[band] - power_idx;
00197 find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
00198 idx_table[band] = best_idx;
00199 power_idx += ff_nelly_delta_table[best_idx];
00200 }
00201 }
00202
/**
 * Squared difference between two values.
 *
 * @param x    first value
 * @param y    second value
 * @param band band number; accepted but not used in the computation
 * @return (x - y) squared
 */
static inline float distance(float x, float y, int band)
{
    const float diff = x - y;

    (void)band;
    return diff * diff;
}
00209
00210 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
00211 {
00212 int i, j, band, best_idx;
00213 float power_candidate, best_val;
00214
00215 float (*opt )[NELLY_BANDS] = s->opt ;
00216 uint8_t(*path)[NELLY_BANDS] = s->path;
00217
00218 for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
00219 opt[0][i] = INFINITY;
00220 }
00221
00222 for (i = 0; i < 64; i++) {
00223 opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
00224 path[0][ff_nelly_init_table[i]] = i;
00225 }
00226
00227 for (band = 1; band < NELLY_BANDS; band++) {
00228 int q, c = 0;
00229 float tmp;
00230 int idx_min, idx_max, idx;
00231 power_candidate = cand[band];
00232 for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
00233 idx_min = FFMAX(0, cand[band] - q);
00234 idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
00235 for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
00236 if ( isinf(opt[band - 1][i]) )
00237 continue;
00238 for (j = 0; j < 32; j++) {
00239 idx = i + ff_nelly_delta_table[j];
00240 if (idx > idx_max)
00241 break;
00242 if (idx >= idx_min) {
00243 tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
00244 if (opt[band][idx] > tmp) {
00245 opt[band][idx] = tmp;
00246 path[band][idx] = j;
00247 c = 1;
00248 }
00249 }
00250 }
00251 }
00252 }
00253 assert(c);
00254 }
00255
00256 best_val = INFINITY;
00257 best_idx = -1;
00258 band = NELLY_BANDS - 1;
00259 for (i = 0; i < OPT_SIZE; i++) {
00260 if (best_val > opt[band][i]) {
00261 best_val = opt[band][i];
00262 best_idx = i;
00263 }
00264 }
00265 for (band = NELLY_BANDS - 1; band >= 0; band--) {
00266 idx_table[band] = path[band][best_idx];
00267 if (band) {
00268 best_idx -= ff_nelly_delta_table[path[band][best_idx]];
00269 }
00270 }
00271 }
00272
00279 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
00280 {
00281 PutBitContext pb;
00282 int i, j, band, block, best_idx, power_idx = 0;
00283 float power_val, coeff, coeff_sum;
00284 float pows[NELLY_FILL_LEN];
00285 int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
00286 float cand[NELLY_BANDS];
00287
00288 apply_mdct(s);
00289
00290 init_put_bits(&pb, output, output_size * 8);
00291
00292 i = 0;
00293 for (band = 0; band < NELLY_BANDS; band++) {
00294 coeff_sum = 0;
00295 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
00296 coeff_sum += s->mdct_out[i ] * s->mdct_out[i ]
00297 + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
00298 }
00299 cand[band] =
00300 log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
00301 }
00302
00303 if (s->avctx->trellis) {
00304 get_exponent_dynamic(s, cand, idx_table);
00305 } else {
00306 get_exponent_greedy(s, cand, idx_table);
00307 }
00308
00309 i = 0;
00310 for (band = 0; band < NELLY_BANDS; band++) {
00311 if (band) {
00312 power_idx += ff_nelly_delta_table[idx_table[band]];
00313 put_bits(&pb, 5, idx_table[band]);
00314 } else {
00315 power_idx = ff_nelly_init_table[idx_table[0]];
00316 put_bits(&pb, 6, idx_table[0]);
00317 }
00318 power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
00319 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
00320 s->mdct_out[i] *= power_val;
00321 s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
00322 pows[i] = power_idx;
00323 }
00324 }
00325
00326 ff_nelly_get_sample_bits(pows, bits);
00327
00328 for (block = 0; block < 2; block++) {
00329 for (i = 0; i < NELLY_FILL_LEN; i++) {
00330 if (bits[i] > 0) {
00331 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
00332 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
00333 best_idx =
00334 quant_lut[av_clip (
00335 coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
00336 quant_lut_offset[bits[i]],
00337 quant_lut_offset[bits[i]+1] - 1
00338 )];
00339 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
00340 best_idx++;
00341
00342 put_bits(&pb, bits[i], best_idx);
00343 }
00344 }
00345 if (!block)
00346 put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
00347 }
00348
00349 flush_put_bits(&pb);
00350 }
00351
00352 static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data)
00353 {
00354 NellyMoserEncodeContext *s = avctx->priv_data;
00355 const int16_t *samples = data;
00356 int i;
00357
00358 if (s->last_frame)
00359 return 0;
00360
00361 if (data) {
00362 for (i = 0; i < avctx->frame_size; i++) {
00363 s->buf[s->bufsel][i] = samples[i];
00364 }
00365 for (; i < NELLY_SAMPLES; i++) {
00366 s->buf[s->bufsel][i] = 0;
00367 }
00368 s->bufsel = 1 - s->bufsel;
00369 if (!s->have_saved) {
00370 s->have_saved = 1;
00371 return 0;
00372 }
00373 } else {
00374 memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN);
00375 s->bufsel = 1 - s->bufsel;
00376 s->last_frame = 1;
00377 }
00378
00379 if (s->have_saved) {
00380 encode_block(s, frame, buf_size);
00381 return NELLY_BLOCK_LEN;
00382 }
00383 return 0;
00384 }
00385
/* Codec registration entry for the Nellymoser encoder: wires encode_init(),
 * encode_frame() and encode_end() into libavcodec and declares signed 16-bit
 * input as the only accepted sample format. */
00386 AVCodec ff_nellymoser_encoder = {
00387 .name = "nellymoser",
00388 .type = AVMEDIA_TYPE_AUDIO,
00389 .id = CODEC_ID_NELLYMOSER,
00390 .priv_data_size = sizeof(NellyMoserEncodeContext),
00391 .init = encode_init,
00392 .encode = encode_frame,
00393 .close = encode_end,
/* encode_frame() zero-pads frames shorter than NELLY_SAMPLES and emits its output one call late */
00394 .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
00395 .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
00396 .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE},
00397 };