00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00038 #include "libavutil/float_dsp.h"
00039 #include "libavutil/mathematics.h"
00040 #include "nellymoser.h"
00041 #include "avcodec.h"
00042 #include "audio_frame_queue.h"
00043 #include "dsputil.h"
00044 #include "fft.h"
00045 #include "internal.h"
00046 #include "sinewin.h"
00047
00048 #define BITSTREAM_WRITER_LE
00049 #include "put_bits.h"
00050
/** Number of entries in pow_table; the table is indexed with (power_idx & 0x7FF). */
#define POW_TABLE_SIZE   (1 << 11)
/** Bias added to the exponent when pow_table is filled and to the divisor shift
 *  when a table entry is turned into a band scale factor. */
#define POW_TABLE_OFFSET 3
/** Number of states per band in the trellis buffers (opt / path). */
#define OPT_SIZE         ((1 << 15) + 3000)
00054
00055 typedef struct NellyMoserEncodeContext {
00056 AVCodecContext *avctx;
00057 int last_frame;
00058 DSPContext dsp;
00059 AVFloatDSPContext fdsp;
00060 FFTContext mdct_ctx;
00061 AudioFrameQueue afq;
00062 DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
00063 DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
00064 DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN];
00065 float (*opt )[NELLY_BANDS];
00066 uint8_t (*path)[NELLY_BANDS];
00067 } NellyMoserEncodeContext;
00068
00069 static float pow_table[POW_TABLE_SIZE];
00070
/**
 * Starting guess for the first band's index into ff_nelly_init_table.
 * Looked up by find_best() with clip((lrintf(value) >> 8) - 20, 0, 95);
 * find_best() then picks between the returned entry and its successor.
 */
static const uint8_t sf_lut[96] = {
    /*  0..15 */  0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
    /* 16..31 */  5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
    /* 32..47 */ 15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
    /* 48..63 */ 27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
    /* 64..79 */ 41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
    /* 80..95 */ 54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
};
00079
/**
 * Starting guess for a band's index into ff_nelly_delta_table.
 * Looked up by find_best() with clip((lrintf(value) >> 8) + 37, 0, 77);
 * find_best() then picks between the returned entry and its successor.
 */
static const uint8_t sf_delta_lut[78] = {
    /*  0..15 */  0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
    /* 16..31 */  4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
    /* 32..47 */ 13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
    /* 48..63 */ 23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
    /* 64..77 */ 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
};
00087
/**
 * Starting guesses for coefficient quantisation, one sub-table per bit count.
 * The sub-table for `bits` occupies indices
 * quant_lut_offset[bits] .. quant_lut_offset[bits + 1] - 1; each value is an
 * index into the dequantisation table for that bit count.
 */
static const uint8_t quant_lut[230] = {
    /* 1 bit: index 0 */
     0,

    /* 2 bits: indices 1..3 */
     0,  1,  2,

    /* 3 bits: indices 4..10 */
     0,  1,  2,  3,  4,  5,  6,

    /* 4 bits: indices 11..31 */
     0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
    12, 13, 13, 13, 14,

    /* 5 bits: indices 32..80 */
     0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
     8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
    22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
    30,

    /* 6 bits: indices 81..229 */
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
     4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
    10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
    15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
    21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
    46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
    53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
    58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
    61, 61, 61, 61, 62,
};
00114
/**
 * Per-bit-count parameters for the quant_lut lookup (array index = bit count).
 * A scaled coefficient c is mapped to the position
 * clip(c * quant_lut_mul[bits] + quant_lut_add[bits],
 *      quant_lut_offset[bits], quant_lut_offset[bits + 1] - 1)
 * inside quant_lut.
 */
static const float quant_lut_mul[7] = {
    0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6
};
static const float quant_lut_add[7] = {
    0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0
};
/** First quant_lut index of each sub-table; the last entry is the total size. */
static const uint8_t quant_lut_offset[8] = {
    0, 0, 1, 4, 11, 32, 81, 230
};
00118
00119 static void apply_mdct(NellyMoserEncodeContext *s)
00120 {
00121 float *in0 = s->buf;
00122 float *in1 = s->buf + NELLY_BUF_LEN;
00123 float *in2 = s->buf + 2 * NELLY_BUF_LEN;
00124
00125 s->fdsp.vector_fmul (s->in_buff, in0, ff_sine_128, NELLY_BUF_LEN);
00126 s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
00127 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
00128
00129 s->fdsp.vector_fmul (s->in_buff, in1, ff_sine_128, NELLY_BUF_LEN);
00130 s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
00131 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->in_buff);
00132 }
00133
00134 static av_cold int encode_end(AVCodecContext *avctx)
00135 {
00136 NellyMoserEncodeContext *s = avctx->priv_data;
00137
00138 ff_mdct_end(&s->mdct_ctx);
00139
00140 if (s->avctx->trellis) {
00141 av_free(s->opt);
00142 av_free(s->path);
00143 }
00144 ff_af_queue_close(&s->afq);
00145 #if FF_API_OLD_ENCODE_AUDIO
00146 av_freep(&avctx->coded_frame);
00147 #endif
00148
00149 return 0;
00150 }
00151
00152 static av_cold int encode_init(AVCodecContext *avctx)
00153 {
00154 NellyMoserEncodeContext *s = avctx->priv_data;
00155 int i, ret;
00156
00157 if (avctx->channels != 1) {
00158 av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
00159 return AVERROR(EINVAL);
00160 }
00161
00162 if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
00163 avctx->sample_rate != 11025 &&
00164 avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
00165 avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
00166 av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
00167 return AVERROR(EINVAL);
00168 }
00169
00170 avctx->frame_size = NELLY_SAMPLES;
00171 avctx->delay = NELLY_BUF_LEN;
00172 ff_af_queue_init(avctx, &s->afq);
00173 s->avctx = avctx;
00174 if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
00175 goto error;
00176 ff_dsputil_init(&s->dsp, avctx);
00177 avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
00178
00179
00180 ff_init_ff_sine_windows(7);
00181 for (i = 0; i < POW_TABLE_SIZE; i++)
00182 pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
00183
00184 if (s->avctx->trellis) {
00185 s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float ));
00186 s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
00187 if (!s->opt || !s->path) {
00188 ret = AVERROR(ENOMEM);
00189 goto error;
00190 }
00191 }
00192
00193 #if FF_API_OLD_ENCODE_AUDIO
00194 avctx->coded_frame = avcodec_alloc_frame();
00195 if (!avctx->coded_frame) {
00196 ret = AVERROR(ENOMEM);
00197 goto error;
00198 }
00199 #endif
00200
00201 return 0;
00202 error:
00203 encode_end(avctx);
00204 return ret;
00205 }
00206
/**
 * Pick the entry of `table` closest to `val` and store its index in the
 * caller's `best_idx` variable (which must be in scope).
 *
 * LUT maps clip((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1) to a starting
 * index; that index and its successor are compared and the closer one wins.
 * `table` therefore needs a valid element at the LUT's largest value + 1.
 *
 * The body is wrapped in do { } while (0) and every argument is
 * parenthesised so the macro behaves as a single statement. `val` is
 * evaluated more than once, so pass an expression without side effects.
 */
#define find_best(val, table, LUT, LUT_add, LUT_size)                          \
    do {                                                                        \
        best_idx =                                                              \
            (LUT)[av_clip((lrintf(val) >> 8) + (LUT_add), 0, (LUT_size) - 1)]; \
        if (fabs((val) - (table)[best_idx]) >                                   \
            fabs((val) - (table)[best_idx + 1]))                                \
            best_idx++;                                                         \
    } while (0)
00212
00213 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
00214 {
00215 int band, best_idx, power_idx = 0;
00216 float power_candidate;
00217
00218
00219 find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
00220 idx_table[0] = best_idx;
00221 power_idx = ff_nelly_init_table[best_idx];
00222
00223 for (band = 1; band < NELLY_BANDS; band++) {
00224 power_candidate = cand[band] - power_idx;
00225 find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
00226 idx_table[band] = best_idx;
00227 power_idx += ff_nelly_delta_table[best_idx];
00228 }
00229 }
00230
/**
 * Squared difference between two values.
 *
 * @param x    first value
 * @param y    second value
 * @param band band number; currently does not influence the result
 * @return (x - y)^2
 */
static inline float distance(float x, float y, int band)
{
    float delta = x - y;

    (void)band;
    return delta * delta;
}
00237
00238 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
00239 {
00240 int i, j, band, best_idx;
00241 float power_candidate, best_val;
00242
00243 float (*opt )[NELLY_BANDS] = s->opt ;
00244 uint8_t(*path)[NELLY_BANDS] = s->path;
00245
00246 for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
00247 opt[0][i] = INFINITY;
00248 }
00249
00250 for (i = 0; i < 64; i++) {
00251 opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
00252 path[0][ff_nelly_init_table[i]] = i;
00253 }
00254
00255 for (band = 1; band < NELLY_BANDS; band++) {
00256 int q, c = 0;
00257 float tmp;
00258 int idx_min, idx_max, idx;
00259 power_candidate = cand[band];
00260 for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
00261 idx_min = FFMAX(0, cand[band] - q);
00262 idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
00263 for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
00264 if ( isinf(opt[band - 1][i]) )
00265 continue;
00266 for (j = 0; j < 32; j++) {
00267 idx = i + ff_nelly_delta_table[j];
00268 if (idx > idx_max)
00269 break;
00270 if (idx >= idx_min) {
00271 tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
00272 if (opt[band][idx] > tmp) {
00273 opt[band][idx] = tmp;
00274 path[band][idx] = j;
00275 c = 1;
00276 }
00277 }
00278 }
00279 }
00280 }
00281 assert(c);
00282 }
00283
00284 best_val = INFINITY;
00285 best_idx = -1;
00286 band = NELLY_BANDS - 1;
00287 for (i = 0; i < OPT_SIZE; i++) {
00288 if (best_val > opt[band][i]) {
00289 best_val = opt[band][i];
00290 best_idx = i;
00291 }
00292 }
00293 for (band = NELLY_BANDS - 1; band >= 0; band--) {
00294 idx_table[band] = path[band][best_idx];
00295 if (band) {
00296 best_idx -= ff_nelly_delta_table[path[band][best_idx]];
00297 }
00298 }
00299 }
00300
00307 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
00308 {
00309 PutBitContext pb;
00310 int i, j, band, block, best_idx, power_idx = 0;
00311 float power_val, coeff, coeff_sum;
00312 float pows[NELLY_FILL_LEN];
00313 int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
00314 float cand[NELLY_BANDS];
00315
00316 apply_mdct(s);
00317
00318 init_put_bits(&pb, output, output_size * 8);
00319
00320 i = 0;
00321 for (band = 0; band < NELLY_BANDS; band++) {
00322 coeff_sum = 0;
00323 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
00324 coeff_sum += s->mdct_out[i ] * s->mdct_out[i ]
00325 + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
00326 }
00327 cand[band] =
00328 log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
00329 }
00330
00331 if (s->avctx->trellis) {
00332 get_exponent_dynamic(s, cand, idx_table);
00333 } else {
00334 get_exponent_greedy(s, cand, idx_table);
00335 }
00336
00337 i = 0;
00338 for (band = 0; band < NELLY_BANDS; band++) {
00339 if (band) {
00340 power_idx += ff_nelly_delta_table[idx_table[band]];
00341 put_bits(&pb, 5, idx_table[band]);
00342 } else {
00343 power_idx = ff_nelly_init_table[idx_table[0]];
00344 put_bits(&pb, 6, idx_table[0]);
00345 }
00346 power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
00347 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
00348 s->mdct_out[i] *= power_val;
00349 s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
00350 pows[i] = power_idx;
00351 }
00352 }
00353
00354 ff_nelly_get_sample_bits(pows, bits);
00355
00356 for (block = 0; block < 2; block++) {
00357 for (i = 0; i < NELLY_FILL_LEN; i++) {
00358 if (bits[i] > 0) {
00359 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
00360 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
00361 best_idx =
00362 quant_lut[av_clip (
00363 coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
00364 quant_lut_offset[bits[i]],
00365 quant_lut_offset[bits[i]+1] - 1
00366 )];
00367 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
00368 best_idx++;
00369
00370 put_bits(&pb, bits[i], best_idx);
00371 }
00372 }
00373 if (!block)
00374 put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
00375 }
00376
00377 flush_put_bits(&pb);
00378 memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
00379 }
00380
00381 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
00382 const AVFrame *frame, int *got_packet_ptr)
00383 {
00384 NellyMoserEncodeContext *s = avctx->priv_data;
00385 int ret;
00386
00387 if (s->last_frame)
00388 return 0;
00389
00390 memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
00391 if (frame) {
00392 memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
00393 frame->nb_samples * sizeof(*s->buf));
00394 if (frame->nb_samples < NELLY_SAMPLES) {
00395 memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
00396 (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
00397 if (frame->nb_samples >= NELLY_BUF_LEN)
00398 s->last_frame = 1;
00399 }
00400 if ((ret = ff_af_queue_add(&s->afq, frame) < 0))
00401 return ret;
00402 } else {
00403 memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
00404 s->last_frame = 1;
00405 }
00406
00407 if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN)))
00408 return ret;
00409 encode_block(s, avpkt->data, avpkt->size);
00410
00411
00412 ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
00413 &avpkt->duration);
00414
00415 *got_packet_ptr = 1;
00416 return 0;
00417 }
00418
00419 AVCodec ff_nellymoser_encoder = {
00420 .name = "nellymoser",
00421 .type = AVMEDIA_TYPE_AUDIO,
00422 .id = AV_CODEC_ID_NELLYMOSER,
00423 .priv_data_size = sizeof(NellyMoserEncodeContext),
00424 .init = encode_init,
00425 .encode2 = encode_frame,
00426 .close = encode_end,
00427 .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
00428 .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
00429 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
00430 AV_SAMPLE_FMT_NONE },
00431 };