[FFmpeg-devel] Nellymoser encoder

Thu Aug 28 00:11:20 CEST 2008

On Wed, Aug 27, 2008 at 08:43:29PM +0200, Bartlomiej Wolowiec wrote:
> Hi,
> here is Nellymoser encoder code. The code is from soc svn. It doesn't contain 
> lowpass filter.
> -- 
> Bartlomiej Wolowiec

[...]
> +typedef struct NellyMoserEncodeContext {
> +    AVCodecContext  *avctx;
> +    int             last_frame;
> +    int             bufsize;              ///< number of sample in buf

> +    int             bits[NELLY_BUF_LEN];  ///< number of bits used to encode coeff
> +    float           pows[NELLY_FILL_LEN]; ///< exponent used to code coeff

only used in one function so no need to be in the context

> +    DSPContext      dsp;
> +    MDCTContext     mdct_ctx;
> +    DECLARE_ALIGNED_16(float, mdct_out[NELLY_SAMPLES]);
> +    DECLARE_ALIGNED_16(float, buf[2 * NELLY_SAMPLES]);     ///< sample buffer
> +} NellyMoserEncodeContext;
> +

> +static DECLARE_ALIGNED_16(float, sine_window[NELLY_SAMPLES]);

duplicate of ff_sine_windows and sine_window from nellymoserdec

> +static float pow_table[MAX_POW_CACHED];     ///< -pow(2, -i / 2048.0 - 3.0);
> +
> +void apply_mdct(NellyMoserEncodeContext *s, float *in, float *coefs)
> +{
> +    DECLARE_ALIGNED_16(float, in_buff[NELLY_SAMPLES]);
> +
> +    memcpy(&in_buff[0], &in[0], NELLY_SAMPLES * sizeof(float));
> +    s->dsp.vector_fmul(in_buff, sine_window, NELLY_SAMPLES);
> +    memset(coefs, 0, NELLY_BUF_LEN * sizeof(float));
> +    ff_mdct_calc(&s->mdct_ctx, coefs, in_buff);
> +}

what is the memset good for?

> +
> +static av_cold int encode_init(AVCodecContext *avctx)
> +{
> +    NellyMoserEncodeContext *s = avctx->priv_data;
> +    int i;
> +
> +    if (avctx->channels != 1) {
> +        av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
> +        return -1;
> +    }
> +
> +    if(avctx->sample_rate != 8000 && avctx->sample_rate != 11025 &&
> +            avctx->sample_rate != 22050 && avctx->sample_rate != 44100){
> +        av_log(avctx, AV_LOG_ERROR,
> +                "Nellymoser works only with 8000, 11025, 22050 and 44100 sample rate\n");
> +        return -1;
> +    }
> +
> +    avctx->frame_size = NELLY_SAMPLES;
> +    s->avctx = avctx;

> +    s->bufsize = 0;
> +    s->last_frame = 0;

they should already be memset(0)

[...]
> +/**
> + * Searching index in table with size table_size, where
> + * |val-table[best_idx]| is minimal.
> + * It assumes that table elements are in increasing order and uses binary search.
> + */
> +#define find_best_value(val, table, table_size, best_idx) \
> +{ \
> +    int first=0, last=table_size-1, mid; \
> +    while(first<=last){ \
> +        mid=(first+last)/2; \
> +        if(val > table[mid]){ \
> +            first = mid + 1; \
> +        }else{ \
> +            last = mid - 1; \
> +        } \
> +    } \
> +    if(!first || (first!=table_size && table[first]-val < val-table[last])) \
> +        best_idx = first; \
> +    else \
> +        best_idx = last; \
> +}

This can be done faster with a look up table
and a single right value vs. left value check

> +
> +/**
> + * Encodes NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
> + *  @param s               encoder context
> + *  @param output          output buffer
> + *  @param output_size     size of output buffer
> + *  @param samples         input samples
> + */
> +static void encode_block(NellyMoserEncodeContext *s,
> +                         unsigned char *output, int output_size, float *samples)
> +{
> +    PutBitContext pb;
> +    int i, band, block, best_idx, power_idx = 0;
> +    float power_val, power_candidate, coeff, coeff_sum;
> +    int band_start, band_end;
> +
> +    apply_mdct(s, samples, s->mdct_out);
> +    apply_mdct(s, samples + NELLY_BUF_LEN, s->mdct_out + NELLY_BUF_LEN);
> +
> +    init_put_bits(&pb, output, output_size * 8);
> +
> +    band_start = 0;
> +    band_end = ff_nelly_band_sizes_table[0];
> +    for (band = 0; band < NELLY_BANDS; band++) {
> +        coeff_sum = 0;
> +        for (i = band_start; i < band_end; i++) {

> +            for (block = 0; block < 2; block++) {
> +                coeff = s->mdct_out[i + block * NELLY_BUF_LEN];
> +                coeff_sum += coeff * coeff;
> +            }

I'd unroll that by hand to
coeff_sum += s->mdct_out[i                ]*s->mdct_out[i                ]
            +s->mdct_out[i + NELLY_BUF_LEN]*s->mdct_out[i + NELLY_BUF_LEN];

> +        }
> +        power_candidate =
> +            (log(FFMAX(64.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 1))) -
> +             log(64.0)) * 1024.0 / M_LN2;

log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;

also this is based on
(sum(0..N) ABS(coeff)^2/N)^(1/2)

it would be interesting to try
C*(sum(0..N) ABS(coeff)^D/N)^(1/D) for different values of C and D

maybe you could try
C={0.9,1.0,1.1}
D={1.9,2.0,2.1}
at first and see if any improves distortion

> +
> +        if (band) {
> +            power_candidate -= power_idx;
> +            find_best_value(power_candidate, ff_nelly_delta_table, 32, best_idx);
> +            put_bits(&pb, 5, best_idx);
> +            power_idx += ff_nelly_delta_table[best_idx];
> +        } else {
> +            //base exponent
> +            find_best_value(power_candidate, ff_nelly_init_table, 64, best_idx);
> +            put_bits(&pb, 6, best_idx);
> +            power_idx = ff_nelly_init_table[best_idx];
> +        }

I wish I knew how to optimally assign these values; sadly, I do not.
Suggestions would of course be welcome in case anyone has an idea on how
to optimally select them. The tricky part is that these not only scale the
signal, they are also the basis upon which the bits per band, and thus the
encoding, are selected.

Still, they could be made to more closely match the "power_candidate" values
from above using Viterbi, though arguably it would just be closer to a guess.

An alternative may be to just retry the whole encode_block with slightly
changed power_candidate values for each band and pick whichever ends up with
the least distortion (that is, the least difference from the input signal).
This should be rather easy to try ...

> +
> +        if (power_idx >= 0) {
> +            power_val = pow_table[power_idx & 0x7FF] / (1 << (power_idx >> 11));
> +        } else {
> +            power_val = -pow(2, -power_idx / 2048.0 - 3.0);
> +        }

power_idx can be <0 ?

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

I hate to see young programmers poisoned by the kind of thinking
Ulrich Drepper puts forward since it is simply too narrow -- Roman Shaposhnik
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20080828/4b6b28a4/attachment.pgp>