[FFmpeg-devel] [PATCH] HE-AAC v1 decoder

Wed Jan 27 11:53:24 CET 2010

On Jan 26, 2010, at 10:10 PM, Alex Converse wrote:

> Greetings Sirs,
> 
> Enclosed you will find one (1) HE-AAC decoder to make glorious great
> software FFmpeg.
> 
> Heart,
> 
> Alex

Some comments:

> @@ -1815,7 +1814,7 @@ static void apply_independent_coupling(AACContext *ac,
>      const float *src = cce->ch[0].ret;
>      float *dest = target->ret;
>  
> -    for (i = 0; i < 1024; i++)
> +    for (i = 0; i < 2048; i++)
>          dest[i] += gain * (src[i] - bias);
>  }
> 
Looks like a SIMD opportunity.

> +/// constant to avoid division by zero, e.g. 96 dB below maximum signal input
> +#define EPS0 0.000000000001

You can use FLT_EPSILON.

> +static void make_bands(int16_t* bands, int start, int stop, int num_bands)

int16_t *bands

> +    base = powf((float)stop / start, 1.0f / num_bands);

The (float) cast isn't needed.
And the file uses 'float x = 1.0f' in a few places where the 'f' isn't needed, but it doesn't really matter.

> +    for (k = 0; k < num_bands-1; k++) {
> +        prod *= base;
> +        present  = lroundf(start * prod);
> +        bands[k] = present - previous;
> +        previous = present;
> +    }

You use lroundf() a lot, which is a library call for me. Can't you set the rounding direction (or just ignore it) and cast instead?
Also, this is called later on with the output qsorted afterwards, but it doesn't look like it can be generated out of order.

> +    if (sbr->sample_rate == 16000) {
> +        sbr_offset_ptr = sbr_offset[0];
> +    } else if (sbr->sample_rate == 22050) {
> +        sbr_offset_ptr = sbr_offset[1];
> +    } else if (sbr->sample_rate == 24000) {
> +        sbr_offset_ptr = sbr_offset[2];
> +    } else if (sbr->sample_rate == 32000) {
> +        sbr_offset_ptr = sbr_offset[3];
> +    } else if ((sbr->sample_rate >= 44100) &&
> +               (sbr->sample_rate <= 64000)) {
> +        sbr_offset_ptr = sbr_offset[4];
> +    } else if (sbr->sample_rate > 64000) {
> +        sbr_offset_ptr = sbr_offset[5];
> +    } else {
> +        av_log(ac->avccontext, AV_LOG_ERROR,
> +               "Unsupported sample rate for SBR: %d\n", sbr->sample_rate);
> +        return -1;
> +    }

Kind of long. I'd use a switch, except for the arbitrary samplerates allowed after 44.1k (why only there?).

> +        if (sbr->k[2] / (float)sbr->k[0] > 2.2449) {
> +            two_regions = 1;
> +            sbr->k[1] = sbr->k[0] << 1;
> +        } else {
> +            two_regions = 0;
> +            sbr->k[1] = sbr->k[2];
> +        }

Here it should be 2.2449f. What's 'k'?

> +            if (vdk1_min < vdk0_max) {
> +                int change;
> +                qsort(vk1 + 1, num_bands_1, sizeof(vk1[1]), qsort_comparison_function_int16);
> +                change = FFMIN(vdk0_max - vk1[1], (vk1[num_bands_1] - vk1[1]) >> 1);
> +                vk1[1]           += change;
> +                vk1[num_bands_1] -= change;
> +            }
> +
> +            qsort(vk1 + 1, num_bands_1, sizeof(vk1[1]), qsort_comparison_function_int16);

Looks like it unnecessarily sorts twice.

> +            sbr->n_master = num_bands_0 + num_bands_1;
> +            memcpy(&sbr->f_master[0],               vk0,
> +                   (num_bands_0 + 1) * sizeof(sbr->f_master[0]));
> +            memcpy(&sbr->f_master[num_bands_0 + 1], vk1 + 1,
> +                   num_bands_1      * sizeof(sbr->f_master[0]));

Nitpick: one more space before 'num_bands_1'.

> +    // temp == max number of QMF subbands
> +    if (sbr->sample_rate <= 32000) {
> +        temp = 48;
> +    } else if (sbr->sample_rate == 44100) {
> +        temp = 35;
> +    } else if (sbr->sample_rate >= 48000)
> +        temp = 32;

So name it that...

> +    if (sbr->k[2] - sbr->k[0] > temp) {
> +        av_log(ac->avccontext, AV_LOG_ERROR,
> +               "Invalid bitstream, too many QMF subbands: %d\n", sbr->k[2] - sbr->k[0]);
> +        return -1;
> +    }

The formatting ("invalid bitstream") of this one is different from the other errors.

> +    if (goal_sb < sbr->k[4] + sbr->m[1]) {
> +        for (k = 0; sbr->f_master[k] < goal_sb; k++);
> +    } else
> +        k = sbr->n_master;

IIRC some compilers warn about 'for (...);' with no space before ;.

> +/// High Frequency Generation - Patch Construction (14496-3 sp04 p216 fig. 4.46)
> +static int sbr_hf_calc_npatches(AACContext *ac, SpectralBandReplication *sbr)
> +{
> +    int i, k, sb = 0;
> +    int msb = sbr->k[0];
> +    int usb = sbr->k[4];
> +    int goal_sb = lroundf((1 << 11) * 1000 / (float)sbr->sample_rate);

What do those names mean?

> +static inline int in_table(void *table, int last_el, int el_size, void *needle)
> +{
> +    int i;
> +    uint8_t *table_ptr = table; // avoids a warning with void * ptr arith
> +    for (i = 0; i <= last_el; i++, table_ptr += el_size)
> +        if (!memcmp(table_ptr, needle, el_size))
> +            return 1;
> +    return 0;
> +}

I think the comment is superfluous.
Also, this is only used with sizeof(int16_t).

> +    // Limiter Frequency Band Table (14496-3 sp04 p198)
> +    if (sbr->bs_limiter_bands > 0) {
> +        const float lim_bands_per_octave[3] = {1.2, 2, 3};
> +        int16_t patch_borders[sbr->num_patches + 1];

Not using 'static' might make it build the array on the stack anyway.

> +        dk = phi[2][1][0] * phi[1][0][0] -
> +               (phi[1][1][0] * phi[1][1][0] + phi[1][1][1] * phi[1][1][1]) / 1.000001f;

* (1.f/1.000001f)

> +    if (sbr->bs_interpol_freq) {
> +        for (l = 0; l < ch_data->bs_num_env[1]; l++) {
> +            const int env_size = (ch_data->t_env[l + 1] - ch_data->t_env[l]) << 1;
> ...
> +                e_curr[l][m] = sum / env_size;

Use the inverse of env_size here too.

> +    // max gain limits : -3dB, 0dB, 3dB, inf dB (limiter off)
> +    static const float limgain[4] = { 0.70795, 1.0, 1.41254, 10000000000 };

This is static const, but the ones in the next function aren't.

> +    float g_filt[42][48], q_filt[42][48], w_temp[42][48][2];
> +    float (*g_temp)[48] = ch_data->g_temp, (*q_temp)[48] = ch_data->q_temp;
> +    memcpy(Y[0], Y[1], sizeof(Y[0]));

...and they're 32KB+ on stack, maybe put them in a context?

What is all the memcpy(x[0],x[1],sizeof(x[0])); used for?

> +void ff_sbr_dequant(AACContext *ac, SpectralBandReplication *sbr, int id_aac)
> +{
> +    int ch;
> +
> +    if (sbr->start) {
> +        for (ch = 0; ch < (id_aac == TYPE_CPE ? 2 : 1); ch++) {
> +        sbr_time_freq_grid(ac, sbr, &sbr->data[ch], ch);
> +        sbr_env_noise_floors(sbr, &sbr->data[ch], ch);
> +        }
> +        sbr_dequant(sbr, id_aac);
> +    }
> +}

'if (!sbr->start) return;' if you want.

And indent the loop contents.

> +    /**
> +     * @defgroup state         State varaibles
> +     * @{
> +     */

variables

In SpectralBandReplication, you might get unnecessary struct padding if you put uint8_t in the middle of struct definitions. Best to put them at the end unless it's already organized for cache use.

> --- /dev/null
> +++ b/libavcodec/aacsbr_internal.h
> @@ -0,0 +1,48 @@
> +/*
> + * AAC Spectral Band Replication function delcarations
> + * Copyright (c) 2008-2009 Robert Swain ( rob opendot cl )

declarations (and it's 2010)

> +/** Dequantized all channels in one SBR element. */
> +void ff_sbr_dequant(AACContext *ac, SpectralBandReplication *sbr, int id_aac);

dequantize