[FFmpeg-devel] [PATCH] AAC Decoder round 3

Michael Niedermayer michaelni
Sun Jul 6 05:19:17 CEST 2008


On Tue, Jul 01, 2008 at 12:37:11PM +0100, Robert Swain wrote:
> Hello,
> 
> Another submission for full review, this time without both LTP and
> SSR. The code for LTP and SSR is still in the Summer of Code
> repository for anyone who is interested in working on either of those
> features in the future.
> 
> See attached.

[...]

> +DECLARE_ALIGNED_16(static float, kbd_long_1024[1024]);
> +DECLARE_ALIGNED_16(static float, kbd_short_128[128]);
> +DECLARE_ALIGNED_16(static float, sine_long_1024[1024]);
> +DECLARE_ALIGNED_16(static float, sine_short_128[128]);

vertical align


[...]
> +/**
> + * special codebooks
> + */
> +enum {
> +    ZERO_HCB       = 0,
> +    FIRST_PAIR_HCB = 5,
> +    ESC_HCB        = 11,
> +    NOISE_HCB      = 13,
> +    INTENSITY_HCB2 = 14,
> +    INTENSITY_HCB  = 15,
> +    ESC_FLAG       = 16,
> +};

This should have a type and that type should be used for the variable
holding such enums


[...]
> +/**
> + * Program configuration - describes how channels are arranged. Either read from
> + * stream (ID_PCE) or created based on a default fixed channel arrangement.
> + */
> +typedef struct {
> +    int che_type[4][MAX_TAGID];   ///< channel element type with the first index as the first 4 raw_data_block IDs
> +

> +    int mono_mixdown;         ///< The SCE tag to use if user requests mono   output, -1 if not available.
> +    int stereo_mixdown;       ///< The CPE tag to use if user requests stereo output, -1 if not available.

maybe mono_mixdown_tag would be a better name ?


[...]
> +/**
> + * coupling parameters
> + */
> +typedef struct {

> +    int ind_sw;            ///< Set if independent coupling (i.e. after IMDCT).

is_indep_coup or another name one can make sense of without having to
look it up here.


[...]
> +/**
> + * Free a channel element.
> + */
> +static void che_freep(ChannelElement **s) {
> +    if(!*s)
> +        return;
> +    av_freep(s);
> +}

am i too tired or is the if-return useless and this a senseless wraper
function?


> +
> +/**
> + * Configure output channel order and optional mixing based on the current
> + * program configuration element and user requested channels.
> + *
> + * \param newpcs New program configuration struct - we only do something if it differs from the current one.
> + */
> +static int output_configure(AACContext *ac, ProgramConfig *newpcs) {
> +    AVCodecContext *avctx = ac->avccontext;
> +    ProgramConfig * pcs = &ac->pcs;
> +    int i, j, channels = 0;
> +    float a, b;
> +    ChannelElement *mixdown[3] = { NULL, NULL, NULL };
> +
> +    static const float mixdowncoeff[4] = {
> +        /* matrix mix-down coefficient, table 4.70 */
> +        1. / M_SQRT2,
> +        1. / 2.,
> +        1. / (2 * M_SQRT2),
> +        0
> +    };
> +
> +    if(!memcmp(&ac->pcs, newpcs, sizeof(ProgramConfig)))
> +        return 0; /* no change */
> +
> +    *pcs = *newpcs;
> +
> +    /* Allocate or free elements depending on if they are in the
> +     * current program configuration struct.
> +     *
> +     * Set up default 1:1 output mapping.
> +     *
> +     * For a 5.1 stream the output order will be:
> +     *    [ Front Left ] [ Front Right ] [ Center ] [ LFE ] [ Surround Left ] [ Surround Right ]
> +     *
> +     * Locate front, center and back channels for later matrix mix-down.
> +     */
> +
> +    for(i = 0; i < MAX_TAGID; i++) {
> +        for(j = 0; j < 4; j++) {
> +            if(pcs->che_type[j][i] && !ac->che[j][i]) {
> +                ac->che[j][i] = av_mallocz(sizeof(ChannelElement));
> +                if(j != ID_CCE) {
> +                    ac->output_data[channels++] = ac->che[j][i]->ch[0].ret;
> +                    ac->che[j][i]->ch[0].mixing_gain = 1.0f;
> +                    if(j == ID_CPE) {
> +                        ac->output_data[channels++] = ac->che[j][i]->ch[1].ret;
> +                        ac->che[j][i]->ch[1].mixing_gain = 1.0f;
> +                        if(!mixdown[MIXDOWN_FRONT] && pcs->che_type[j][i] == AAC_CHANNEL_FRONT)
> +                            mixdown[MIXDOWN_FRONT] = ac->che[j][i];
> +                        if(!mixdown[MIXDOWN_BACK ] && pcs->che_type[j][i] == AAC_CHANNEL_BACK)
> +                            mixdown[MIXDOWN_BACK ] = ac->che[j][i];
> +                    }
> +                    if(j == ID_SCE && !mixdown[MIXDOWN_CENTER] && pcs->che_type[j][i] == AAC_CHANNEL_FRONT)
> +                        mixdown[MIXDOWN_CENTER] = ac->che[j][i];
> +                }
> +            } else
> +                che_freep(&ac->che[j][i]);
> +        }
> +    }
> +
> +    ac->mm[MIXDOWN_FRONT] = ac->mm[MIXDOWN_BACK] = ac->mm[MIXDOWN_CENTER] = NULL;
> +
> +    /* Check for matrix mix-down to mono or stereo. */
> +
> +    if(avctx->request_channels && avctx->request_channels <= 2 &&
> +       avctx->request_channels != channels) {
> +
> +        if((avctx->request_channels == 1 && pcs->mono_mixdown   != -1) ||
> +           (avctx->request_channels == 2 && pcs->stereo_mixdown != -1)) {
> +            /* Add support for this as soon as we get a sample so we can figure out
> +               exactly how this is supposed to work. */
> +            av_log(avctx, AV_LOG_ERROR,
> +                   "Mix-down using pre-mixed elements is not supported, please file a bug. "
> +                   "Reverting to matrix mix-down.\n");
> +        }
> +
> +        /* We need 'center + L + R + sL + sR' for matrix mix-down. */
> +        if(mixdown[MIXDOWN_CENTER] && mixdown[MIXDOWN_FRONT] && mixdown[MIXDOWN_BACK]) {
> +            a = mixdowncoeff[pcs->mixdown_coeff_index];
> +
> +            if(avctx->request_channels == 2) {
> +                b = 1. / (1. + (1. / M_SQRT2) + a * (pcs->pseudo_surround ? 2. : 1.));
> +                mixdown[MIXDOWN_CENTER]->ch[0].mixing_gain      = b / M_SQRT2;
> +            } else {
> +                b = 1. / (3. + 2. * a);
> +                mixdown[MIXDOWN_CENTER]->ch[0].mixing_gain      = b;
> +            }
> +            mixdown[MIXDOWN_FRONT]->ch[0].mixing_gain = b;
> +            mixdown[MIXDOWN_FRONT]->ch[1].mixing_gain = b;
> +            mixdown[MIXDOWN_BACK ]->ch[0].mixing_gain = b * a;
> +            mixdown[MIXDOWN_BACK ]->ch[1].mixing_gain = b * a;
> +            for(i = 0; i < 3; i++) ac->mm[i] = mixdown[i];
> +
> +            channels = avctx->request_channels;
> +        } else {
> +            av_log(avctx, AV_LOG_WARNING, "Matrix mixing from %d to %d channels is not supported.\n",
> +                   channels, avctx->request_channels);
> +        }
> +    }
> +
> +    avctx->channels = channels;

> +    ac->interleaved_output = av_realloc(ac->interleaved_output, channels * 1024 * sizeof(float));

memleak on allocation failure


[...]

> +// parser implementation

?


[...]
> +/**
> + * Decode section_data payload; reference: table 4.46.
> + */
> +static int decode_section_data(AACContext * ac, GetBitContext * gb, IndividualChannelStream * ics, int cb[][64], int cb_run_end[][64]) {
> +    int g;
> +    for (g = 0; g < ics->num_window_groups; g++) {
> +        int bits = (ics->window_sequence == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
> +        int k = 0;
> +        while (k < ics->max_sfb) {
> +            int sect_len = k;
> +            int sect_len_incr;
> +            int sect_cb = get_bits(gb, 4);
> +            if (sect_cb == 12) {
> +                av_log(ac->avccontext, AV_LOG_ERROR, "invalid codebook\n");
> +                return -1;
> +            }

> +            while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits)-1)
> +                sect_len += sect_len_incr;
> +            sect_len += sect_len_incr;
> +            if (sect_len > ics->max_sfb) {

change the check to unsigned or check for < 0 please, it doesnt seeem to
do any harm but <0 is not correct


[...]
> +/**
> + * Decode scale_factor_data; reference: table 4.47.
> + */
> +static int decode_scale_factor_data(AACContext * ac, GetBitContext * gb, float mix_gain, unsigned int global_gain, IndividualChannelStream * ics, const int cb[][64], const int cb_run_end[][64], float sf[][64]) {
> +    const int sf_offset = ac->sf_offset + (ics->window_sequence == EIGHT_SHORT_SEQUENCE ? 12 : 0);
> +    int g, i;
> +    int offset[3] = { global_gain, global_gain - 90, 100 };
> +    int noise_flag = 1;
> +    static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
> +    ics->intensity_present = 0;
> +    for (g = 0; g < ics->num_window_groups; g++) {
> +        for (i = 0; i < ics->max_sfb;) {
> +            if (cb[g][i] == ZERO_HCB) {
> +                int run_end = cb_run_end[g][i];
> +                for(; i < run_end; i++)
> +                    sf[g][i] = 0.;
> +                continue;

useless continue and this could use a memset(0)


> +            }else if((cb[g][i] == INTENSITY_HCB) || (cb[g][i] == INTENSITY_HCB2)) {

> +                int run_end = cb_run_end[g][i];

that can be factored out of the if/else


[...]
> +static void pulse_tool(AACContext * ac, const IndividualChannelStream * ics, const Pulse * pulse, int * icoef) {
> +    int i, off = ics->swb_offset[pulse->start];
> +    for (i = 0; i < pulse->num_pulse; i++) {
> +        off += pulse->offset[i];

> +        if (icoef[off] > 0)
> +            icoef[off] += pulse->amp[i];
> +        else
> +            icoef[off] -= pulse->amp[i];

ic= icoef[off]>>31; or ic= (icoef[off]-1)>>31 (depending in 0)
icoef[off] += (pulse->amp[i]^ic)-ic;
(yes this shouldbe faster)



> +    }
> +}
> +

> +static void quant_to_spec_tool(AACContext * ac, const IndividualChannelStream * ics, const int * icoef, const int cb[][64], const float sf[][64], float * coef) {
> +    const uint16_t * offsets = ics->swb_offset;
> +    const int c = 1024/ics->num_window_groups;
> +    int g, i, group, k;
> +

> +    for(g = 0; g < ics->num_window_groups; g++)
> +        memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float)*(c - offsets[ics->max_sfb]));
> +
> +    for (g = 0; g < ics->num_window_groups; g++) {

cant these 2 loops be merged?
also the spaces sourrounding ( are inconsisent


> +        for (i = 0; i < ics->max_sfb; i++) {
> +            if (cb[g][i] == NOISE_HCB) {
> +                for (group = 0; group < ics->group_len[g]; group++) {
> +                    float scale = sf[g][i] / ((offsets[i+1] - offsets[i]) * PNS_MEAN_ENERGY);
> +                    for (k = offsets[i]; k < offsets[i+1]; k++)
> +                        coef[group*128+k] = (int32_t)av_random(&ac->random_state) * scale;
> +                }
> +            } else if (cb[g][i] != INTENSITY_HCB && cb[g][i] != INTENSITY_HCB2) {
> +                for (group = 0; group < ics->group_len[g]; group++) {
> +                    for (k = offsets[i]; k < offsets[i+1]; k++) {
> +                        coef[group*128+k] = ivquant(ac, icoef[group*128+k]) * sf[g][i];
> +                    }
> +                }
> +            }
> +        }

> +        coef += ics->group_len[g]*128;
> +        icoef += ics->group_len[g]*128;

vertical align


[...]
> +static int dynamic_range_info(AACContext * ac, GetBitContext * gb, int cnt) {
> +    int n = 1;
> +    int drc_num_bands = 1;
> +    int i;
> +
> +    /* pce_tag_present? */
> +    if(get_bits1(gb)) {

> +        ac->che_drc.pce_instance_tag = get_bits(gb, 4);
> +        ac->che_drc.tag_reserved_bits = get_bits(gb, 4);

vertical align


> +        n++;
> +    }
> +
> +    /* excluded_chns_present? */
> +    if(get_bits1(gb)) {
> +        n += excluded_channels(ac, gb);
> +    }
> +
> +    /* drc_bands_present? */
> +    if (get_bits1(gb)) {
> +        ac->che_drc.band_incr = get_bits(gb, 4);
> +        ac->che_drc.interpolation_scheme = get_bits(gb, 4);
> +        n++;
> +        drc_num_bands += ac->che_drc.band_incr;
> +        for (i = 0; i < drc_num_bands; i++) {
> +            ac->che_drc.band_top[i] = get_bits(gb, 8);
> +            n++;
> +        }
> +    }
> +
> +    /* prog_ref_level_present? */
> +    if (get_bits1(gb)) {
> +        ac->che_drc.prog_ref_level = get_bits(gb, 7);

> +        ac->che_drc.prog_ref_level_reserved_bits = get_bits1(gb);

reserved bits? doesnt sound like that needs to be stored in the conext
but then i didnt check teh spec ...


[...]
> +static void coupling_tool(AACContext * ac, int independent, int domain) {

This as well as other functions could benefit from some documentation
what does it do, what is "independent" what "domain" ?


> +    int i;
> +    for (i = 0; i < MAX_TAGID; i++) {
> +        ChannelElement * cc = ac->che[ID_CCE][i];
> +        if (cc) {
> +            if (cc->coup.ind_sw && independent) {
> +                transform_coupling_tool(ac, cc, coupling_independent_trans);
> +            } else if (!cc->coup.ind_sw && !independent && (cc->coup.domain == domain)) {
> +                transform_coupling_tool(ac, cc, coupling_dependent_trans);
> +            }
> +        }
> +    }
> +}
> +

> +static void transform_sce_tool(AACContext * ac, void (*sce_trans)(AACContext * ac, SingleChannelElement * sce)) {

iam still not happy with _tool() functions. Functions should be named
so that the names describes what the function does.



[...]
> +static void spec_to_sample(AACContext * ac) {

what is "spec to sample" ? the name does not say anythig to me


[...]

> +    i = 1024 * avccontext->channels * sizeof(uint16_t);
> +    if(*data_size < i)
> +        return -1;

i wonder if this shouldnt be checked earlier (closer to where the number of
channels is set)


> +    *data_size = i;
> +
> +    ac->dsp.float_to_int16(data, ac->interleaved_output, 1024 * avccontext->channels);
> +    return 0;
> +}
> +
> +
> +static int aac_decode_frame(AVCodecContext * avccontext, void * data, int * data_size, const uint8_t * buf, int buf_size) {
> +    AACContext * ac = avccontext->priv_data;
> +    GetBitContext gb;
> +    int id, err, tag;
> +
> +    init_get_bits(&gb, buf, buf_size*8);
> +
> +    // parse
> +    while ((id = get_bits(&gb, 3)) != ID_END) {
> +        tag = get_bits(&gb, 4);
> +        switch (id) {
> +
> +        case ID_SCE:
> +            if(!ac->che[ID_SCE][tag]) {
> +                if(tag == 1 && ac->che[ID_LFE][0]) {
> +                    /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
> +                       instead of SCE[0] CPE[0] CPE[0] LFE[0].
> +                       If we seem to have encountered such a stream,
> +                       transfer the LFE[0] element to SCE[1] */
> +                    ac->che[ID_SCE][tag] = ac->che[ID_LFE][0];
> +                    ac->che[ID_LFE][0] = NULL;
> +                } else {
> +                    err = 1;
> +                    break;
> +                }
> +            }
> +            err = decode_ics(ac, &gb, 0, 0, &ac->che[ID_SCE][tag]->ch[0]);
> +            break;
> +
> +        case ID_CPE:
> +            if (ac->che[ID_CPE][tag]) {
> +                err = decode_cpe(ac, &gb, tag);
> +            } else
> +                err = -1;
> +            break;
> +
> +        case ID_FIL:
> +            if (tag == 15)
> +                tag += get_bits(&gb, 8) - 1;
> +            while (tag > 0)
> +                tag -= extension_payload(ac, &gb, tag);
> +            err = 0; /* FIXME */
> +            break;
> +
> +        case ID_PCE:
> +            err = program_config_element(ac, &gb);
> +            break;
> +
> +        case ID_DSE:
> +            data_stream_element(ac, &gb, tag);
> +            err = 0;
> +            break;
> +
> +        case ID_CCE:
> +            if (ac->che[ID_CCE][tag]) {
> +                err = decode_cce(ac, &gb, tag);
> +            } else
> +                err = -1;
> +            break;
> +
> +        case ID_LFE:
> +            if (ac->che[ID_LFE][tag]) {
> +                err = decode_ics(ac, &gb, 0, 0, &ac->che[ID_LFE][tag]->ch[0]);
> +            } else
> +                err = -1;
> +            break;
> +
> +        default:
> +            err = -1; /* should not happen, but keeps compiler happy */
> +            break;
> +        }

by setting err= -1 before the switch the code can be simplified

> +
> +        if(err)
> +            return -1;
> +    }
> +
> +    spec_to_sample(ac);
> +    output_samples(avccontext, data, data_size);
> +
> +    return buf_size;
> +}
> +

> +static int aac_decode_close(AVCodecContext * avccontext) {

av_cold


[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Thouse who are best at talking, realize last or never when they are wrong.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20080706/a386a7e0/attachment.pgp>



More information about the ffmpeg-devel mailing list