[FFmpeg-devel] AAC decoder round 5

Robert Swain robert.swain
Thu Aug 7 17:04:44 CEST 2008


2008/8/6 Michael Niedermayer <michaelni at gmx.at>:
> On Wed, Aug 06, 2008 at 12:32:32AM +0100, Robert Swain wrote:
>> $subj
>>
>> Best regards,
>> Rob
>
>> Index: Changelog
>> ===================================================================
>> --- Changelog (revision 14623)
>> +++ Changelog (working copy)
>> @@ -128,6 +128,7 @@
>>  - Motion Pixels MVI demuxer
>>  - removed animated GIF decoder/demuxer
>>  - D-Cinema audio muxer
>> +- AAC decoder
>>
>>  version 0.4.9-pre1:
>>
>
> ok

You can ignore/I can not submit the build system and documentation
changes. I won't commit them until the whole thing has been committed.

> [...]
>
>> Index: libavcodec/aactab.c
>> ===================================================================
>> --- libavcodec/aactab.c       (revision 14625)
>> +++ libavcodec/aactab.c       (working copy)

[...]

>> +DECLARE_ALIGNED(16, float,  ff_aac_kbd_long_1024[1024]);
>> +DECLARE_ALIGNED(16, float,  ff_aac_kbd_short_128[128]);
>> +DECLARE_ALIGNED(16, float, ff_aac_sine_long_1024[1024]);
>> +DECLARE_ALIGNED(16, float, ff_aac_sine_short_128[128]);
>> +
>> +const uint8_t ff_aac_num_swb_1024[] = {
>> +    41, 41, 47, 49, 49, 51, 47, 47, 43, 43, 43, 40
>> +};
>> +
>> +const uint8_t ff_aac_num_swb_128[] = {
>> +    12, 12, 12, 14, 14, 14, 15, 15, 15, 15, 15, 15
>> +};
>> +
>>  const uint32_t ff_aac_scalefactor_code[121] = {
>>      0x3ffe8, 0x3ffe6, 0x3ffe7, 0x3ffe5, 0x7fff5, 0x7fff1, 0x7ffed, 0x7fff6,
>>      0x7ffee, 0x7ffef, 0x7fff0, 0x7fffc, 0x7fffd, 0x7ffff, 0x7fffe, 0x7fff7,
>
>
>> @@ -795,4 +809,90 @@
>>       4064.0312908,  4074.6805676,  4085.3368071,  4096.0000000,
>>  };
>>
>> +/* [ 0, 255] scale factor decoding when using C dsp.float_to_int16
>> + * [60, 315] scale factor decoding when using SIMD dsp.float_to_int16
>> + * [45, 300] intensity stereo position decoding mapped in reverse order i.e. 0->300, 1->299, ..., 254->46, 255->45
>> + */
>
> not doxygen compat, no description of what the table actually contains.

Comment quoted above improved. Does your statement apply to all of the
above or just the comment for pow2sf_tab[]?

> [...]
>
>> Index: libavcodec/aac.c
>> ===================================================================
>> --- libavcodec/aac.c  (revision 14626)
>> +++ libavcodec/aac.c  (working copy)
>
> [...]
>> +/**
>> + * Configure output channel order and optional mixing based on the current
>> + * program configuration element and user requested channels.
>> + *
>> + * \param newpcs New program configuration struct - we only do something if it differs from the current one.
>> + */
>> +static int output_configure(AACContext *ac, ProgramConfig *pcs, ProgramConfig *newpcs) {
>> +    AVCodecContext *avctx = ac->avccontext;
>> +    int i, j, channels = 0;
>> +    float a, b;
>> +    ChannelElement *mixdown[3] = { NULL, NULL, NULL };
>> +
>> +    static const float mixdowncoeff[4] = {
>> +        /* matrix mix-down coefficient, table 4.70 */
>> +        1. / M_SQRT2,
>> +        1. / 2.,
>> +        1. / (2 * M_SQRT2),
>> +        0
>> +    };
>> +
>> +    if(!memcmp(pcs, newpcs, sizeof(ProgramConfig)))
>> +        return 0; /* no change */
>> +
>> +    *pcs = *newpcs;
>> +
>> +    /* Allocate or free elements depending on if they are in the
>> +     * current program configuration struct.
>> +     *
>> +     * Set up default 1:1 output mapping.
>> +     *
>> +     * For a 5.1 stream the output order will be:
>> +     *    [ Front Left ] [ Front Right ] [ Center ] [ LFE ] [ Surround Left ] [ Surround Right ]
>> +     *
>> +     * Locate front, center and back channels for later matrix mix-down.
>> +     */
>> +
>> +    for(i = 0; i < MAX_TAGID; i++) {
>> +        for(j = 0; j < 4; j++) {
>> +            if(pcs->che_type[j][i]) {
>> +                if(!ac->che[j][i] && !(ac->che[j][i] = av_mallocz(sizeof(ChannelElement))))
>> +                    return AVERROR(ENOMEM);
>> +                if(j != ID_CCE) {
>> +                    ac->output_data[channels++] = ac->che[j][i]->ch[0].ret;
>> +                    ac->che[j][i]->ch[0].mixing_gain = 1.0f;
>> +                    if(j == ID_CPE) {
>> +                        ac->output_data[channels++] = ac->che[j][i]->ch[1].ret;
>> +                        ac->che[j][i]->ch[1].mixing_gain = 1.0f;
>> +                        if(!mixdown[MIXDOWN_FRONT] && pcs->che_type[j][i] == AAC_CHANNEL_FRONT)
>> +                            mixdown[MIXDOWN_FRONT] = ac->che[j][i];
>> +                        if(!mixdown[MIXDOWN_BACK ] && pcs->che_type[j][i] == AAC_CHANNEL_BACK)
>> +                            mixdown[MIXDOWN_BACK ] = ac->che[j][i];
>> +                    }
>> +                    if(j == ID_SCE && !mixdown[MIXDOWN_CENTER] && pcs->che_type[j][i] == AAC_CHANNEL_FRONT)
>> +                        mixdown[MIXDOWN_CENTER] = ac->che[j][i];
>> +                }
>> +            } else
>> +                av_freep(&ac->che[j][i]);
>> +        }
>> +    }
>> +
>> +    // allocate appropriately aligned buffer for interleaved output
>> +    if(channels > avctx->channels)
>> +        av_freep(&ac->interleaved_output);
>> +    if(!ac->interleaved_output && !(ac->interleaved_output = av_malloc(channels * 1024 * sizeof(float))))
>> +        return AVERROR(ENOMEM);
>> +
>> +    ac->mm[MIXDOWN_FRONT] = ac->mm[MIXDOWN_BACK] = ac->mm[MIXDOWN_CENTER] = NULL;
>> +
>> +    /* Check for matrix mix-down to mono or stereo. */
>> +
>> +    if(avctx->request_channels && avctx->request_channels <= 2 &&
>> +       avctx->request_channels != channels) {
>> +
>> +        if((avctx->request_channels == 1 && pcs->mono_mixdown_tag   != -1) ||
>> +           (avctx->request_channels == 2 && pcs->stereo_mixdown_tag != -1)) {
>> +            /* Add support for this as soon as we get a sample so we can figure out
>> +               exactly how this is supposed to work. */
>> +            av_log(avctx, AV_LOG_ERROR,
>> +                   "Mix-down using pre-mixed elements is not supported, please file a bug. "
>> +                   "Reverting to matrix mix-down.\n");
>> +        }
>> +
>> +        /* We need 'center + L + R + sL + sR' for matrix mix-down. */
>> +        if(mixdown[MIXDOWN_CENTER] && mixdown[MIXDOWN_FRONT] && mixdown[MIXDOWN_BACK]) {
>
>> +            a = mixdowncoeff[pcs->mixdown_coeff_index];
>> +
>> +            if(avctx->request_channels == 2) {
>> +                b = 1. / (1. + (1. / M_SQRT2) + a * (pcs->pseudo_surround ? 2. : 1.));
>> +                mixdown[MIXDOWN_CENTER]->ch[0].mixing_gain      = b / M_SQRT2;
>> +            } else {
>> +                b = 1. / (3. + 2. * a);
>> +                mixdown[MIXDOWN_CENTER]->ch[0].mixing_gain      = b;
>> +            }
>> +            mixdown[MIXDOWN_FRONT]->ch[0].mixing_gain = b;
>> +            mixdown[MIXDOWN_FRONT]->ch[1].mixing_gain = b;
>> +            mixdown[MIXDOWN_BACK ]->ch[0].mixing_gain = b * a;
>> +            mixdown[MIXDOWN_BACK ]->ch[1].mixing_gain = b * a;
>
> this is totally obfuscated
> at least rename b to something that has something to do with normalization
> factor.
> And while it is written in the spec like above, i do not think it is correct.
> As it does not seem volume is kept constant in the convertion. Also AC3 has to
> do downmix too and the matrix init could be shared.
> also mixing_gain and global_gain could maybe be merged if the volume is
> slightly changed ... and as it does not seem to be correct as is anyway
> this might be worth a try ...

See later discussion of matrix mix-down marked ***

> [...]
>> +{
>> +    /* Pre-mixed down-mix outputs are not available. */
>> +    newpcs->mono_mixdown_tag   = -1;
>> +    newpcs->stereo_mixdown_tag = -1;
>> +
>> +    if(channels < 1 || channels > 7) {
>> +        av_log(ac->avccontext, AV_LOG_ERROR, "invalid default channel configuration (%d channels)\n",
>> +               channels);
>> +        return -1;
>> +    }
>> +
>> +    /* default channel configurations:
>> +     *
>> +     * 1ch : front center (mono)
>> +     * 2ch : L + R (stereo)
>> +     * 3ch : front center + L + R
>> +     * 4ch : front center + L + R + back center
>> +     * 5ch : front center + L + R + back stereo
>> +     * 6ch : front center + L + R + back stereo + LFE
>
>> +     * 7ch : front center + L + R + outer front left + outer front right + back stereo + LFE
>
> its still 8, so channels/num_channels is no longer an appropriate name for
> the vars used as its not 7 channels.

channel_configuration_index?

> [...]
>> @@ -213,6 +830,764 @@
>>      }
>>  }
>>
>
>> +/**
>> + * Dequantize and scale spectral data; reference: 4.6.3.3.
>> + *
>> + * @param   icoef       array of quantized spectral data
>> + * @param   band_type   array of the used band type
>> + * @param   sf          array of scalefactors or intensity stereo positions
>> + * @param   coef        array of dequantized, scaled spectral data
>
> why are the coeffs not dequantized in place, that is coef==icoef ?

icoef[] are integers. coef[] are floats. To be able to use
ivquant_tab[icoef[]], icoef[] have to be integers don't they?

> [...]
>> +/**
>> + * Decode an individual_channel_stream payload; reference: table 4.44.
>> + *
>> + * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
>> + * @param   scale_flag      scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
>> + * @return  Returns error status. 0 - OK, !0 - error
>> + */
>> +static int decode_ics(AACContext * ac, SingleChannelElement * sce, GetBitContext * gb, int common_window, int scale_flag) {
>> +    int icoeffs[1024];
>> +    Pulse pulse;
>> +    TemporalNoiseShaping * tns = &sce->tns;
>> +    IndividualChannelStream * ics = &sce->ics;
>> +    float * out = sce->coeffs;
>> +    int global_gain, pulse_present = 0;
>> +
>
>> +    pulse.num_pulse = 0;
>> +    pulse.start     = 0;
>
> what effect does start have when num_pulse=0 ?

These are assigned only to silence a GCC warning about them possibly
being uninitialised when they always will be in the cases that they're
used. I could add a comment to this effect.

> [...]
>> +/**
>> + * intensity stereo decoding; reference: 4.6.8.2.3
>> + */
>> +static void apply_intensity_stereo(ChannelElement * cpe) {
>> +    const IndividualChannelStream * ics = &cpe->ch[1].ics;
>> +    SingleChannelElement * sce1 = &cpe->ch[1];
>> +    float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
>> +    const uint16_t * offsets = ics->swb_offset;
>> +    int g, gp, i, k;
>> +    int c;
>> +    float scale;
>> +    for (g = 0; g < ics->num_window_groups; g++) {
>> +        for (gp = 0; gp < ics->group_len[g]; gp++) {
>> +            for (i = 0; i < ics->max_sfb;) {
>> +                if (sce1->band_type[g][i] == INTENSITY_BT || sce1->band_type[g][i] == INTENSITY_BT2) {
>
> should the gp loop maybe be inside the if() so its not redone ?

Done for this and apply_mid_side_stereo().

> [...]
>> +
>> +/**
>> + * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
>> + *
>> + * @return  Returns number of bytes consumed.
>> + */
>> +static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc, GetBitContext * gb) {
>> +    int i;
>> +    int n = 0;
>> +    int num_excl_chan = 0;
>> +
>> +    do {
>> +        for (i = 0; i < 7; i++)
>> +            che_drc->exclude_mask[num_excl_chan + i] = get_bits1(gb);
>> +        n++;
>> +        num_excl_chan += 7;
>> +    } while (num_excl_chan < MAX_CHANNELS - 7 && (che_drc->additional_excluded_chns[n-1] = get_bits1(gb)));
>> +
>> +    return n;
>> +}
>
> iam still not happy with this
> If i understand correctly this is a mask of channels, it should need just
> one array, not 2 (exclude_mask / additional_excluded_chns)

Indeed. I'll remove additional_excluded_chns[].

> [...]
>> +/**
>> + * Parse extension data (incomplete); reference: table 4.51.
>> + *
>> + * @param   cnt length of ID_FIL syntactic element in bytes
>> + */
>
>> +static int extension_payload(AACContext * ac, GetBitContext * gb, int cnt) {
>
> parse/decode/read...

I'll change to decode_ for consistency, including the comment.

> [...]
>> +
>> +/**
>> + * channel coupling transformation interface
>> + *
>> + * @param   index   index into coupling gain array
>> + * @param   apply_coupling_method   pointer to (in)dependent coupling function
>> + */
>> +static void apply_channel_coupling(AACContext * ac, ChannelElement * cc,
>> +        void (*apply_coupling_method)(AACContext * ac, SingleChannelElement * sce, ChannelElement * cc, int index))
>> +{
>> +    int c;
>> +    int index = 0;
>> +    ChannelCoupling * coup = &cc->coup;
>> +    for (c = 0; c <= coup->num_coupled; c++) {
>> +        if (     !coup->is_cpe[c] && ac->che[ID_SCE][coup->tag_select[c]]) {
>> +            apply_coupling_method(ac, &ac->che[ID_SCE][coup->tag_select[c]]->ch[0], cc, index++);
>> +        } else if(coup->is_cpe[c] && ac->che[ID_CPE][coup->tag_select[c]]) {
>
>> +            if (!coup->l[c] && !coup->r[c]) {
>> +                apply_coupling_method(ac, &ac->che[ID_CPE][coup->tag_select[c]]->ch[0], cc, index);
>> +                apply_coupling_method(ac, &ac->che[ID_CPE][coup->tag_select[c]]->ch[1], cc, index++);
>> +            }
>
> The struct docs say r/l are "apply gain to r/l channel" this contradicts the
> 0/0 case. which is correct and which is wrong?

In the spec, for cc_l[] it says:

"one bit indicating that a list of gain_element values is applied to the left
channel of a channel pair."

The code that uses it implies that description isn't strictly true. In
the !l[] && !r[] case, there is a shared list of gain_element values.
So l[] and r[] indicate the presence of a channel-specific list of
gain_element values. I'll change the comments.

> [...]
>> +
>> +/**
>> + * Conduct matrix mix-down and float to int16 conversion.
>> + *
>> + * @param   data        pointer to output data
>> + * @param   data_size   output data size in bytes
>> + * @return  Returns error status. 0 - OK, !0 - error
>> + */
>> +static int mixdown_and_convert_to_int16(AVCodecContext * avccontext, uint16_t * data, int * data_size) {
>> +    AACContext * ac = avccontext->priv_data;
>> +    int i;
>> +    float *c, *l, *r, *sl, *sr, *out;
>> +
>> +    if (!ac->is_saved) {
>> +        ac->is_saved = 1;
>> +        *data_size = 0;
>> +        return 0;
>> +    }
>> +
>> +    i = 1024 * avccontext->channels * sizeof(int16_t);
>> +    if(*data_size < i) {
>> +        av_log(avccontext, AV_LOG_ERROR,
>> +               "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
>> +               *data_size, i);
>> +        return -1;
>> +    }
>> +    *data_size = i;
>> +
>> +    if(ac->mm[MIXDOWN_CENTER]) {
>> +        /* matrix mix-down */
>> +        l   = ac->mm[MIXDOWN_FRONT ]->ch[0].ret;
>> +        r   = ac->mm[MIXDOWN_FRONT ]->ch[1].ret;
>> +        c   = ac->mm[MIXDOWN_CENTER]->ch[0].ret;
>> +        sl  = ac->mm[MIXDOWN_BACK  ]->ch[0].ret;
>> +        sr  = ac->mm[MIXDOWN_BACK  ]->ch[1].ret;
>> +        out = ac->interleaved_output;
>> +
>> +        // XXX dsputil-ize
>> +        if(avccontext->channels == 2) {
>> +            if(ac->pcs.pseudo_surround) {
>> +                for(i = 0; i < 1024; i++) {
>> +                    *out++ = *l++ + *c   - *sl   - *sr   + ac->add_bias;
>> +                    *out++ = *r++ + *c++ + *sl++ + *sr++ - ac->add_bias * 3;
>> +                }
>> +            } else {
>> +                for(i = 0; i < 1024; i++) {
>> +                    *out++ = *l++ + *c   + *sl++ - ac->add_bias * 2;
>> +                    *out++ = *r++ + *c++ + *sr++ - ac->add_bias * 2;
>> +                }
>> +            }
>> +
>> +        } else {
>> +            assert(avccontext->channels == 1);
>> +            for(i = 0; i < 1024; i++) {
>> +                *out++ = *l++ + *r++ + *c++ + *sl++ + *sr++ - ac->add_bias * 4;
>> +            }
>> +        }
>> +
>> +        ac->dsp.float_to_int16(data, ac->interleaved_output, 1024 * avccontext->channels);
>> +    } else {
>> +        ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, 1024, avccontext->channels);
>> +    }
>> +
>> +    return 0;
>> +}
>
> mixdown should be done prior to the IMDCT when possible and the IMDCT skipped
> for channels that are not needed, or _ALL_ mixdown code should be removed
> from the AAC decoder, as in that case mixdown can be done outside of the
> decoder equally well and cleaner.

I think doing pre-IMDCT mix-down will be complicated because of the
different windows and their overlaps. As a consequence of these issues
the optimisation may or may not be worth it. I think the overlapping
simplifications I had planned when transitioning to using imdct_half()
may help so I should probably do those first.

*** In the spec they advise against using the matrix mix-down method
so I think all this matrix mix-down code should be dropped in favour
of generic channel mixing either pre-IMDCT or post decoding, unless
someone knows of a good reason why it should be kept. I think Andreas
implemented this code but I'm not sure.

>> +
>> +
>> +static int aac_decode_frame(AVCodecContext * avccontext, void * data, int * data_size, const uint8_t * buf, int buf_size) {
>> +    AACContext * ac = avccontext->priv_data;
>> +    GetBitContext gb;
>> +    enum RawDataBlockID id;
>> +    int err, tag;
>> +
>> +    init_get_bits(&gb, buf, buf_size*8);
>> +
>> +    // parse
>> +    while ((id = get_bits(&gb, 3)) != ID_END) {
>> +        tag = get_bits(&gb, 4);
>> +        err = -1;
>> +        switch (id) {
>> +
>
>> +        case ID_SCE:
>> +            if(!ac->che[ID_SCE][tag]) {
>
> this id / tag naming confuses the hell out of me ...
> Both values together identify the "thing". the id here is the
> type (channel pair / single channel / ...)

Why is it confusing? As you say, the id is the type and the tag is
(and I know you don't like this but I'm going to use it anyway because
I'm happy with it) the instance of that type.

> [...]
>> Index: libavcodec/aac.h
>> ===================================================================
>> --- libavcodec/aac.h  (revision 14624)
>> +++ libavcodec/aac.h  (working copy)
>> @@ -42,8 +44,63 @@
>>          ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \
>>          size);
>>
>> +#define MAX_CHANNELS 64
>
> ok as long as its not causing huge arrays, like if there was a use like
> array[MAX_FRAME][MAX_TAGS][MAX_BANDS][MAX_CHANNELS] ...

Please define huge.

> [...]
>> @@ -60,24 +128,182 @@
>>  };
>>
>>  /**
>> + * mix-down channel types
>> + * MIXDOWN_CENTER is the index into the mix-down arrays for a Single Channel Element with AAC_CHANNEL_FRONT.
>> + * MIXDOWN_(BACK|FRONT) are the indices for Channel Pair Elements with AAC_CHANNEL_(BACK|FRONT).
>> + */
>> +enum {
>> +    MIXDOWN_CENTER,
>> +    MIXDOWN_FRONT,
>> +    MIXDOWN_BACK,
>> +};
>
> the descriptions for each should be with each not clustered before all.

OK. I think I annotated it like that because of the grammar you used
when stating that their meaning should be added.

> [...]
>> +/**
>> + * M/S joint channel coding
>> + */
>> +typedef struct {
>> +    int present;
>> +    uint8_t mask[8][64];
>> +} MidSideStereo;
>
> cant this use the channel coupling struct and code? Its doing the same thing
> i think

"coupling channel elements provide two functionalities: First,
coupling channels may be used to implement generalized intensity
stereo coding where channel spectra can be shared across channel
boundaries. Second, coupling channels may be used to dynamically
perform a downmix of one sound object into the stereo image."

I don't think they quite do the same thing. mask[][] is the only array
left in this struct now though so I'll rename it ms_mask[][] and
remove the struct.

>> +
>> +/**
>> + * Dynamic Range Control - decoded from the bitstream but not processed further.
>> + */
>> +typedef struct {
>> +    int pce_instance_tag;                           ///< Indicates with which program the DRC info is associated.
>> +    int dyn_rng_sgn[17];                            ///< DRC sign information; 0 - positive, 1 - negative
>> +    int dyn_rng_ctl[17];                            ///< DRC magnitude information
>> +    int exclude_mask[MAX_CHANNELS];                 ///< Channels to be excluded from DRC processing.
>> +    int additional_excluded_chns[MAX_CHANNELS / 7]; /**< The exclude_mask bits are
>> +                                                        coded in groups of 7 with 1 bit preceeding each group (except the first)
>> +                                                        indicating that 7 more mask bits are coded. */
>> +    int band_incr;                                  ///< Number of DRC bands greater than 1 having DRC info.
>> +    int interpolation_scheme;                       ///< Indicates the interpolation scheme used in the SBR QMF domain.
>> +    int band_top[17];                               ///< Indicates the top of the i-th DRC band in units of 4 spectral lines.
>> +    int prog_ref_level;                             /**< A reference level for the long-term program audio level for all
>> +                                                        channels combined. */
>> +} DynamicRangeControl;
>
>> +
>> +/**
>> + * pulse tool
>> + */
>
> mee, i thought the function was called like that, the struct too??

Oops, I must have missed that one. I'll remove superfluous doxygen
comments on structs.

>> +    int is_indep_coup;     ///< Set if independent coupling (i.e. after IMDCT).
>> +    int domain;            ///< Controls if coupling is performed before (0) or after (1) the TNS decoding of the target channels.
>
> wouldnt a
> int or enum where= 0 (before TNS) 1(after TNS before IMDCT) 2(after IMDCT)
> be cleaner?

Done.

>> +    int num_coupled;       ///< number of target elements
>
>> +    int is_cpe[9];         ///< Set if target is an CPE (otherwise it's an SCE).
>
> Maybe some code could be simplified if these where ID_CPE/ID_SCE ?

Do you mean if the array values were ID_CPE/ID_SCE? I don't think it
would allow any simplifications.

>> +    int tag_select[9];     ///< element tag index
>> +    int l[9];              ///< Apply gain to left channel of a CPE.
>> +    int r[9];              ///< Apply gain to right channel of a CPE.
>
> do these arrays need 9 or 8 elements?

9

> [...]
>> Index: libavcodec/aacdectab.h
>> ===================================================================
>> --- libavcodec/aacdectab.h    (revision 0)
>> +++ libavcodec/aacdectab.h    (revision 0)
>> @@ -0,0 +1,189 @@
>> +/*
>> + * AAC decoder data
>> + * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
>> + * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
>> + *
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
>> +
>> +/**
>> + * @file aacdectab.h
>> + * AAC decoder data
>> + * @author Oded Shimon  ( ods15 ods15 dyndns org )
>> + * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
>> + */
>> +
>> +#ifndef FFMPEG_AACDECTAB_H
>> +#define FFMPEG_AACDECTAB_H
>> +
>> +#include "aac.h"
>> +
>> +#include <stdint.h>
>> +
>> +static const uint16_t swb_offset_1024_96[] = {
>> +      0,   4,   8,  12,  16,  20,  24,  28,
>> +     32,  36,  40,  44,  48,  52,  56,  64,
>> +     72,  80,  88,  96, 108, 120, 132, 144,
>> +    156, 172, 188, 212, 240, 276, 320, 384,
>> +    448, 512, 576, 640, 704, 768, 832, 896,
>> +    960, 1024
>> +};
>> +
>> +static const uint16_t swb_offset_128_96[] = {
>> +    0, 4, 8, 12, 16, 20, 24, 32, 40, 48, 64, 92, 128
>> +};
>> +
>> +static const uint16_t swb_offset_1024_64[] = {
>> +      0,   4,   8,  12,  16,  20,  24,  28,
>> +     32,  36,  40,  44,  48,  52,  56,  64,
>> +     72,  80,  88, 100, 112, 124, 140, 156,
>> +    172, 192, 216, 240, 268, 304, 344, 384,
>> +    424, 464, 504, 544, 584, 624, 664, 704,
>> +    744, 784, 824, 864, 904, 944, 984, 1024
>> +};
>> +
>> +static const uint16_t swb_offset_1024_48[] = {
>> +      0,   4,   8,  12,  16,  20,  24,  28,
>> +     32,  36,  40,  48,  56,  64,  72,  80,
>> +     88,  96, 108, 120, 132, 144, 160, 176,
>> +    196, 216, 240, 264, 292, 320, 352, 384,
>> +    416, 448, 480, 512, 544, 576, 608, 640,
>> +    672, 704, 736, 768, 800, 832, 864, 896,
>> +    928, 1024
>> +};
>> +
>> +static const uint16_t swb_offset_128_48[] = {
>> +     0,   4,   8,  12,  16,  20,  28,  36,
>> +    44,  56,  68,  80,  96, 112, 128
>> +};
>> +
>> +static const uint16_t swb_offset_1024_32[] = {
>> +      0,   4,   8,  12,  16,  20,  24,  28,
>> +     32,  36,  40,  48,  56,  64,  72,  80,
>> +     88,  96, 108, 120, 132, 144, 160, 176,
>> +    196, 216, 240, 264, 292, 320, 352, 384,
>> +    416, 448, 480, 512, 544, 576, 608, 640,
>> +    672, 704, 736, 768, 800, 832, 864, 896,
>> +    928, 960, 992, 1024
>> +};
>> +
>> +static const uint16_t swb_offset_1024_24[] = {
>> +      0,   4,   8,  12,  16,  20,  24,  28,
>> +     32,  36,  40,  44,  52,  60,  68,  76,
>> +     84,  92, 100, 108, 116, 124, 136, 148,
>> +    160, 172, 188, 204, 220, 240, 260, 284,
>> +    308, 336, 364, 396, 432, 468, 508, 552,
>> +    600, 652, 704, 768, 832, 896, 960, 1024
>> +};
>> +
>> +static const uint16_t swb_offset_128_24[] = {
>> +     0,   4,   8,  12,  16,  20,  24,  28,
>> +    36,  44,  52,  64,  76,  92, 108, 128
>> +};
>> +
>> +static const uint16_t swb_offset_1024_16[] = {
>> +      0,   8,  16,  24,  32,  40,  48,  56,
>> +     64,  72,  80,  88, 100, 112, 124, 136,
>> +    148, 160, 172, 184, 196, 212, 228, 244,
>> +    260, 280, 300, 320, 344, 368, 396, 424,
>> +    456, 492, 532, 572, 616, 664, 716, 772,
>> +    832, 896, 960, 1024
>> +};
>> +
>> +static const uint16_t swb_offset_128_16[] = {
>> +     0,   4,   8,  12,  16,  20,  24,  28,
>> +    32,  40,  48,  60,  72,  88, 108, 128
>> +};
>> +
>> +static const uint16_t swb_offset_1024_8[] = {
>> +      0,  12,  24,  36,  48,  60,  72,  84,
>> +     96, 108, 120, 132, 144, 156, 172, 188,
>> +    204, 220, 236, 252, 268, 288, 308, 328,
>> +    348, 372, 396, 420, 448, 476, 508, 544,
>> +    580, 620, 664, 712, 764, 820, 880, 944,
>> +    1024
>> +};
>> +
>> +static const uint16_t swb_offset_128_8[] = {
>> +     0,   4,   8,  12,  16,  20,  24,  28,
>> +    36,  44,  52,  60,  72,  88, 108, 128
>> +};
>> +
>> +static const uint16_t *swb_offset_1024[] = {
>> +    swb_offset_1024_96, swb_offset_1024_96, swb_offset_1024_64,
>> +    swb_offset_1024_48, swb_offset_1024_48, swb_offset_1024_32,
>> +    swb_offset_1024_24, swb_offset_1024_24, swb_offset_1024_16,
>> +    swb_offset_1024_16, swb_offset_1024_16, swb_offset_1024_8
>> +};
>> +
>> +static const uint16_t *swb_offset_128[] = {
>> +    /* The last entry on the following row is swb_offset_128_64 but is a
>> +       duplicate of swb_offset_128_96. */
>> +    swb_offset_128_96, swb_offset_128_96, swb_offset_128_96,
>> +    swb_offset_128_48, swb_offset_128_48, swb_offset_128_48,
>> +    swb_offset_128_24, swb_offset_128_24, swb_offset_128_16,
>> +    swb_offset_128_16, swb_offset_128_16, swb_offset_128_8
>> +};
>
> Does storing band sizes instead of band offsets lead to simpler code?
> if not, the code above is ok

grepping for 'offset' in aac.c shows quite a few more lines of
for(i=offset[idx]; i<offset[idx+1]; i++) or just offset[idx] than
lines using offset[idx+1] - offset[idx] so I think it's better as it
is.

> Btw, what does swb stand for? it should be mentioned somewhere in a doxy

scalefactor window band - term for scalefactor bands within a window,
given in Table 4.110 to Table 4.128.

scalefactor band - term for scalefactor band within a group. In case
of EIGHT_SHORT_SEQUENCE and grouping a scalefactor band may contain
several scalefactor window bands of corresponding frequency. For all
other window_sequences scalefactor bands and scalefactor window bands
are identical.

I think all that information is useful but where would be a good place
to put it?

Regards,
Rob




More information about the ffmpeg-devel mailing list