[FFmpeg-devel] AAC decoder round 6

Robert Swain robert.swain
Sun Aug 10 13:31:31 CEST 2008


2008/8/9 Michael Niedermayer <michaelni at gmx.at>:
> On Sat, Aug 09, 2008 at 12:25:09PM +0100, Robert Swain wrote:
>> $subj
> [...]
>
>> +const uint8_t ff_aac_num_swb_1024[] = {
>> +    41, 41, 47, 49, 49, 51, 47, 47, 43, 43, 43, 40
>> +};
>> +
>> +const uint8_t ff_aac_num_swb_128[] = {
>> +    12, 12, 12, 14, 14, 14, 15, 15, 15, 15, 15, 15
>> +};
>> +
>
> dont these 2 belong to the swb offset tables? if so i think they should
> be in the same file unless iam missing something either way these 2 are
> ok [with static if they are only used by 1 file]

Kostya wanted these two tables but not the rest of the swb tables so I
made these shared and the others not. I can move them if you wish.

>> Index: libavcodec/aac.c
>> ===================================================================
>> --- libavcodec/aac.c  (revision 14674)
>> +++ libavcodec/aac.c  (working copy)
>> @@ -99,6 +99,100 @@
>>  static VLC vlc_spectral[11];
>>
>>
> [...]
>> +/**
>> + * Configure output channel order based on the current program configuration element.
>> + *
>> + * @param   che_pos current channel position configuration
>> + * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
>> + *
>> + * @return  Returns error status. 0 - OK, !0 - error
>> + */
>> +static int output_configure(AACContext *ac, enum ChannelPosition che_pos[4][MAX_ELEM_ID],
>> +        enum ChannelPosition new_che_pos[4][MAX_ELEM_ID]) {
>> +    AVCodecContext *avctx = ac->avccontext;
>> +    int i, j, channels = 0;
>> +
>> +    if(!memcmp(che_pos, new_che_pos, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0])))
>> +        return 0; /* no change */
>> +
>> +    memcpy(che_pos, new_che_pos, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
>> +
>> +    /* Allocate or free elements depending on if they are in the
>> +     * current program configuration.
>> +     *
>> +     * Set up default 1:1 output mapping.
>> +     *
>> +     * For a 5.1 stream the output order will be:
>> +     *    [ Front Left ] [ Front Right ] [ Center ] [ LFE ] [ Surround Left ] [ Surround Right ]
>> +     */
>> +
>> +    for(i = 0; i < MAX_ELEM_ID; i++) {
>> +        for(j = 0; j < 4; j++) {
>> +            if(che_pos[j][i]) {
>> +                if(!ac->che[j][i] && !(ac->che[j][i] = av_mallocz(sizeof(ChannelElement))))
>> +                    return AVERROR(ENOMEM);
>> +                if(j != TYPE_CCE) {
>
> i think j should be renamed to type

OK.

> [...]
>> +
>> +/**
>> + * Decode GA "General Audio" specific configuration; reference: table 4.1.
>> + *
>> + * @return  Returns error status. 0 - OK, !0 - error
>> + */
>> +static int decode_ga_specific_config(AACContext * ac, GetBitContext * gb, int channel_config) {
>> +    enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
>> +    int extension_flag, ret;
>> +
>> +    if(get_bits1(gb)) {  // frameLengthFlag
>> +        av_log(ac->avccontext, AV_LOG_ERROR, "960/120 MDCT window is not supported.\n");
>
> is this "update to latest svn; sample welcome or patch welcome" ?
> i think we have a function to print such messages, i dont remember what its
> name was though

Is that a joke? I don't really understand what you're trying to point
out. It's more "These files don't really occur in the wild and we
haven't written any support for them."

>> +
>> +/**
>> + * linear congruential pseudorandom number generator
>> + *
>> + * @param   state   pointer to the current state of the generator
>> + *
>> + * @return  Returns a 32-bit pseudorandom integer
>> + */
>> +static inline int32_t lcg_random(int32_t *state) {
>> +    *state = *state * 1664525 + 1013904223;
>> +    return *state;
>> +}
>
> always_inline, besides
>
> int lcg_random(int previous_val)
> state=array[i] = lcg_random(state);
>
> is IMHO cleaner

Done.

> [...]
>> +    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
>> +        int i;
>> +        ics->max_sfb = get_bits(gb, 4);
>
>> +        grouping = get_bits(gb, 7);
>> +        for (i = 0; i < 7; i++) {
>> +            if (grouping & (1<<(6-i))) {
>
> isnt this if(get_bits1()) ?

Yes. Done.

>> +                ics->group_len[ics->num_window_groups-1]++;
>> +            } else {
>> +                ics->num_window_groups++;
>> +                ics->group_len[ics->num_window_groups-1] = 1;
>> +            }
>> +        }
>
>
>> +        ics->swb_offset =     swb_offset_128[ac->m4ac.sampling_index];
>> +        ics->num_swb    = ff_aac_num_swb_128[ac->m4ac.sampling_index];
>> +    } else {
>> +        ics->max_sfb    =                              get_bits(gb, 6);
>> +        ics->swb_offset =     swb_offset_1024[ac->m4ac.sampling_index];
>> +        ics->num_swb    = ff_aac_num_swb_1024[ac->m4ac.sampling_index];
>> +    }
>
> is there something that prevents sampling_index from being >=12 ?
> a quick look at ff_mpeg4audio_get_config() indicates that it can at least
> be 15.

It's checked in decode_pce() but that is only called if the channel
config changes. I've added another check after the call to
ff_mpeg4audio_get_config() so now it is checked in both places where a
sampling index is parsed.

>> +
>> +    if(ics->max_sfb > ics->num_swb) {
>> +        av_log(ac->avccontext, AV_LOG_ERROR,
>> +            "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
>> +            ics->max_sfb, ics->num_swb);
>> +        ics->max_sfb = 0;
>> +        ics->num_swb = 0;
>> +        return -1;
>> +    }
>> +
>
>> +    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
>> +        ics->num_windows   = 8;
>> +        ics->tns_max_bands = tns_max_bands_128[ac->m4ac.sampling_index];
>> +    } else {
>> +        ics->num_windows   = 1;
>> +        ics->tns_max_bands = tns_max_bands_1024[ac->m4ac.sampling_index];
>> +        if (get_bits1(gb)) {
>> +            av_log(ac->avccontext, AV_LOG_ERROR,
>> +                   "Predictor bit set but LTP is not supported.\n");
>> +            return -1;
>> +        }
>> +    }
>
> why is this split from the first part?

It was split either side of the refactored max_sfb/num_swb validation.
I can merge them but then if the max_sfb/num_swb check fails, should
these values also be zeroed?

> [...]
>> +    int g, i, idx = 0;
>> +    if (ms_present == 1) {
>
>> +        for (g = 0; g < cpe->ch[0].ics.num_window_groups; g++)
>> +            for (i = 0; i < cpe->ch[0].ics.max_sfb; i++, idx++)
>> +                cpe->ms_mask[idx] = get_bits1(gb);// << i;
>
> what does the '// << i;' mean?

I don't know but it's been there as long as I've seen the code. I'll remove it.

> besides, isnt this just
>
> for (i = 0; i < cpe->ch[0].ics.max_sfb * cpe->ch[0].ics.num_window_groups; i++)
>    cpe->ms_mask[i] = get_bits1(gb);
>
> ?

Yes, changed.

>> +    } else if (ms_present == 2) {
>> +        for (g = 0; g < cpe->ch[0].ics.num_window_groups; g++)
>> +            memset(&cpe->ms_mask[g*cpe->ch[0].ics.max_sfb], 1, cpe->ch[0].ics.max_sfb * sizeof(cpe->ms_mask[0]));
>
> similar to above the for seems unneeded

Done.

> [...]
>
>> @@ -330,10 +790,286 @@
>>  }
>>
>>  /**
>> - * Parse Spectral Band Replication extension data; reference: table 4.55.
>> + * Dequantize and scale spectral data; reference: 4.6.3.3.
>>   *
>> + * @param   icoef       array of quantized spectral data
>> + * @param   band_type   array of the used band type
>> + * @param   sf          array of scalefactors or intensity stereo positions
>> + * @param   coef        array of dequantized, scaled spectral data
>> + */
>> +static void dequant(AACContext * ac, float coef[1024], const int icoef[1024], float sf[120],
>> +        const IndividualChannelStream * ics, enum BandType band_type[120]) {
>> +    const uint16_t * offsets = ics->swb_offset;
>> +    const int c = 1024/ics->num_window_groups;
>> +    int g, i, group, k, idx = 0;
>> +
>> +    for (g = 0; g < ics->num_window_groups; g++) {
>> +        memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float)*(c - offsets[ics->max_sfb]));
>> +        for (i = 0; i < ics->max_sfb; i++, idx++) {
>> +            if (band_type[idx] == NOISE_BT) {
>> +                const float scale = sf[idx] / ((offsets[i+1] - offsets[i]) * PNS_MEAN_ENERGY);
>> +                for (group = 0; group < ics->group_len[g]; group++) {
>> +                    for (k = offsets[i]; k < offsets[i+1]; k++)
>> +                        coef[group*128+k] = lcg_random(&ac->random_state) * scale;
>> +                }
>> +            } else if (band_type[idx] != INTENSITY_BT && band_type[idx] != INTENSITY_BT2) {
>> +                for (group = 0; group < ics->group_len[g]; group++) {
>> +                    for (k = offsets[i]; k < offsets[i+1]; k++) {
>> +                        coef[group*128+k] = ivquant(icoef[group*128+k]) * sf[idx];
>> +                    }
>> +                }
>> +            }
>> +        }
>> +        coef  += ics->group_len[g]*128;
>> +        icoef += ics->group_len[g]*128;
>> +    }
>> +}
>
> it might be better to add 128 just outside the innermost loop to avoid
> the "group*128 +" inside

They would need to be reset the end of each iteration over i. I moved
the group loops inside the conditions recently as I recall. What do
you suggest? Add 128 to coef and icoef through each iteration over
group and then reset?

>> +
>> +/**
>> + * Mid/Side stereo decoding; reference: 4.6.8.1.3.
>> + */
>> +static void apply_mid_side_stereo(ChannelElement * cpe) {
>> +    const IndividualChannelStream * ics = &cpe->ch[0].ics;
>> +    float *ch0 = cpe->ch[0].coeffs;
>> +    float *ch1 = cpe->ch[1].coeffs;
>> +    int g, i, k, gp, idx = 0;
>> +    const uint16_t * offsets = ics->swb_offset;
>> +    for (g = 0; g < ics->num_window_groups; g++) {
>> +        for (i = 0; i < ics->max_sfb; i++, idx++) {
>> +            if (cpe->ms_mask[idx] &&
>> +                cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
>> +                for (gp = 0; gp < ics->group_len[g]; gp++) {
>> +                    for (k = offsets[i]; k < offsets[i+1]; k++) {
>> +                        float tmp = ch0[gp*128 + k] - ch1[gp*128 + k];
>> +                        ch0[gp*128 + k] += ch1[gp*128 + k];
>> +                        ch1[gp*128 + k] = tmp;
>> +                    }
>> +                }
>> +            }
>> +        }
>> +        ch0 += ics->group_len[g]*128;
>> +        ch1 += ics->group_len[g]*128;
>
> it might be better to add 128 just outside the innermost loop to avoid
> the "gp*128 +" inside, this also applies to several other functions!
>
> besides dequant() uses group instead of gp, this is very minor but inconsisant

Made variables consistent.

> [...]
>> +
>> +/**
>> + * Decode coupling_channel_element; reference: table 4.8.
>> + *
>> + * @param   elem_id Identifies the instance of a syntax element.
>> + *
>> + * @return  Returns error status. 0 - OK, !0 - error
>> + */
>> +static int decode_cce(AACContext * ac, GetBitContext * gb, int elem_id) {
>> +    int num_gain = 0;
>> +    int c, g, sfb, ret, idx = 0;
>> +    int is_indep_coup, domain, sign;
>> +    float scale;
>> +    SingleChannelElement * sce;
>> +    ChannelCoupling * coup;
>> +
>> +    sce = &ac->che[TYPE_CCE][elem_id]->ch[0];
>> +    sce->mixing_gain = 1.0;
>> +
>> +    coup = &ac->che[TYPE_CCE][elem_id]->coup;
>> +
>> +    is_indep_coup = get_bits1(gb);
>> +    coup->num_coupled = get_bits(gb, 3);
>> +    for (c = 0; c <= coup->num_coupled; c++) {
>> +        num_gain++;
>
>> +        coup->is_cpe[c] = get_bits1(gb);
>> +        coup->id_select[c] = get_bits(gb, 4);
>> +        if (coup->is_cpe[c]) {
>> +            coup->ch_select[c] = get_bits(gb, 2);
>> +            if (coup->ch_select[c] == 3)
>> +                num_gain++;
>> +        }
>
> i thought about this again and i think the following is overall cleaner:
>
> coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
> coup->id_select[c] = get_bits(gb, 4);
> if (coup->type[c] == TYPE_CPE) {
>    coup->ch_select[c] = get_bits(gb, 2);
>    if (coup->ch_select[c] == 3)
>        num_gain++;
> } else
>    coup->ch_select[c] = 1;

Done.

>> +    }
>> +    domain = get_bits1(gb);
>> +
>> +    if (is_indep_coup) {
>> +        coup->coupling_point = AFTER_IMDCT;
>> +    } else if(domain) {
>> +        coup->coupling_point = BETWEEN_TNS_AND_IMDCT;
>> +    } else
>> +        coup->coupling_point = BEFORE_TNS;
>
> :/ i thought the bits were at least stored together, what morons designed
> this!?
>
> coup->coupling_point= get_bits1(gb);
> ...
> coup->coupling_point+= 2*get_bits1(gb);
>
> still is a possibility though that is cleaner
> btw, what is the 4th possibility for?

is_indep_coup = 1 overrides whatever domain is so the 4th case is
redundant. Though theoretically, domain must be 1 if is_indep_coup is
1. is_indep_coup = 1 and domain = 0 is theoretically invalid.

coup->coupling_point = 2*get_bits1(gb);
...
coup->coupling_point += get_bits1(gb) && !coup->coupling_point;
?

>> +
>> +    sign = get_bits(gb, 1);
>> +    scale = pow(2., pow(2., get_bits(gb, 2) - 3));
>> +
>> +    if ((ret = decode_ics(ac, sce, gb, 0, 0)))
>> +        return ret;
>> +
>> +    for (c = 0; c < num_gain; c++) {
>> +        int cge = 1;
>> +        int gain = 0;
>> +        float gain_cache = 1.;
>> +        if (c) {
>> +            cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
>> +            gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
>> +            gain_cache = pow(scale, gain);
>> +        }
>> +        for (g = 0; g < sce->ics.num_window_groups; g++)
>> +            for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++)
>> +                if (sce->band_type[idx] == ZERO_BT) {
>> +                    coup->gain[c][idx] = 0;
>> +                } else {
>> +                    if (cge) {
>> +                        coup->gain[c][idx] = gain_cache;
>> +                    } else {
>> +                        int s, t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
>> +                        if (sign) {
>> +                            s = 1 - 2 * (t & 0x1);
>> +                            t >>= 1;
>> +                        } else
>> +                            s = 1;
>> +                        gain += t;
>> +                        coup->gain[c][idx] = pow(scale, gain) * s;
>
> I think t==0 should be handled as a special case to skip the pow()
> maybe something like:
>
> if(!cge){
>    t=get_vlc2() ...
>    if(t){
>        ...
>        gain += t;
>        gain_cache= pow(scale, gain) * s;
>    }
> }
> coup->gain[c][idx] = gain_cache;

OK.

> also maybe the == ZERO_BT check is useless, i think the code that applies
> the coupling checks for it before using the values.

Indeed it does, but it still needs to be checked to skip this code in
the case that the band_type is ZERO_BT. So I changed it to:

-                if (sce->band_type[idx] == ZERO_BT) {
-                    coup->gain[c][idx] = 0;
-                } else {
+                if (sce->band_type[idx] != ZERO_BT) {

>> +                    }
>> +                }
>> +    }
>> +    return 0;
>> +}
>> +
>
>
>> @@ -343,6 +1079,86 @@
>>      return cnt;
>>  }
>>
>> +/**
>> + * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
>> + *
>> + * @return  Returns number of bytes consumed.
>> + */
>> +static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc, GetBitContext * gb) {
>> +    int i;
>> +    int n = 0;
>> +    int num_excl_chan = 0;
>> +
>> +    do {
>> +        for (i = 0; i < 7; i++)
>> +            che_drc->exclude_mask[num_excl_chan + i] = get_bits1(gb);
>> +        n++;
>> +        num_excl_chan += 7;
>> +    } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
>> +
>> +    return n;
>> +}
>
> do {
>    for (i = 0; i < 7; i++)
>        che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
> } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
>
> return num_excl_chan/7;

OK.

>> +    if (ics->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
>> +        ff_imdct_calc(&ac->mdct, buf, in, out); // Out can be abused, for now, as a temp buffer.
>> +        if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) {
>> +            ac->dsp.vector_fmul_add_add(out, buf, lwindow_prev, saved, ac->add_bias, 1024, 1);
>> +        } else {
>> +            for (i = 0;   i < 448;  i++)   out[i] =          saved[i] + ac->add_bias;
>> +            ac->dsp.vector_fmul_add_add(out + 448, buf + 448, swindow_prev, saved + 448, ac->add_bias, 128, 1);
>> +            for (i = 576; i < 1024; i++)   out[i] = buf[i] + saved[i] + ac->add_bias;
>> +        }
>> +        if (ics->window_sequence[0] != LONG_START_SEQUENCE) {
>
> instead of
> if(!X)
> else
>
> i think
> if(X)
> else
>
> would be more readable

OK.

>> +            ac->dsp.vector_fmul_reverse(saved, buf + 1024, lwindow, 1024);
>> +        } else {
>> +            memcpy(saved, buf + 1024, 448 * sizeof(float));
>> +            ac->dsp.vector_fmul_reverse(saved + 448, buf + 1024 + 448, swindow, 128);
>> +            memset(saved + 576, 0, 448 * sizeof(float));
>> +        }
>> +    } else {
>> +        if (ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE)
>> +            av_log(ac->avccontext, AV_LOG_WARNING,
>> +                   "Transition from an ONLY_LONG or LONG_STOP to an EIGHT_SHORT sequence detected. "
>> +                   "If you heard an audible artifact, please submit the sample to the FFmpeg developers.\n");
>> +        for (i = 0; i < 2048; i += 256) {
>> +            ff_imdct_calc(&ac->mdct_small, buf + i, in + i/2, out);
>> +            ac->dsp.vector_fmul_reverse(ac->revers + i/2, buf + i + 128, swindow, 128);
>> +        }
>> +        for (i = 0; i < 448; i++)   out[i] = saved[i] + ac->add_bias;
>> +
>> +        ac->dsp.vector_fmul_add_add(out + 448 + 0*128, buf + 0*128, swindow_prev, saved + 448 ,       ac->add_bias, 128, 1);
>> +        ac->dsp.vector_fmul_add_add(out + 448 + 1*128, buf + 2*128, swindow,      ac->revers + 0*128, ac->add_bias, 128, 1);
>> +        ac->dsp.vector_fmul_add_add(out + 448 + 2*128, buf + 4*128, swindow,      ac->revers + 1*128, ac->add_bias, 128, 1);
>> +        ac->dsp.vector_fmul_add_add(out + 448 + 3*128, buf + 6*128, swindow,      ac->revers + 2*128, ac->add_bias, 128, 1);
>> +        ac->dsp.vector_fmul_add_add(out + 448 + 4*128, buf + 8*128, swindow,      ac->revers + 3*128, ac->add_bias,  64, 1);
>> +
>> +#if 0
>> +        vector_fmul_add_add_add(&ac->dsp, out + 448 + 1*128, buf + 2*128, swindow,      saved + 448 + 1*128, ac->revers + 0*128, ac->add_bias, 128);
>> +        vector_fmul_add_add_add(&ac->dsp, out + 448 + 2*128, buf + 4*128, swindow,      saved + 448 + 2*128, ac->revers + 1*128, ac->add_bias, 128);
>> +        vector_fmul_add_add_add(&ac->dsp, out + 448 + 3*128, buf + 6*128, swindow,      saved + 448 + 3*128, ac->revers + 2*128, ac->add_bias, 128);
>> +        vector_fmul_add_add_add(&ac->dsp, out + 448 + 4*128, buf + 8*128, swindow,      saved + 448 + 4*128, ac->revers + 3*128, ac->add_bias, 64);
>> +#endif
>> +
>> +        ac->dsp.vector_fmul_add_add(saved,       buf + 1024 + 64,    swindow + 64, ac->revers + 3*128+64,  0, 64, 1);
>> +        ac->dsp.vector_fmul_add_add(saved + 64,  buf + 1024 + 2*128, swindow,      ac->revers + 4*128,     0, 128, 1);
>> +        ac->dsp.vector_fmul_add_add(saved + 192, buf + 1024 + 4*128, swindow,      ac->revers + 5*128,     0, 128, 1);
>> +        ac->dsp.vector_fmul_add_add(saved + 320, buf + 1024 + 6*128, swindow,      ac->revers + 6*128,     0, 128, 1);
>> +        memcpy(                     saved + 448, ac->revers + 7*128, 128 * sizeof(float));
>
>> +        memset(                     saved + 576, 0,                  448 * sizeof(float));
>
> is that really needed? I mean if the data isnt used it wouldnt be but if its
> used then windowing and adding zeros seems rather like wasted cpu time.
> also this applies to all the memset in thus function.

In some cases these zeros are being added. Working around it using
logic based on the window sequences is mostly OK until
window_sequence[0] is not eight short and not long stop. That is when
this code is encountered:

ac->dsp.vector_fmul_add_add(out, buf, lwindow_prev, saved,
ac->add_bias, 1024, 1);

It seems a bit weird to make this code read as attached
(20080810-1202-windowing_avoid_adding_zeros.diff).

When porting to imdct_half(), Loren suggested that I focus on the
transitions and assume there are only long-to-long or short-to-short
transitions and any long-to-short or short-to-long transitions are
treated as short-to-short. This simplifies the cases significantly. I
can make a patch to do this if desirable. In fact, this could be the
first stage of changes to imdct_and_windowing() when porting to
imdct_half() after all this is in trunk, if you wish.

I intend to revisit my imdct_half() porting efforts shortly but I
won't work on that right now. I had it working but there was a bug so
it would screech for a moment then play fine then screech again at
irregular intervals. So I think I got one or more of the cases wrong.

>> +    }
>> +}
>> +
>> +/**
>>   * Apply dependent channel coupling (applied before IMDCT).
>>   *
>>   * @param   index   index into coupling gain array
>> @@ -409,6 +1354,180 @@
>>          sce->ret[i] += gain * (cc->ch[0].ret[i] - ac->add_bias);
>>  }
>>
>> +/**
>> + * channel coupling transformation interface
>> + *
>> + * @param   index   index into coupling gain array
>> + * @param   apply_coupling_method   pointer to (in)dependent coupling function
>> + */
>> +static void apply_channel_coupling(AACContext * ac, ChannelElement * cc,
>> +        void (*apply_coupling_method)(AACContext * ac, SingleChannelElement * sce, ChannelElement * cc, int index))
>> +{
>> +    int c;
>> +    int index = 0;
>> +    ChannelCoupling * coup = &cc->coup;
>> +    for (c = 0; c <= coup->num_coupled; c++) {
>
>> +        if (     !coup->is_cpe[c] && ac->che[TYPE_SCE][coup->id_select[c]]) {
>> +            apply_coupling_method(ac, &ac->che[TYPE_SCE][coup->id_select[c]]->ch[0], cc, index++);
>> +        } else if(coup->is_cpe[c] && ac->che[TYPE_CPE][coup->id_select[c]]) {
>> +            if (coup->ch_select[c] != 2) {
>> +                apply_coupling_method(ac, &ac->che[TYPE_CPE][coup->id_select[c]]->ch[0], cc, index);
>> +                if (coup->ch_select[c] != 0)
>> +                    index++;
>> +            }
>> +            if (coup->ch_select[c] != 1)
>> +                apply_coupling_method(ac, &ac->che[TYPE_CPE][coup->id_select[c]]->ch[1], cc, index++);
>
> if (ac->che[coup->type][coup->id_select[c]]) {
>    if (coup->ch_select[c] != 2) {
>        apply_coupling_method(ac, &ac->che[coup->type][coup->id_select[c]]->ch[0], cc, index);
>        if (coup->ch_select[c] != 0)
>            index++;
>    }
>    if (coup->ch_select[c] != 1)
>        apply_coupling_method(ac, &ac->che[coup->type][coup->id_select[c]]->ch[1], cc, index++);

OK.

>> +        } else {
>> +            av_log(ac->avccontext, AV_LOG_ERROR,
>> +                   "coupling target %sE[%d] not available\n",
>> +                   coup->is_cpe[c] ? "CP" : "SC", coup->id_select[c]);
>> +            break;
>> +        }
>> +    }
>> +}
>> +
>
> [...]
>> +/**
>> + * Conduct float to int16 conversion.
>> + *
>> + * @param   data        pointer to output data
>> + * @param   data_size   output data size in bytes
>> + *
>> + * @return  Returns error status. 0 - OK, !0 - error
>> + */
>> +static int convert_to_int16(AVCodecContext * avccontext, uint16_t * data, int * data_size) {
>> +    AACContext * ac = avccontext->priv_data;
>> +    int i;
>> +
>> +    if (!ac->is_saved) {
>> +        ac->is_saved = 1;
>> +        *data_size = 0;
>> +        return 0;
>> +    }
>> +
>> +    i = 1024 * avccontext->channels * sizeof(int16_t);
>> +    if(*data_size < i) {
>> +        av_log(avccontext, AV_LOG_ERROR,
>> +               "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
>> +               *data_size, i);
>> +        return -1;
>> +    }
>> +    *data_size = i;
>> +
>> +    ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, 1024, avccontext->channels);
>> +
>> +    return 0;
>> +}
>
> I wonder if it would be cleaner if this where just inlined intead of a
> seperate function, either way its ok

Moved.

>> +
>> +static int aac_decode_frame(AVCodecContext * avccontext, void * data, int * data_size, const uint8_t * buf, int buf_size) {
>> +    AACContext * ac = avccontext->priv_data;
>> +    GetBitContext gb;
>> +    enum RawDataBlockType elem_type;
>> +    int err, elem_id;
>> +
>> +    init_get_bits(&gb, buf, buf_size*8);
>> +
>> +    // parse
>> +    while ((elem_type = get_bits(&gb, 3)) != TYPE_END) {
>> +        elem_id = get_bits(&gb, 4);
>> +        err = -1;
>> +        switch (elem_type) {
>> +
>> +        case TYPE_SCE:
>> +            if(!ac->che[TYPE_SCE][elem_id]) {
>> +                if(elem_id == 1 && ac->che[TYPE_LFE][0]) {
>> +                    /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
>> +                       instead of SCE[0] CPE[0] CPE[0] LFE[0].
>> +                       If we seem to have encountered such a stream,
>> +                       transfer the LFE[0] element to SCE[1] */
>> +                    ac->che[TYPE_SCE][elem_id] = ac->che[TYPE_LFE][0];
>> +                    ac->che[TYPE_LFE][0] = NULL;
>> +                } else
>> +                    break;
>> +            }
>> +            err = decode_ics(ac, &ac->che[TYPE_SCE][elem_id]->ch[0], &gb, 0, 0);
>> +            break;
>> +
>> +        case TYPE_CPE:
>> +            if (ac->che[TYPE_CPE][elem_id])
>> +                err = decode_cpe(ac, &gb, elem_id);
>> +            break;
>> +
>> +        case TYPE_FIL:
>> +            if (elem_id == 15)
>> +                elem_id += get_bits(&gb, 8) - 1;
>> +            while (elem_id > 0)
>> +                elem_id -= decode_extension_payload(ac, &gb, elem_id);
>> +            err = 0; /* FIXME */
>> +            break;
>> +
>> +        case TYPE_PCE:
>> +        {
>> +            enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
>> +            memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
>> +            if((err = decode_pce(ac, new_che_pos, &gb)))
>> +                break;
>> +            err = output_configure(ac, ac->che_pos, new_che_pos);
>> +            break;
>> +        }
>> +
>> +        case TYPE_DSE:
>> +            skip_data_stream_element(&gb);
>> +            err = 0;
>> +            break;
>> +
>> +        case TYPE_CCE:
>> +            if (ac->che[TYPE_CCE][elem_id])
>> +                err = decode_cce(ac, &gb, elem_id);
>> +            break;
>> +
>> +        case TYPE_LFE:
>> +            if (ac->che[TYPE_LFE][elem_id])
>> +                err = decode_ics(ac, &ac->che[TYPE_LFE][elem_id]->ch[0], &gb, 0, 0);
>> +            break;
>
> these checks could be factorized out
> if(elem_type < C && !ac->che[elem_type][elem_id])
>    return -1;

The TYPE_SCE case has something slightly different so it will have to be:

if(elem_type && elem_type < TYPE_DSE && !ac->che[elem_type][elem_id])
    return -1;

Also, I'll reorder the cases such that they're in the order of the
enum. It will make things like this easier to spot.

> [...]
>>  /**
>> + * Single Channel Element - used for both SCE and LFE elements.
>> + */
>> +typedef struct {
>
>> +    float mixing_gain;                        /**< Channel gain (not used by AAC bitstream).
>> +                                               *   Note that this is applied before joint stereo decoding.
>> +                                               *   Thus, when used inside CPE elements, both channels must have equal gain.
>> +                                               */
>
> This variable is always 1 thus unneeded

Done.

Regards,
Rob
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 20080810-1202-windowing_avoid_adding_zeros.diff
Type: text/x-diff
Size: 2305 bytes
Desc: not available
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20080810/9dde5faf/attachment.diff>



More information about the ffmpeg-devel mailing list