[FFmpeg-devel] [PATCH] AAC Encoder, Round 2

Michael Niedermayer michaelni
Sat Aug 23 20:18:37 CEST 2008


On Sat, Aug 23, 2008 at 06:31:30PM +0300, Kostya wrote:
> I'm back (feeling even worse than before but nm).
> 
> Here is $subj is in a form of diff against FFmpeg SVN.

> Index: aacenc.c
> ===================================================================
> --- aacenc.c	(revision 14920)
> +++ aacenc.c	(working copy)
> @@ -28,6 +28,7 @@
>   *              TODOs:
>   * psy model selection with some option
>   * add sane pulse detection
> + * add temporal noise shaping
>   ***********************************/
>  
>  #include "avcodec.h"

ok


[...]
> @@ -211,10 +235,6 @@
>          return -1;
>      }
>      s->samplerate_index = i;
> -    s->swb_sizes1024 = swb_size_1024[i];
> -    s->swb_num1024   = ff_aac_num_swb_1024[i];
> -    s->swb_sizes128  = swb_size_128[i];
> -    s->swb_num128    = ff_aac_num_swb_128[i];
>  
>      dsputil_init(&s->dsp, avctx);
>      ff_mdct_init(&s->mdct1024, 11, 0);
> @@ -229,7 +249,7 @@
>      s->cpe = av_mallocz(sizeof(ChannelElement) * aac_chan_configs[avctx->channels-1][0]);
>      if(ff_aac_psy_init(&s->psy, avctx, AAC_PSY_3GPP,
>                         aac_chan_configs[avctx->channels-1][0], 0,
> -                       s->swb_sizes1024, s->swb_num1024, s->swb_sizes128, s->swb_num128) < 0){
> +                       swb_size_1024[i], ff_aac_num_swb_1024[i], swb_size_128[i], ff_aac_num_swb_128[i]) < 0){
>          av_log(avctx, AV_LOG_ERROR, "Cannot initialize selected model.\n");
>          return -1;
>      }

ok



[...]

>  /**
>   * Encode ics_info element.
>   * @see Table 4.6 (syntax of ics_info)
>   */
> -static void put_ics_info(AVCodecContext *avctx, IndividualChannelStream *info)
> +static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
>  {
> -    AACEncContext *s = avctx->priv_data;

ok


[...]

> +/**
> + * Calculate the number of bits needed to code all coefficient signs in current band.
> + */
> +static int calculate_band_sign_bits(AACEncContext *s, SingleChannelElement *sce,
> +                                    int group_len, int start, int size)
> +{
> +    int bits = 0;
> +    int i, w;
> +    for(w = 0; w < group_len; w++){
> +        for(i = 0; i < size; i++){
> +            if(sce->icoefs[start + i])
> +                bits++;
> +        }
> +        start += 128;
> +    }
> +    return bits;
> +}

ok


> +
> +/**
> + * Calculate the number of bits needed to code given band with given codebook.
> + *
> + * @param s         encoder context
> + * @param sce       channel element
> + * @param group_len window group length
> + * @param start     scalefactor band position in spectral coefficients
> + * @param size      scalefactor band size
> + * @param cb        codebook number
> + */
> +static int calculate_band_bits(AACEncContext *s, SingleChannelElement *sce,
> +                               int group_len, int start, int size, int cb)
> +{
> +    int i, j, w;
> +    int bits = 0, dim, idx;
> +    int range = aac_cb_info[cb].range;
> +
> +    if(range == -1) return 0;

why is this run in a 0..11 loop with a if(0) return 0 ?
it would make more sense IMHO if this wouldnt be run with the zero band
in the first place


> +    cb--;
> +    dim = cb < FIRST_PAIR_BT ? 4 : 2;
> +
> +    if(IS_CODEBOOK_UNSIGNED(cb)){
> +        int coef_abs[2];
> +        for(w = 0; w < group_len; w++){
> +            for(i = 0; i < size; i += dim){
> +                idx = 0;
> +                for(j = 0; j < dim; j++){
> +                    coef_abs[j] = FFABS(sce->icoefs[start+i+j]);
> +                    idx = idx * range + FFMIN(coef_abs[j], 16);
> +                    if(cb == ESC_BT && coef_abs[j] > 15)
> +                        bits += av_log2(coef_abs[j]) * 2 - 4 + 1;
> +                }
> +                bits += ff_aac_spectral_bits[cb][idx];
> +            }
> +            start += 128;
> +        }

The code to calculate the number of bits is likely run several times more
often than the code to do the actual encoding thus while i do not have
a strong oppinion about merging the escape case into the unsigned one
this should either be done for both, none or only the encoding case.
While you do it only for the much more speed critical calculate_band_bits()




> +    }else{
> +        for(w = 0; w < group_len; w++){
> +            for(i = 0; i < size; i += dim){
> +                idx = sce->icoefs[start+i];
> +                for(j = 1; j < dim; j++)
> +                    idx = idx * range + sce->icoefs[start+i+j];
> +                //it turned out that all signed codebooks use the same offset for index coding
> +                idx += 40;
> +                bits += ff_aac_spectral_bits[cb][idx];
> +            }
> +            start += 128;
> +        }
> +    }
> +    return bits;
> +}
> +

> +/**
> + * Encode band info for single window group bands.
> + */
> +static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce,
> +                                     int win, int group_len)
> +{
> +    BandCodingPath path[64];
> +    int band_bits[64][12];
> +    int maxval;
> +    int w, swb, cb, start, start2, size;
> +    int i, j;
> +    const int max_sfb = sce->ics.max_sfb;
> +    const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
> +    const int run_esc = (1 << run_bits) - 1;
> +    int bits, sbits, idx, count;
> +    int stack[64], stack_len;
> +
> +    start = win*128;
> +    for(swb = 0; swb < max_sfb; swb++){

> +        maxval = 0;
> +        start2 = start;

int maxval = 0;
...


> +        size = sce->ics.swb_sizes[swb];
> +        if(sce->zeroes[win*16 + swb])
> +            maxval = 0;
> +        else{
> +            for(w = 0; w < group_len; w++){
> +                for(i = start2; i < start2 + size; i++){
> +                    maxval = FFMAX(maxval, FFABS(sce->icoefs[i]));
> +                }
> +                start2 += 128;
> +            }
> +        }
> +        sbits = calculate_band_sign_bits(s, sce, group_len, start, size);
> +        for(cb = 0; cb < 12; cb++){

> +            if(aac_cb_info[cb].maxval < maxval)
> +                band_bits[swb][cb] = INT_MAX;
> +            else{
> +                band_bits[swb][cb] = calculate_band_bits(s, sce, group_len, start, size, cb);
> +                if(IS_CODEBOOK_UNSIGNED(cb-1)){
> +                    band_bits[swb][cb] += sbits;
> +                }
> +            }

there are many styles to place {} but putting them on one hand in cases
where they require an additional line while OTOH ommiting them in cases
where they do not makes no sense to me.
IMHO the if else should be if{ }else ...


[...]
> @@ -295,7 +648,7 @@
>                  continue;
>              }
>              for(w2 = w; w2 < w + sce->ics.group_len[wg]; w2++){
> -                encode_band_coeffs(s, cpe, channel, start + w2*128,
> +                encode_band_coeffs(s, sce, start + w2*128,
>                                     sce->ics.swb_sizes[i],
>                                     sce->band_type[w*16 + i]);
>              }

ok

psy model will be reviewed ASAP


[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

> ... defining _GNU_SOURCE...
For the love of all that is holy, and some that is not, don't do that.
-- Luca & Mans
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20080823/9018b412/attachment.pgp>



More information about the ffmpeg-devel mailing list