[FFmpeg-devel] [PATCH 1/4] libavcodec: Implementation of AC3 fixed point decoder.

Tue Oct 16 15:53:25 CEST 2012

Hello Vitor,

>Hello!
>
>>>> AC3 fixed point decoder is based on AC3 floating point
>>>>    decoder that is already part of FFmpeg.
>>>>
>>>> It does not use FFmpegs FFT. It uses  FFT  developed for
>>>>    optimization of floating point AC3 decoder and because
>>>>    of that currently some of the files that implement this
>>>>    FFT are located in libavcodec/mips folder.
>>>
>>>> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
>>>> index 9b86f7c..cf88e48 100644
>>>> --- a/libavcodec/Makefile
>>>> +++ b/libavcodec/Makefile
>>>> @@ -80,7 +80,8 @@ OBJS-$(CONFIG_AAC_ENCODER)             += aacenc.o aaccoder.o    \
>>>>                                              mpeg4audio.o kbdwin.o  \
>>>>                                              audio_frame_queue.o
>>>>    OBJS-$(CONFIG_AASC_DECODER)            += aasc.o msrledec.o
>>>> -OBJS-$(CONFIG_AC3_DECODER)             += ac3dec.o ac3dec_data.o ac3.o kbdwin.o
>>>> +OBJS-$(CONFIG_AC3_DECODER)             += ac3dec_float.o ac3dec_data.o ac3.o kbdwin.o
>>>> +OBJS-$(CONFIG_AC3_FIXED_DECODER)       += fft_ac3_init_tab.o fft_ac3_fixed.o ac3dec_fixed.o ac3dec_data.o ac3.o kbdwin.o
>>>>    OBJS-$(CONFIG_AC3_ENCODER)             += ac3enc_float.o ac3enc.o ac3tab.o \
>>>>                                              ac3.o kbdwin.o
>>>>    OBJS-$(CONFIG_AC3_FIXED_ENCODER)       += ac3enc_fixed.o ac3enc.o ac3tab.o ac3.o
>>>> diff --git a/libavcodec/ac3.h b/libavcodec/ac3.h
>>>> index b9f34b9..0a1ff23 100644
>>>> --- a/libavcodec/ac3.h
>>>> +++ b/libavcodec/ac3.h
>>>> @@ -27,6 +27,10 @@
>>>>    #ifndef AVCODEC_AC3_H
>>>>    #define AVCODEC_AC3_H
>>>>
>>>> +#ifndef CONFIG_AC3_FIXED
>>>> +#   define CONFIG_AC3_FIXED 0
>>>> +#endif
>>>
>>> I don't think this is needed.
>>
>> But how can I distinguish between fixed and floating point macros without this?
>> Should I use just #ifdef CONFIG_AC3_FIXED?
>
>Isn't CONFIG_AC3_FIXED already set to 0/1 by configure?
>

You are correct... This will be removed...

>>>>    #define AC3_MAX_CODED_FRAME_SIZE 3840 /* in bytes */
>>>>    #define AC3_MAX_CHANNELS 7            /**< maximum number of channels, including coupling channel */
>>>>    #define CPL_CH 0                      /**< coupling channel index */
>>>> @@ -51,6 +55,41 @@
>>>>    #define EXP_D25   2
>>>>    #define EXP_D45   3
>>>>
>>>> +#if CONFIG_AC3_FIXED
>>>> +
>>>> +#define CONFIG_FFT_FLOAT 0
>>>> +
>>>> +/* pre-defined gain values */
>>>> +#define LEVEL_PLUS_3DB          5793
>>>> +#define LEVEL_PLUS_1POINT5DB    4871
>>>> +#define LEVEL_MINUS_1POINT5DB   3444
>>>> +#define LEVEL_MINUS_3DB         2896
>>>> +#define LEVEL_MINUS_4POINT5DB   2435
>>>> +#define LEVEL_MINUS_6DB         2048
>>>> +#define LEVEL_MINUS_9DB         1448
>>>> +#define LEVEL_ZERO              0
>>>> +#define LEVEL_ONE               4096
>>>> +
>>>> +#define MUL_BIAS1 65536
>>>> +#define MUL_BIAS2 2147418112
>>>
>>> If this is just the floating-point values converted to fixed, it is
>>> better to use a macro to define it only once. See FIXR() macro in
>>> mpegaudiodec.c.
>>>
>>
>> You are correct and I will create macro for this.
>>
>>>> +#define AC3_RENAME(x)           x ## _fixed
>>>
>>>> +#define AC3_CENTER(x)           center_levels[x]
>>>> +#define AC3_SURROUND(x)         surround_levels[x]
>>>
>>> Those two looks unused.
>>>
>>
>> They are leftovers from previous version of code. Sorry.
>> I will remove them.
>>
>>>> +#define AC3_LEVEL(x)            ((x)*23170 + 0x4000) >> 15
>>>> +#define AC3_NORM(x,norm)        ((x)<<12)/norm
>>>
>>>> +#define AC3_DYNAMIC_RANGE(x)    (x)
>>>
>>> This looks very different from the float version, how can it work?
>>>
>>
>> The way in which this is used in fixed point version of code is different
>> from the way it is used in floating point version. You can see in
>> function scale_coefs how dynamic range is used.
>>
>>>> +#define AC3_SPX_BLEND(x)        (x)
>>>
>>>> +#define TYPE_PREFIX(x)          fixed_ ## x
>>>
>>> Why not use sulfix everywhere, for simplicity and consistency?
>>
>> I didn't want to change the name of the function float_to_int16_interleave
>> that was created before this decoder (and its name is more descriptive this way).
>
>Some people are working to make AC3 decoder output planar data, so this 
>should be a non-issue soon.
>

Should we leave prefix then?

>>>
>>>> +#define AC3_DYNAMIC_RANGE1   0
>>>
>>> Hmm, that is strange.
>>
>> Similar as above. The way that dynamic range is used in this decoder is
>> different from the way it is used in floating point decoder.
>
>Why? If it is simply better, change also the float version to use the 
>new way.

For floating point version this is not so good... If we use this for it, there will be 
more operations that are not necessary now (for fixed point implementation we 
can use shift, for float we would need to use multiplication)

>
>>>>        for (i = 0; i < s->fbw_channels; i++) {
>>>>            norm0 += s->downmix_coeffs[i][0];
>>>>            norm1 += s->downmix_coeffs[i][1];
>>>>        }
>>>
>>>> -    norm0 = 1.0f / norm0;
>>>> -    norm1 = 1.0f / norm1;
>>>> +
>>>>        for (i = 0; i < s->fbw_channels; i++) {
>>>> -        s->downmix_coeffs[i][0] *= norm0;
>>>> -        s->downmix_coeffs[i][1] *= norm1;
>>>> +        s->downmix_coeffs[i][0] = AC3_NORM(s->downmix_coeffs[i][0],norm0);
>>>> +        s->downmix_coeffs[i][1] = AC3_NORM(s->downmix_coeffs[i][1],norm1);
>>>>        }
>>>
>>> Division is much slower than multiplication. You can do the same trick
>>> in fixed point:
>>>
>>> norm0 = (1 << bits) / norm0;
>>> s->downmix_coeffs[i][0] = MUL(s->downmix_coeffs[i][0],norm0);
>>>
>>> where
>>>
>>> #define MUL(a,b) (((int64_t) (a)) * (b)) >> (s))
>>>
>>
>> I am not sure that accuracy of fixed point code would be ok if division is moved out
>> from the loop
>
>If you use intermediate 64-bit values, this should impact very little 
>the precision.
>

Ok. We will try this approach then.

>>>>        if (s->output_mode == AC3_CHMODE_MONO) {
>>>>            for (i = 0; i < s->fbw_channels; i++)
>>>> -            s->downmix_coeffs[i][0] = (s->downmix_coeffs[i][0] +
>>>> -                                       s->downmix_coeffs[i][1]) * LEVEL_MINUS_3DB;
>>>> +            s->downmix_coeffs[i][0] = AC3_LEVEL(s->downmix_coeffs[i][0] + s->downmix_coeffs[i][1]);
>>>>        }
>>>>    }
>>>
>>> Hmm, is this correct?
>>
>> Not exactly, although it passes fate tests. I will have to add back multiplication with LEVEL_MINUS_3DB.
>>

I was wrong here. This is ok since I created macro AC3_LEVEL that works this.

>>>
>>>> @@ -602,51 +605,25 @@ static inline void do_imdct(AC3DecodeContext *s, int channels)
>>>>        for (ch = 1; ch <= channels; ch++) {
>>>>            if (s->block_switch[ch]) {
>>>>                int i;
>>>> -            float *x = s->tmp_output + 128;
>>>> +            FFTSample *x = s->tmp_output+128;
>>>>                for (i = 0; i < 128; i++)
>>>>                    x[i] = s->transform_coeffs[ch][2 * i];
>>>> -            s->imdct_256.imdct_half(&s->imdct_256, s->tmp_output, x);
>>>> -            s->dsp.vector_fmul_window(s->output[ch - 1], s->delay[ch - 1],
>>>> +            s->imdct_256.AC3_RENAME(imdct_half)(&s->imdct_256, s->tmp_output, x);
>>>> +            s->dsp.AC3_RENAME(vector_fmul_window)(s->output[ch - 1], s->delay[ch - 1],
>>>>                                          s->tmp_output, s->window, 128);
>>>>                for (i = 0; i < 128; i++)
>>>>                    x[i] = s->transform_coeffs[ch][2 * i + 1];
>>>> -            s->imdct_256.imdct_half(&s->imdct_256, s->delay[ch - 1], x);
>>>> +            s->imdct_256.AC3_RENAME(imdct_half)(&s->imdct_256, s->delay[ch - 1], x);
>>>>            } else {
>>>> -            s->imdct_512.imdct_half(&s->imdct_512, s->tmp_output, s->transform_coeffs[ch]);
>>>> -            s->dsp.vector_fmul_window(s->output[ch - 1], s->delay[ch - 1],
>>>> +            s->imdct_512.AC3_RENAME(imdct_half)(&s->imdct_512, s->tmp_output, s->transform_coeffs[ch]);
>>>> +            s->dsp.AC3_RENAME(vector_fmul_window)(s->output[ch - 1], s->delay[ch - 1],
>>>>                                          s->tmp_output, s->window, 128);
>>>> -            memcpy(s->delay[ch - 1], s->tmp_output + 128, 128 * sizeof(float));
>>>> +            memcpy(s->delay[ch - 1], s->tmp_output + 128, 128 * sizeof(FFTSample));
>>>>            }
>>>>        }
>>>>    }
>>>>
>>>> -/**
>>>> - * Upmix delay samples from stereo to original channel layout.
>>>> - */
>>>> -static void ac3_upmix_delay(AC3DecodeContext *s)
>>>> -{
>>>> -    int channel_data_size = sizeof(s->delay[0]);
>>>> -    switch (s->channel_mode) {
>>>> -    case AC3_CHMODE_DUALMONO:
>>>> -    case AC3_CHMODE_STEREO:
>>>> -        /* upmix mono to stereo */
>>>> -        memcpy(s->delay[1], s->delay[0], channel_data_size);
>>>> -        break;
>>>> -    case AC3_CHMODE_2F2R:
>>>> -        memset(s->delay[3], 0, channel_data_size);
>>>> -    case AC3_CHMODE_2F1R:
>>>> -        memset(s->delay[2], 0, channel_data_size);
>>>> -        break;
>>>> -    case AC3_CHMODE_3F2R:
>>>> -        memset(s->delay[4], 0, channel_data_size);
>>>> -    case AC3_CHMODE_3F1R:
>>>> -        memset(s->delay[3], 0, channel_data_size);
>>>> -    case AC3_CHMODE_3F:
>>>> -        memcpy(s->delay[2], s->delay[1], channel_data_size);
>>>> -        memset(s->delay[1], 0, channel_data_size);
>>>> -        break;
>>>> -    }
>>>> -}
>>>> +
>>>
>>> ??
>>
>> The sequence of imdct and downmix is changed in fixed point code for better accuracy,
>> so this function is not needed.
>
>So it should be also done for float version, in a separate patch (to be 
>applied before the main one).

It will be done.

-Nedeljko
________________________________________
From: Vitor Sessak [vitor1001 at gmail.com]
Sent: Monday, October 08, 2012 18:38
To: Babic, Nedeljko
Cc: FFmpeg development discussions and patches; Lukac, Zeljko
Subject: Re: [FFmpeg-devel] [PATCH 1/4] libavcodec: Implementation of AC3 fixed point decoder.

On 10/03/2012 03:17 PM, Babic, Nedeljko wrote:
> Hi Vitor,

Hello!

>>> AC3 fixed point decoder is based on AC3 floating point
>>>    decoder that is already part of FFmpeg.
>>>
>>> It does not use FFmpegs FFT. It uses  FFT  developed for
>>>    optimization of floating point AC3 decoder and because
>>>    of that currently some of the files that implement this
>>>    FFT are located in libavcodec/mips folder.
>>
>>> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
>>> index 9b86f7c..cf88e48 100644
>>> --- a/libavcodec/Makefile
>>> +++ b/libavcodec/Makefile
>>> @@ -80,7 +80,8 @@ OBJS-$(CONFIG_AAC_ENCODER)             += aacenc.o aaccoder.o    \
>>>                                              mpeg4audio.o kbdwin.o  \
>>>                                              audio_frame_queue.o
>>>    OBJS-$(CONFIG_AASC_DECODER)            += aasc.o msrledec.o
>>> -OBJS-$(CONFIG_AC3_DECODER)             += ac3dec.o ac3dec_data.o ac3.o kbdwin.o
>>> +OBJS-$(CONFIG_AC3_DECODER)             += ac3dec_float.o ac3dec_data.o ac3.o kbdwin.o
>>> +OBJS-$(CONFIG_AC3_FIXED_DECODER)       += fft_ac3_init_tab.o fft_ac3_fixed.o ac3dec_fixed.o ac3dec_data.o ac3.o kbdwin.o
>>>    OBJS-$(CONFIG_AC3_ENCODER)             += ac3enc_float.o ac3enc.o ac3tab.o \
>>>                                              ac3.o kbdwin.o
>>>    OBJS-$(CONFIG_AC3_FIXED_ENCODER)       += ac3enc_fixed.o ac3enc.o ac3tab.o ac3.o
>>> diff --git a/libavcodec/ac3.h b/libavcodec/ac3.h
>>> index b9f34b9..0a1ff23 100644
>>> --- a/libavcodec/ac3.h
>>> +++ b/libavcodec/ac3.h
>>> @@ -27,6 +27,10 @@
>>>    #ifndef AVCODEC_AC3_H
>>>    #define AVCODEC_AC3_H
>>>
>>> +#ifndef CONFIG_AC3_FIXED
>>> +#   define CONFIG_AC3_FIXED 0
>>> +#endif
>>
>> I don't think this is needed.
>
> But how can I distinguish between fixed and floating point macros without this?
> Should I use just #ifdef CONFIG_AC3_FIXED?

Isn't CONFIG_AC3_FIXED already set to 0/1 by configure?

>>>    #define AC3_MAX_CODED_FRAME_SIZE 3840 /* in bytes */
>>>    #define AC3_MAX_CHANNELS 7            /**< maximum number of channels, including coupling channel */
>>>    #define CPL_CH 0                      /**< coupling channel index */
>>> @@ -51,6 +55,41 @@
>>>    #define EXP_D25   2
>>>    #define EXP_D45   3
>>>
>>> +#if CONFIG_AC3_FIXED
>>> +
>>> +#define CONFIG_FFT_FLOAT 0
>>> +
>>> +/* pre-defined gain values */
>>> +#define LEVEL_PLUS_3DB          5793
>>> +#define LEVEL_PLUS_1POINT5DB    4871
>>> +#define LEVEL_MINUS_1POINT5DB   3444
>>> +#define LEVEL_MINUS_3DB         2896
>>> +#define LEVEL_MINUS_4POINT5DB   2435
>>> +#define LEVEL_MINUS_6DB         2048
>>> +#define LEVEL_MINUS_9DB         1448
>>> +#define LEVEL_ZERO              0
>>> +#define LEVEL_ONE               4096
>>> +
>>> +#define MUL_BIAS1 65536
>>> +#define MUL_BIAS2 2147418112
>>
>> If this is just the floating-point values converted to fixed, it is
>> better to use a macro to define it only once. See FIXR() macro in
>> mpegaudiodec.c.
>>
>
> You are correct and I will create macro for this.
>
>>> +#define AC3_RENAME(x)           x ## _fixed
>>
>>> +#define AC3_CENTER(x)           center_levels[x]
>>> +#define AC3_SURROUND(x)         surround_levels[x]
>>
>> Those two looks unused.
>>
>
> They are leftovers from previous version of code. Sorry.
> I will remove them.
>
>>> +#define AC3_LEVEL(x)            ((x)*23170 + 0x4000) >> 15
>>> +#define AC3_NORM(x,norm)        ((x)<<12)/norm
>>
>>> +#define AC3_DYNAMIC_RANGE(x)    (x)
>>
>> This looks very different from the float version, how can it work?
>>
>
> The way in which this is used in fixed point version of code is different
> from the way it is used in floating point version. You can see in
> function scale_coefs how dynamic range is used.
>
>>> +#define AC3_SPX_BLEND(x)        (x)
>>
>>> +#define TYPE_PREFIX(x)          fixed_ ## x
>>
>> Why not use sulfix everywhere, for simplicity and consistency?
>
> I didn't want to change the name of the function float_to_int16_interleave
> that was created before this decoder (and its name is more descriptive this way).

Some people are working to make AC3 decoder output planar data, so this
should be a non-issue soon.

>>
>>> +#define AC3_DYNAMIC_RANGE1   0
>>
>> Hmm, that is strange.
>
> Similar as above. The way that dynamic range is used in this decoder is
> different from the way it is used in floating point decoder.

Why? If it is simply better, change also the float version to use the
new way.

>>>        for (i = 0; i < s->fbw_channels; i++) {
>>>            norm0 += s->downmix_coeffs[i][0];
>>>            norm1 += s->downmix_coeffs[i][1];
>>>        }
>>
>>> -    norm0 = 1.0f / norm0;
>>> -    norm1 = 1.0f / norm1;
>>> +
>>>        for (i = 0; i < s->fbw_channels; i++) {
>>> -        s->downmix_coeffs[i][0] *= norm0;
>>> -        s->downmix_coeffs[i][1] *= norm1;
>>> +        s->downmix_coeffs[i][0] = AC3_NORM(s->downmix_coeffs[i][0],norm0);
>>> +        s->downmix_coeffs[i][1] = AC3_NORM(s->downmix_coeffs[i][1],norm1);
>>>        }
>>
>> Division is much slower than multiplication. You can do the same trick
>> in fixed point:
>>
>> norm0 = (1 << bits) / norm0;
>> s->downmix_coeffs[i][0] = MUL(s->downmix_coeffs[i][0],norm0);
>>
>> where
>>
>> #define MUL(a,b) (((int64_t) (a)) * (b)) >> (s))
>>
>
> I am not sure that accuracy of fixed point code would be ok if division is moved out
> from the loop

If you use intermediate 64-bit values, this should impact very little
the precision.

>>>        if (s->output_mode == AC3_CHMODE_MONO) {
>>>            for (i = 0; i < s->fbw_channels; i++)
>>> -            s->downmix_coeffs[i][0] = (s->downmix_coeffs[i][0] +
>>> -                                       s->downmix_coeffs[i][1]) * LEVEL_MINUS_3DB;
>>> +            s->downmix_coeffs[i][0] = AC3_LEVEL(s->downmix_coeffs[i][0] + s->downmix_coeffs[i][1]);
>>>        }
>>>    }
>>
>> Hmm, is this correct?
>
> Not exactly, although it passes fate tests. I will have to add back multiplication with LEVEL_MINUS_3DB.
>
>>
>>> @@ -602,51 +605,25 @@ static inline void do_imdct(AC3DecodeContext *s, int channels)
>>>        for (ch = 1; ch <= channels; ch++) {
>>>            if (s->block_switch[ch]) {
>>>                int i;
>>> -            float *x = s->tmp_output + 128;
>>> +            FFTSample *x = s->tmp_output+128;
>>>                for (i = 0; i < 128; i++)
>>>                    x[i] = s->transform_coeffs[ch][2 * i];
>>> -            s->imdct_256.imdct_half(&s->imdct_256, s->tmp_output, x);
>>> -            s->dsp.vector_fmul_window(s->output[ch - 1], s->delay[ch - 1],
>>> +            s->imdct_256.AC3_RENAME(imdct_half)(&s->imdct_256, s->tmp_output, x);
>>> +            s->dsp.AC3_RENAME(vector_fmul_window)(s->output[ch - 1], s->delay[ch - 1],
>>>                                          s->tmp_output, s->window, 128);
>>>                for (i = 0; i < 128; i++)
>>>                    x[i] = s->transform_coeffs[ch][2 * i + 1];
>>> -            s->imdct_256.imdct_half(&s->imdct_256, s->delay[ch - 1], x);
>>> +            s->imdct_256.AC3_RENAME(imdct_half)(&s->imdct_256, s->delay[ch - 1], x);
>>>            } else {
>>> -            s->imdct_512.imdct_half(&s->imdct_512, s->tmp_output, s->transform_coeffs[ch]);
>>> -            s->dsp.vector_fmul_window(s->output[ch - 1], s->delay[ch - 1],
>>> +            s->imdct_512.AC3_RENAME(imdct_half)(&s->imdct_512, s->tmp_output, s->transform_coeffs[ch]);
>>> +            s->dsp.AC3_RENAME(vector_fmul_window)(s->output[ch - 1], s->delay[ch - 1],
>>>                                          s->tmp_output, s->window, 128);
>>> -            memcpy(s->delay[ch - 1], s->tmp_output + 128, 128 * sizeof(float));
>>> +            memcpy(s->delay[ch - 1], s->tmp_output + 128, 128 * sizeof(FFTSample));
>>>            }
>>>        }
>>>    }
>>>
>>> -/**
>>> - * Upmix delay samples from stereo to original channel layout.
>>> - */
>>> -static void ac3_upmix_delay(AC3DecodeContext *s)
>>> -{
>>> -    int channel_data_size = sizeof(s->delay[0]);
>>> -    switch (s->channel_mode) {
>>> -    case AC3_CHMODE_DUALMONO:
>>> -    case AC3_CHMODE_STEREO:
>>> -        /* upmix mono to stereo */
>>> -        memcpy(s->delay[1], s->delay[0], channel_data_size);
>>> -        break;
>>> -    case AC3_CHMODE_2F2R:
>>> -        memset(s->delay[3], 0, channel_data_size);
>>> -    case AC3_CHMODE_2F1R:
>>> -        memset(s->delay[2], 0, channel_data_size);
>>> -        break;
>>> -    case AC3_CHMODE_3F2R:
>>> -        memset(s->delay[4], 0, channel_data_size);
>>> -    case AC3_CHMODE_3F1R:
>>> -        memset(s->delay[3], 0, channel_data_size);
>>> -    case AC3_CHMODE_3F:
>>> -        memcpy(s->delay[2], s->delay[1], channel_data_size);
>>> -        memset(s->delay[1], 0, channel_data_size);
>>> -        break;
>>> -    }
>>> -}
>>> +
>>
>> ??
>
> The sequence of imdct and downmix is changed in fixed point code for better accuracy,
> so this function is not needed.

So it should be also done for float version, in a separate patch (to be
applied before the main one).

-Vitor