[FFmpeg-devel] [PATCH] MLP/TrueHD Decoder - 2nd try

Wed Jul 2 16:40:31 CEST 2008

On Tue, Jul 01, 2008 at 06:24:55PM +0100, Ramiro Polla wrote:
> Michael Niedermayer wrote:
>> On Tue, Jul 01, 2008 at 01:55:39AM +0100, Ramiro Polla wrote:
[...]
>> [...]
>>>> [...]
>>>>> /** Generate a PCM sample using the prediction filters and a residual 
>>>>> value
>>>>>  *  read from the data stream, and update the filter state.
>>>>>  */
>>>>>
>>>>> static int filter_sample(MLPDecodeContext *m, unsigned int substr,
>>>>>                          unsigned int channel, int32_t residual)
>>>>> {
>>>>>     unsigned int i, j, index;
>>>>>     int64_t accum = 0;
>>>>>     int32_t result;
>>>>>
>>>>>     /* TODO: Move this code to DSPContext? */
>>>>>
>>>>> #define INDEX(channel, order, pos) \
>>>>>     ((m->filter_index[channel][order] + (pos)) & (MAX_FILTER_ORDER - 
>>>>> 1))
>>>>>
>>>>>     for (j = 0; j < 2; j++)
>>>>>         for (i = 0; i < m->filter_order[channel][j]; i++)
>>>>>             accum += 
>>>>> (int64_t)m->filter_state[channel][j][INDEX(channel,j,i)] *
>>>>>                      m->filter_coeff[channel][j][i];
>>>>>
>>>>>     accum = accum >> m->filter_coeff_q[channel][FIR];
>>>>>     result = (accum + residual)
>>>>>                 & ~((1 << m->quant_step_size[substr][channel]) - 1);
>>>>>
>>>>>     index = INDEX(channel, FIR, -1);
>>>>>     m->filter_state[channel][FIR][index] = result;
>>>>>     m->filter_index[channel][FIR] = index;
>>>>>
>>>>>     index = INDEX(channel, IIR, -1);
>>>>>     m->filter_state[channel][IIR][index] = result - accum;
>>>>>     m->filter_index[channel][IIR] = index;
>>>>>
>>>>> #undef INDEX
>>>>>
>>>>>     return result;
>>>>> }
>>>> Why the complicated indexing?
>>>> how big would filter_state be if it where a normal flat array? 160 
>>>> entries?
>>> It would need
>>> sizeof(int32_t) * MAX_BLOCKSIZE * 2 * MAX_CHANNELS = 20480 = 20k
>>> bytes more in the context, and attached filter_index_flat_array.diff 
>>> patch. 
>>> I assume that's not ok =)
>> yes, but do we really need that amount?
>> if the huff decode and filter loops are split (i assume thats possible) 
>> then
>> we can do a
>> for(channels)
>>     for(sample in block)
>> which would only need sizeof(int32_t) * 2 * MAX_BLOCKSIZE
>> and a memcpy() or 2 for a reading writing the state
>
> Done this way and got some speedup. But for now I leave filter_state_buffer 
> in the context. Should I move it to the stack as in attached 
> filter_state_buffer_stack.diff patch?

yes

[...]
> /** Maximum number of substreams that can be decoded. This could also be set
>  *  higher, but again I haven't seen any examples with more than two. */
> #define MAX_SUBSTREAMS      2
> 

> /** Maximum sample frequency supported. */
> #define MAX_SAMPLERATE      192000

slightly unclear, supported by our implementation or MLP?

> 
> /** The maximum number of audio samples within one access unit. */
> #define MAX_BLOCKSIZE       (40 * (MAX_SAMPLERATE / 48000))
> /** The next power of two greater than MAX_BLOCKSIZE. */
> #define MAX_BLOCKSIZE_POW2  (64 * (MAX_SAMPLERATE / 48000))
> 
> /** Number of allowed filters. */
> #define NUM_FILTERS         2
> 
> /** The maximum number of taps in either the IIR or FIR filter.
>  *  I believe MLP actually specifies the maximum order for IIR filters is four,
>  *  and that the sum of the orders of both filters must be <= 8. */
> #define MAX_FILTER_ORDER    8
> 
> /** Number of bits used for VLC lookup - longest huffman code is 9. */
> #define VLC_BITS            9
> 
> 
> static const char* sample_message =
>     "Please file a bug report following the instructions at "
>     "http://ffmpeg.mplayerhq.hu/bugreports.html and include "
>     "a sample of this file.";
> 
> typedef struct SubStream {

>     //! For each substream, whether a restart header has been read
>     uint8_t     restart_seen;

The comment is not very informative beyond what is obvious from the variable
name

> 
>     //@{
>     /** Restart header data */
>     //! The sync word used at the start of the last restart header
>     uint16_t    restart_sync_word;
> 
>     //! The index of the first channel coded in this substream
>     uint8_t     min_channel;
>     //! The index of the last channel coded in this substream
>     uint8_t     max_channel;
>     //! The number of channels input into the rematrix stage
>     uint8_t     max_matrix_channel;
> 
>     //! The left shift applied to random noise in 0x31ea substreams
>     uint8_t     noise_shift;
>     //! The current seed value for the pseudorandom noise generator(s)
>     uint32_t    noisegen_seed;
> 

>     //! Does this substream contain extra info to check the size of VLC blocks?
>     uint8_t     data_check_present;

Describing variables with questions sounds a little odd.

> 
>     //! Bitmask of which parameter sets are conveyed in a decoding parameter block
>     uint8_t     param_presence_flags;
> #define PARAM_BLOCKSIZE     (1 << 7)
> #define PARAM_MATRIX        (1 << 6)
> #define PARAM_OUTSHIFT      (1 << 5)
> #define PARAM_QUANTSTEP     (1 << 4)
> #define PARAM_FIR           (1 << 3)
> #define PARAM_IIR           (1 << 2)
> #define PARAM_HUFFOFFSET    (1 << 1)
>     //@}
> 
>     //@{
>     /** Matrix data */
> 
>     //! Number of matrices to be applied
>     uint8_t     num_primitive_matrices;
> 

>     //! Output channel of matrix
>     uint8_t     matrix_ch[MAX_MATRICES];

matrix_out_ch

[...]
>     //! Do we have valid stream data read from a major sync block?
>     uint8_t     params_valid;

again description by question

[...]
> /** Initialize static data, constant between all invocations of the codec. */
> 
> static void init_static()

av_cold

> {
>     if (!huff_vlc[0].bits) {

checking the last set element avoids race conditions (its not a real issues
as avcodec_open() isnt thread safe)

[...]
> /** Read a restart header from a block in a substream. This contains parameters
>  *  required to decode the audio that do not change very often. Generally
>  *  (always) present only in blocks following a major sync.
>  */
> 
> static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
>                                const uint8_t *buf, unsigned int substr)
> {
>     SubStream *s = &m->substream[substr];
>     unsigned int ch;
>     int sync_word, tmp;
>     uint8_t checksum;
>     uint8_t lossless_check;
>     int start_count = get_bits_count(gbp);
> 

>     sync_word = get_bits(gbp, 14);
> 
>     if ((sync_word & 0x3ffe) != 0x31ea) {

sync_word = get_bits(gbp, 13);
if(sync_word != ...

>         av_log(m->avctx, AV_LOG_ERROR,
>                "Restart header sync incorrect (got 0x%04x)\n", sync_word);
>         return -1;
>     }
>     s->restart_sync_word = sync_word;
> 
>     skip_bits(gbp, 16); /* Output timestamp */

If its a timestamp then it should end in AVFrame.pts i think

[...]
>     for (ch = s->min_channel; ch <= s->max_channel; ch++) {
>         m->filter_order  [ch][FIR] = 0;
>         m->filter_order  [ch][IIR] = 0;
>         m->filter_coeff_q[ch][FIR] = 0;
>         m->filter_coeff_q[ch][IIR] = 0;
> 
>         memset(m->filter_coeff[ch], 0, sizeof(m->filter_coeff[ch]));
>         memset(m->filter_state[ch], 0, sizeof(m->filter_state[ch]));

If order (= number of taps) is 0 then the coeffs should not matter

[...]
>     if (order > 0) {
>         int coeff_bits, coeff_shift;
> 
>         m->filter_coeff_q[channel][filter] = get_bits(gbp, 4);
> 
>         coeff_bits  = get_bits(gbp, 5);
>         coeff_shift = get_bits(gbp, 3);
>         if (coeff_bits < 1 || coeff_bits > 16) {
>             av_log(m->avctx, AV_LOG_ERROR,
>                    "%cIR filter coeff_bits must be between 1 and 16\n",
>                    fchar);
>             return -1;
>         }
>         if (coeff_bits + coeff_shift > 16) {
>             av_log(m->avctx, AV_LOG_ERROR,
>                    "Sum of coeff_bits and coeff_shift for %cIR filter must be 16 or less\n",
>                    fchar);
>             return -1;
>         }
> 
>         for (i = 0; i < order; i++)
>             m->filter_coeff[channel][filter][i] =
>                     get_sbits(gbp, coeff_bits) << coeff_shift;

Cant coeff_shift just be subtracted from filter_coeff_q ?

[...]

>                 s->matrix_ch[mat] = get_bits(gbp, 4);
>                 frac_bits = get_bits(gbp, 4);
>                 s->lsb_bypass[mat] = get_bits1(gbp);

vertical align

[...]
> /** Read a block of PCM residual (or actual if no filtering active) data.
>  */
> 
> static int read_block_data(MLPDecodeContext *m, GetBitContext *gbp,
>                            unsigned int substr)
> {
>     SubStream *s = &m->substream[substr];
>     unsigned int i, ch, expected_stream_pos = 0;
> 
>     if (s->data_check_present) {

>         expected_stream_pos = get_bits_count(gbp) + get_bits(gbp, 16);

is there something that prevents gcc from doing get_bits() first?

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

No snowflake in an avalanche ever feels responsible. -- Voltaire
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20080702/ef899aae/attachment.pgp>