[FFmpeg-devel] [PATCH] wmapro decoder

Sun Jun 7 23:05:20 CEST 2009

On Mon, Jun 01, 2009 at 05:28:07PM +0200, Sascha Sommer wrote:
> Hi,
> 
> attached patch adds support for decoding wmapro.
> 
> I'm awaiting your review so that we can sort out the remaining issues.
[...]
> +/**
> + *@brief Get the samples per frame for this stream.
> + *@param sample_rate output sample_rate
> + *@param decode_flags codec compression features
> + *@return number of output samples per frame
> + */
> +static int av_cold wma_get_samples_per_frame(int sample_rate,
> +                                             unsigned int decode_flags)
> +{
> +
> +    int samples_per_frame;
> +    int tmp;
> +
> +    if (sample_rate <= 16000)
> +        samples_per_frame = 512;
> +    else if (sample_rate <= 22050)
> +        samples_per_frame = 1024;
> +    else if (sample_rate <= 48000)
> +        samples_per_frame = 2048;
> +    else if (sample_rate <= 96000)
> +        samples_per_frame = 4096;
> +    else
> +        samples_per_frame = 8192;

mergeable with the related code from ff_wma_init()

[...]
> +/**
> + *@brief Uninitialize the decoder and free all resources.
> + *@param avctx codec context
> + *@return 0 on success, < 0 otherwise
> + */
> +static av_cold int wma_decode_end(AVCodecContext *avctx)
> +{
> +    WMA3DecodeContext *s = avctx->priv_data;
> +    int i;
> +
> +    av_free(s->num_sfb);
> +    av_free(s->sfb_offsets);
> +    av_free(s->subwoofer_cutoffs);
> +    av_free(s->sf_offsets);
> +
> +    av_free(s->def_decorrelation_mat);

I tend to prefer av_freep() for the sake of safety

> +
> +    for (i=0 ; i<BLOCK_NB_SIZES ; i++)
> +        ff_mdct_end(&s->mdct_ctx[i]);
> +
> +    return 0;
> +}
> +
> +/**
> + *@brief Initialize the decoder.
> + *@param avctx codec context
> + *@return 0 on success, -1 otherwise
> + */
> +static av_cold int wma_decode_init(AVCodecContext *avctx)
> +{
> +    WMA3DecodeContext *s = avctx->priv_data;
> +    uint8_t *edata_ptr = avctx->extradata;
> +    int16_t* sfb_offsets;
> +    unsigned int channel_mask;
> +    int i;
> +
> +    s->avctx = avctx;
> +    dsputil_init(&s->dsp, avctx);
> +
> +    /** FIXME: is this really the right thing to do for 24 bits? */
> +    s->sample_bit_depth = 16; // avctx->bits_per_sample;
> +    if (avctx->extradata_size >= 18) {
> +        s->decode_flags     = AV_RL16(edata_ptr+14);
> +        channel_mask    = AV_RL32(edata_ptr+2);
> +//        s->sample_bit_depth = AV_RL16(edata_ptr);
> +

> +        /** dump the extradata */
> +        for (i=0 ; i<avctx->extradata_size ; i++)
> +            av_log(avctx, AV_LOG_DEBUG, "[%x] ",avctx->extradata[i]);
> +        av_log(avctx, AV_LOG_DEBUG, "\n");

this stuff will have to be dropped or made accessible via AVCodecContext.debug
in the final version

> +
> +    } else {
> +        av_log(avctx, AV_LOG_ERROR, "Unknown extradata size %d.\n",
> +                      avctx->extradata_size);
> +        return -1;
> +    }
> +
> +    /** generic init */
> +    s->log2_frame_size = av_log2(avctx->block_align*8)+1;
> +
> +    /** frame info */
> +    s->skip_frame = 1; /** skip first frame */
> +    s->packet_loss = 1;
> +    s->len_prefix = (s->decode_flags & 0x40) >> 6;
> +

> +    if (!s->len_prefix) {
> +         av_log(avctx, AV_LOG_ERROR, "no length prefix, please report\n");

ff_log_ask_for_sample

> +         return -1;
> +    }
> +
> +    /** get frame len */
> +    s->samples_per_frame = wma_get_samples_per_frame(avctx->sample_rate,
> +                                                 s->decode_flags);
> +
> +    /** init previous block len */
> +    for (i=0;i<avctx->channels;i++)
> +        s->channel[i].prev_block_len = s->samples_per_frame;
> +
> +    /** subframe info */

> +    s->max_num_subframes = 1 << ((s->decode_flags & 0x38) >> 3);
> +    s->num_possible_block_sizes = av_log2(s->max_num_subframes) + 1;

IMHO ugly, i mean 1<< and then log2

> +    s->min_samples_per_subframe = s->samples_per_frame / s->max_num_subframes;

>>

> +    s->dynamic_range_compression = (s->decode_flags & 0x80) >> 7;
> +
> +    if (s->max_num_subframes > MAX_SUBFRAMES) {
> +        av_log(avctx, AV_LOG_ERROR, "invalid number of subframes %i\n",
> +                      s->max_num_subframes);
> +        return -1;
> +    }
> +
> +    s->num_channels = avctx->channels;
> +
> +    /** extract lfe channel position */
> +    s->lfe_channel = -1;
> +

> +    if (channel_mask & 8) {
> +        unsigned int mask = 1;
> +        for (i=0;i<32;i++) {
> +            if (channel_mask & mask)
> +                ++s->lfe_channel;
> +            if (mask & 8)
> +                break;
> +            mask <<= 1;
> +        }
> +    }

looks buggy
the loop is written for 32 iterations but the test on mask limits it to 4

> +
> +    if (s->num_channels < 0 || s->num_channels > MAX_CHANNELS) {
> +        av_log(avctx, AV_LOG_ERROR, "invalid number of channels %i\n",
> +                      s->num_channels);
> +        return -1;
> +    }
> +
> +    INIT_VLC_STATIC(&sf_vlc, SCALEVLCBITS, FF_WMA3_HUFF_SCALE_SIZE,
> +                 ff_wma3_scale_huffbits, 1, 1,
> +                 ff_wma3_scale_huffcodes, 4, 4, 616);
> +
> +    INIT_VLC_STATIC(&sf_rl_vlc, VLCBITS, FF_WMA3_HUFF_SCALE_RL_SIZE,
> +                 ff_wma3_scale_rl_huffbits, 1, 1,
> +                 ff_wma3_scale_rl_huffcodes, 4, 4, 1406);
> +
> +    INIT_VLC_STATIC(&coef_vlc[0], VLCBITS, FF_WMA3_HUFF_COEF0_SIZE,
> +                 ff_wma3_coef0_huffbits, 1, 1,
> +                 ff_wma3_coef0_huffcodes, 4, 4, 2108);
> +
> +    s->coef_max[0] = ((FF_WMA3_HUFF_COEF0_MAXBITS+VLCBITS-1)/VLCBITS);
> +
> +    INIT_VLC_STATIC(&coef_vlc[1], VLCBITS, FF_WMA3_HUFF_COEF1_SIZE,
> +                 ff_wma3_coef1_huffbits, 1, 1,
> +                 ff_wma3_coef1_huffcodes, 4, 4, 3912);
> +
> +    s->coef_max[1] = ((FF_WMA3_HUFF_COEF1_MAXBITS+VLCBITS-1)/VLCBITS);
> +
> +    INIT_VLC_STATIC(&vec4_vlc, VLCBITS, FF_WMA3_HUFF_VEC4_SIZE,
> +                 ff_wma3_vec4_huffbits, 1, 1,
> +                 ff_wma3_vec4_huffcodes, 4, 4, 604);
> +
> +    INIT_VLC_STATIC(&vec2_vlc, VLCBITS, FF_WMA3_HUFF_VEC2_SIZE,
> +                 ff_wma3_vec2_huffbits, 1, 1,
> +                 ff_wma3_vec2_huffcodes, 4, 4, 562);
> +
> +    INIT_VLC_STATIC(&vec1_vlc, VLCBITS, FF_WMA3_HUFF_VEC1_SIZE,
> +                 ff_wma3_vec1_huffbits, 1, 1,
> +                 ff_wma3_vec1_huffcodes, 4, 4, 562);
> +
> +    s->num_sfb = av_mallocz(sizeof(int8_t)*s->num_possible_block_sizes);
> +    s->sfb_offsets = av_mallocz(MAX_BANDS *
> +                                sizeof(int16_t) * s->num_possible_block_sizes);
> +    s->subwoofer_cutoffs = av_mallocz(sizeof(int16_t) *
> +                                      s->num_possible_block_sizes);
> +    s->sf_offsets = av_mallocz(MAX_BANDS * s->num_possible_block_sizes *
> +                               s->num_possible_block_sizes * sizeof(int16_t));
> +
> +    if (!s->num_sfb ||
> +       !s->sfb_offsets || !s->subwoofer_cutoffs || !s->sf_offsets) {
> +        av_log(avctx, AV_LOG_ERROR,
> +                      "failed to allocate scale factor offset tables\n");
> +        wma_decode_end(avctx);
> +        return -1;
> +    }
> +
> +    /** calculate number of scale factor bands and their offsets
> +        for every possible block size */
> +    sfb_offsets = s->sfb_offsets;
> +
> +    for (i=0;i<s->num_possible_block_sizes;i++) {

> +        int subframe_len = s->samples_per_frame / (1 << i);

samples_per_frame >> i

[...]
> +/**
> + *@brief Decode how the data in the frame is split into subframes.
> + *       Every WMA frame contains the encoded data for a fixed number of
> + *       samples per channel. The data for every channel might be split
> + *       into several subframes. This function will reconstruct the list of
> + *       subframes for every channel.
> + *
> + *       If the subframes are not evenly split, the algorithm estimates the
> + *       channels with the lowest number of total samples.
> + *       Afterwards, for each of these channels a bit is read from the
> + *       bitstream that indicates if the channel contains a frame with the
> + *       next subframe size that is going to be read from the bitstream or not.
> + *       If a channel contains such a subframe, the subframe size gets added to
> + *       the channel's subframe list.
> + *       The algorithm repeats these steps until the frame is properly divided
> + *       between the individual channels.
> + *
> + *@param s context
> + *@return 0 on success, < 0 in case of an error
> + */
> +static int wma_decode_tilehdr(WMA3DecodeContext *s)
> +{
> +    int c;
> +    int missing_samples = s->num_channels * s->samples_per_frame;
> +
> +    /* should never consume more than 3073 bits (256 iterations for the
> +     * while loop when always the minimum amount of 128 samples is substracted
> +     * from missing samples in the 8 channel case)
> +     * 1 + BLOCK_MAX_SIZE * MAX_CHANNELS / BLOCK_MIN_SIZE * (MAX_CHANNELS  + 4)
> +     */
> +
> +    /** reset tiling information */
> +    for (c=0;c<s->num_channels;c++) {
> +        s->channel[c].num_subframes = 0;
> +        s->channel[c].channel_len = 0;
> +    }
> +
> +    /** handle the easy case with one constant-sized subframe per channel */
> +    if (s->max_num_subframes == 1) {
> +        for (c=0;c<s->num_channels;c++) {
> +            s->channel[c].num_subframes = 1;
> +            s->channel[c].subframe_len[0] = s->samples_per_frame;
> +            s->channel[c].channel_len = 0;
> +        }
> +    }else{ /** subframe length and number of subframes is not constant */
> +        /** bits needed for the subframe length */
> +        int subframe_len_bits = 0;
> +        /** first bit indicates if length is zero */
> +        int subframe_len_zero_bit = 0;
> +        /** all channels have the same subframe layout */
> +        int fixed_channel_layout;
> +
> +        fixed_channel_layout = get_bits1(&s->gb);
> +

> +        /** calculate subframe len bits */
> +        if (s->lossless)
> +            subframe_len_bits = av_log2(s->max_num_subframes - 1) + 1;
> +        else if (s->max_num_subframes == 16) {

if{
}else

[...]
> +/**
> + *@brief Extract the coefficients from the bitstream.
> + *@param s codec context
> + *@param c current channel number
> + *@return 0 in case of bitstream errors, 1 on success
> + */
> +static int wma_decode_coeffs(WMA3DecodeContext *s, int c)
> +{
> +    int vlctable;
> +    VLC* vlc;
> +    int vlcmax;
> +    WMA3ChannelCtx* ci = &s->channel[c];
> +    int rl_mode = 0;
> +    int cur_coeff = 0;
> +    int num_zeros = 0;
> +    const uint8_t* run;
> +    const uint8_t* level;
> +    int zero_init = 0;
> +    int rl_switchmask = (s->subframe_len>>8);
> +
> +    av_log(s->avctx,AV_LOG_DEBUG,"decode coefficients for channel %i\n",c);
> +
> +    vlctable = get_bits1(&s->gb);
> +    vlc = &coef_vlc[vlctable];
> +    vlcmax = s->coef_max[vlctable];
> +
> +    if (vlctable) {
> +        run = ff_wma3_coef1_run;
> +        level = ff_wma3_coef1_level;
> +    }else{
> +        run = ff_wma3_coef0_run;
> +        level = ff_wma3_coef0_level;
> +    }
> +
> +    /** for subframe_len 128 the first zero coefficient will switch to
> +        the run level mode */
> +    if (s->subframe_len == 128) {
> +        zero_init = num_zeros = 1;
> +        rl_switchmask = 1;
> +    }
> +
> +    /** decode vector coefficients (consumes up to 167 bits per iteration for
> +      4 vector coded large values) */
> +    while (!rl_mode && cur_coeff + 3 < s->subframe_len) {
> +        int vals[4];
> +        int i;
> +        unsigned int idx;
> +
> +        idx = get_vlc2(&s->gb, vec4_vlc.table, VLCBITS,
> +                       ((FF_WMA3_HUFF_VEC4_MAXBITS+VLCBITS-1)/VLCBITS));
> +
> +        if ( idx == FF_WMA3_HUFF_VEC4_SIZE - 1 ) {

> +            i = 0;
> +            while (i < 4) {
[...]
> +                i += 2;

for()

[...]
> +/**
> + *@brief Extract scale factors from the bitstream.
> + *@param s codec context
> + *@return 0 in case of bitstream errors, 1 on success
> + */
> +static int wma_decode_scale_factors(WMA3DecodeContext* s)
> +{
> +    int i;
> +
> +    /** should never consume more than 5344 bits
> +     *  MAX_CHANNELS * (1 +  MAX_BANDS * 23)
> +     */
> +
> +    for (i=0;i<s->channels_for_cur_subframe;i++) {
> +        int c = s->channel_indexes_for_cur_subframe[i];
> +        int* sf;
> +        int* sf_end = s->channel[c].scale_factors + s->num_bands;
> +
> +        /** resample scale factors for the new block size */
> +        if (s->channel[c].reuse_sf) {
> +            const int blocks_per_frame = s->samples_per_frame/s->subframe_len;
> +            const int res_blocks_per_frame = s->samples_per_frame /
> +                                          s->channel[c].scale_factor_block_len;
> +            const int idx0 = av_log2(blocks_per_frame);
> +            const int idx1 = av_log2(res_blocks_per_frame);
> +            const int16_t* sf_offsets =
> +                               &s->sf_offsets[s->num_possible_block_sizes *
> +                               MAX_BANDS  * idx0 + MAX_BANDS * idx1];
> +            int b;
> +            for (b=0;b<s->num_bands;b++)
> +                s->channel[c].resampled_scale_factors[b] =
> +                                   s->channel[c].scale_factors[*sf_offsets++];
> +
> +            s->channel[c].max_scale_factor =
> +                                   s->channel[c].resampled_scale_factors[0];
> +            sf = s->channel[c].resampled_scale_factors + 1;
> +            while (sf < s->channel[c].resampled_scale_factors + s->num_bands) {
> +                if (*sf > s->channel[c].max_scale_factor)
> +                    s->channel[c].max_scale_factor = *sf;
> +                ++sf;
> +            }
> +        }
> +
> +        if (s->channel[c].cur_subframe > 0) {
> +            s->channel[c].transmit_sf = get_bits1(&s->gb);
> +        }else
> +            s->channel[c].transmit_sf = 1;
> +
> +        if (s->channel[c].transmit_sf) {
> +
> +            if (!s->channel[c].reuse_sf) {
> +                int val;
> +                /** decode DPCM coded scale factors */
> +                s->channel[c].scale_factor_step = get_bits(&s->gb,2) + 1;
> +                val = get_vlc2(&s->gb, sf_vlc.table, SCALEVLCBITS,
> +                   ((FF_WMA3_HUFF_SCALE_MAXBITS+SCALEVLCBITS-1)/SCALEVLCBITS));
> +                s->channel[c].scale_factors[0] = 45 /
> +                              s->channel[c].scale_factor_step + val - 60;

> +                for (sf = s->channel[c].scale_factors + 1; sf < sf_end; sf++) {
> +                    val = get_vlc2(&s->gb, sf_vlc.table, SCALEVLCBITS,
> +                  ((FF_WMA3_HUFF_SCALE_MAXBITS+SCALEVLCBITS-1)/SCALEVLCBITS));
> +                    *sf = *(sf - 1) + val - 60;
> +                }

the way this line is wrapped is very ugly, even not wrapping at all would be
better
also the calculation of the stages could be moved into some define, greatly
simplifying the look of get_vlc*

val= 45 / s->channel[c].scale_factor_step;
for(sf= s->channel[c].scale_factors...){
    val += get_vlc2() - 60;
    *sf= val;
}

> +            }else{
> +                int i;
> +                /** run level decode differences to the resampled factors */
> +
> +                memcpy(s->channel[c].scale_factors,
> +                       s->channel[c].resampled_scale_factors,
> +                       sizeof(int) * s->num_bands);
> +

> +                for (i=0;i<s->num_bands;i++) {
> +                    int idx;
> +                    short skip;
> +                    short val;
> +                    short sign;

is there any reason why they are short ?

> +
> +                    idx = get_vlc2(&s->gb, sf_rl_vlc.table, VLCBITS,
> +                          ((FF_WMA3_HUFF_SCALE_RL_MAXBITS+VLCBITS-1)/VLCBITS));
> +
> +                    if ( !idx ) {

> +                        uint32_t code = get_bits(&s->gb,14);
> +                        val = code >> 6;
> +                        sign = (code & 1) - 1;
> +                        skip = (code & 0x3f)>>1;

is that faster than 3 get_bits() ?

[...]
> +/**
> + *@brief Decorrelate and undo M/S stereo coding.

decorrelation is what the encoder does

> + *@param s codec context
> + */
> +static void wma_inverse_channel_transform(WMA3DecodeContext *s)
> +{
> +    int i;
> +
> +    for (i=0;i<s->num_chgroups;i++) {
> +
> +        if (s->chgroup[i].transform == 1) {
> +            /** M/S stereo decoding */
> +            int16_t* sfb_offsets = s->cur_sfb_offsets;
> +            float* ch0 = *sfb_offsets + s->channel[0].coeffs;
> +            float* ch1 = *sfb_offsets++ + s->channel[1].coeffs;
> +            const char* tb = s->chgroup[i].transform_band;
> +            const char* tb_end = tb + s->num_bands;
> +
> +            while (tb < tb_end) {
> +                const float* ch0_end = s->channel[0].coeffs +
> +                                       FFMIN(*sfb_offsets,s->subframe_len);
> +                if (*tb++ == 1) {
> +                    while (ch0 < ch0_end) {
> +                        const float v1 = *ch0;
> +                        const float v2 = *ch1;
> +                        *ch0++ = v1 - v2;
> +                        *ch1++ = v1 + v2;
> +                    }
> +                }else{
> +                    while (ch0 < ch0_end) {
> +                        *ch0++ *= 181.0 / 128;
> +                        *ch1++ *= 181.0 / 128;
> +                    }
> +                }
> +                ++sfb_offsets;
> +            }
> +        }else if (s->chgroup[i].transform) {
> +            float data[MAX_CHANNELS];
> +            const int num_channels = s->chgroup[i].num_channels;
> +            float** ch_data = s->chgroup[i].channel_data;
> +            float** ch_end = ch_data + num_channels;
> +            const int8_t* tb = s->chgroup[i].transform_band;
> +            int16_t* sfb;
> +
> +            /** multichannel decorrelation */
> +            for (sfb = s->cur_sfb_offsets ;
> +                sfb < s->cur_sfb_offsets + s->num_bands;sfb++) {
> +                if (*tb++ == 1) {
> +                    int y;
> +                    /** multiply values with the decorrelation_matrix */
> +                    for (y=sfb[0];y<FFMIN(sfb[1], s->subframe_len);y++) {
> +                        const float* mat = s->chgroup[i].decorrelation_matrix;
> +                        const float* data_end= data + num_channels;
> +                        float* data_ptr= data;
> +                        float** ch;
> +
> +                        for (ch = ch_data;ch < ch_end; ch++)
> +                           *data_ptr++ = (*ch)[y];
> +
> +                        for (ch = ch_data; ch < ch_end; ch++) {
> +                            float sum = 0;
> +                            data_ptr = data;
> +                            while (data_ptr < data_end)
> +                                sum += *data_ptr++ * *mat++;
> +
> +                            (*ch)[y] = sum;
> +                        }
> +                    }
> +                }
> +            }
> +        }
> +    }
> +}

isn't the if() a special case of the else? if so, this can be simplified,
and if it's faster as is, the special case could be limited to the inner
loop

[...]
> +/**
> + *@brief Decode a single subframe (block).
> + *@param s codec context
> + *@return 0 if decoding failed, 1 on success
> + */
> +static int wma_decode_subframe(WMA3DecodeContext *s)
> +{
> +    int offset = s->samples_per_frame;
> +    int subframe_len = s->samples_per_frame;
> +    int i;
> +    int total_samples = s->samples_per_frame * s->num_channels;
> +    int transmit_coeffs = 0;
> +
> +    s->subframe_offset = get_bits_count(&s->gb);
> +
> +    /** reset channel context and find the next block offset and size
> +        == the next block of the channel with the smallest number of
> +        decoded samples
> +    */
> +    for (i=0;i<s->num_channels;i++) {
> +        s->channel[i].grouped = 0;
> +        if (offset > s->channel[i].decoded_samples) {
> +            offset = s->channel[i].decoded_samples;
> +            subframe_len =
> +                s->channel[i].subframe_len[s->channel[i].cur_subframe];
> +        }
> +    }
> +
> +    av_log(s->avctx, AV_LOG_DEBUG,
> +           "processing subframe with offset %i len %i\n",offset,subframe_len);
> +
> +    /** get a list of all channels that contain the estimated block */
> +    s->channels_for_cur_subframe = 0;
> +    for (i=0;i<s->num_channels;i++) {
> +        const int cur_subframe = s->channel[i].cur_subframe;
> +        /** substract already processed samples */
> +        total_samples -= s->channel[i].decoded_samples;
> +
> +        /** and count if there are multiple subframes that match our profile */
> +        if (offset == s->channel[i].decoded_samples &&
> +           subframe_len == s->channel[i].subframe_len[cur_subframe]) {
> +            total_samples -= s->channel[i].subframe_len[cur_subframe];
> +            s->channel[i].decoded_samples +=
> +                s->channel[i].subframe_len[cur_subframe];
> +            s->channel_indexes_for_cur_subframe[s->channels_for_cur_subframe] = i;
> +            ++s->channels_for_cur_subframe;
> +        }
> +    }
> +
> +    /** check if the frame will be complete after processing the
> +        estimated block */
> +    if (!total_samples)
> +        s->parsed_all_subframes = 1;
> +
> +
> +    av_log(s->avctx, AV_LOG_DEBUG,"subframe is part of %i channels\n",
> +           s->channels_for_cur_subframe);
> +
> +    /** calculate number of scale factor bands and their offsets */
> +    if (subframe_len == s->samples_per_frame) {
> +        s->num_bands = s->num_sfb[0];
> +        s->cur_sfb_offsets = s->sfb_offsets;
> +        s->cur_subwoofer_cutoff = s->subwoofer_cutoffs[0];
> +    }else{
> +        int frame_offset = av_log2(s->samples_per_frame/subframe_len);
> +        s->num_bands = s->num_sfb[frame_offset];
> +        s->cur_sfb_offsets = &s->sfb_offsets[MAX_BANDS * frame_offset];
> +        s->cur_subwoofer_cutoff = s->subwoofer_cutoffs[frame_offset];
> +    }
> +
> +    /** configure the decoder for the current subframe */
> +    for (i=0;i<s->channels_for_cur_subframe;i++) {
> +        int c = s->channel_indexes_for_cur_subframe[i];
> +
> +        s->channel[c].coeffs = &s->channel[c].out[(s->samples_per_frame>>1)
> +                                                  + offset];
> +        memset(s->channel[c].coeffs,0,sizeof(float) * subframe_len);
> +
> +        /** init some things if this is the first subframe */
> +        if (!s->channel[c].cur_subframe) {
> +              s->channel[c].scale_factor_step = 1;
> +              s->channel[c].max_scale_factor = 0;
> +              memset(s->channel[c].scale_factors, 0,
> +                     sizeof(s->channel[c].scale_factors));
> +              memset(s->channel[c].resampled_scale_factors, 0,
> +                     sizeof(s->channel[c].resampled_scale_factors));
> +        }
> +
> +    }
> +
> +    s->subframe_len = subframe_len;
> +    s->esc_len = av_log2(s->subframe_len - 1) + 1;
> +
> +    /** skip extended header if any */
> +    if (get_bits1(&s->gb)) {
> +        int num_fill_bits;
> +        if (!(num_fill_bits = get_bits(&s->gb,2))) {
> +            num_fill_bits = get_bits(&s->gb,4);
> +            num_fill_bits = get_bits(&s->gb,num_fill_bits) + 1;
> +        }
> +
> +        if (num_fill_bits >= 0) {
> +            if (get_bits_count(&s->gb) + num_fill_bits > s->num_saved_bits) {
> +                av_log(s->avctx,AV_LOG_ERROR,"invalid number of fill bits\n");
> +                return 0;
> +            }
> +
> +            skip_bits_long(&s->gb,num_fill_bits);
> +        }
> +    }
> +
> +    /** no idea for what the following bit is used */
> +    if (get_bits1(&s->gb)) {
> +        av_log(s->avctx,AV_LOG_ERROR,"reserved bit set\n");
> +        return 0;
> +    }
> +
> +
> +    if (!wma_decode_channel_transform(s))
> +        return 0;
> +
> +
> +    for (i=0;i<s->channels_for_cur_subframe;i++) {
> +        int c = s->channel_indexes_for_cur_subframe[i];
> +        if ((s->channel[c].transmit_coefs = get_bits1(&s->gb)))
> +            transmit_coeffs = 1;
> +    }
> +
> +    s->quant_step = 90 * s->sample_bit_depth >> 4;
> +

> +    if (transmit_coeffs) {
> +        int quant;
> +        int sign = 1;
> +        if ((get_bits1(&s->gb))) {
> +            /** FIXME: might change run level mode decision */
> +            av_log(s->avctx,AV_LOG_ERROR,"unsupported quant step coding\n");
> +            return 0;
> +        }
> +        /** decode quantization step */
> +        quant = get_bits(&s->gb,6);
> +        if (quant & 0x20) {
> +            quant |= 0xFFFFFFC0u;
> +            sign = -1;
> +        }

get_sbits()

> +        s->quant_step += quant;
> +        if (quant <= -32 || quant > 30) {
> +            while (get_bits_count(&s->gb) + 5 < s->num_saved_bits) {
> +                quant = get_bits(&s->gb,5);
> +                if (quant != 31) {
> +                    s->quant_step += quant * sign;
> +                    break;
> +                }
> +                s->quant_step += 31 * sign;
> +                if (s->quant_step < 0) {
> +                    av_log(s->avctx,AV_LOG_DEBUG,"negative quant step\n");
> +                }
> +            }
> +        }
> +
> +        /** decode quantization step modifiers for every channel */
> +
> +        if (s->channels_for_cur_subframe == 1)
> +            s->channel[s->channel_indexes_for_cur_subframe[0]].quant_step_modifier = 0;
> +        else{
> +            int modifier_len = get_bits(&s->gb,3);
> +            for (i=0;i<s->channels_for_cur_subframe;i++) {
> +                int c = s->channel_indexes_for_cur_subframe[i];
> +                s->channel[c].quant_step_modifier = 0;
> +                if (get_bits1(&s->gb)) {
> +                    if (modifier_len)
> +                        s->channel[c].quant_step_modifier =
> +                                get_bits(&s->gb,modifier_len) + 1;
> +                    else
> +                        s->channel[c].quant_step_modifier = 1;
> +                }else
> +                    s->channel[c].quant_step_modifier = 0;
> +
> +            }
> +        }
> +
> +        /** decode scale factors */
> +        if (!wma_decode_scale_factors(s))
> +            return 0;
> +    }
> +
> +    av_log(s->avctx,AV_LOG_DEBUG,"BITSTREAM: subframe header length was %i\n",
> +           get_bits_count(&s->gb) - s->subframe_offset);
> +
> +    /** parse coefficients */
> +    for (i=0;i<s->channels_for_cur_subframe;i++) {
> +        int c = s->channel_indexes_for_cur_subframe[i];
> +        if (s->channel[c].transmit_coefs &&
> +           get_bits_count(&s->gb) < s->num_saved_bits)
> +                wma_decode_coeffs(s,c);
> +    }
> +
> +    av_log(s->avctx,AV_LOG_DEBUG,"BITSTREAM: subframe length was %i\n",
> +           get_bits_count(&s->gb) - s->subframe_offset);
> +
> +    if (transmit_coeffs) {
> +        /** reconstruct the per channel data */
> +        wma_inverse_channel_transform(s);
> +        for (i=0;i<s->channels_for_cur_subframe;i++) {
> +            int c = s->channel_indexes_for_cur_subframe[i];
> +            int b;
> +            if (c == s->lfe_channel)
> +                memset(&s->tmp[s->cur_subwoofer_cutoff],0,
> +                     sizeof(float) * (subframe_len - s->cur_subwoofer_cutoff));
> +
> +            /** inverse quantization and rescaling */
> +            for (b=0;b<s->num_bands;b++) {
> +                int start = s->cur_sfb_offsets[b];

> +                int end = s->cur_sfb_offsets[b+1];
> +                int sf = s->channel[c].max_scale_factor;
> +                float quant;
> +                if (end > s->subframe_len)
> +                    end = s->subframe_len;

FFMIN

> +
> +                if (s->channel[c].transmit_sf)
> +                     sf -= s->channel[c].scale_factors[b];
> +                else
> +                     sf -= s->channel[c].resampled_scale_factors[b];
> +                sf *= -s->channel[c].scale_factor_step;
> +                sf += s->quant_step + s->channel[c].quant_step_modifier;

> +                quant = pow(10.0,sf / 20.0);

this can be simplified

> +                while (start < end) {
> +                    s->tmp[start] = s->channel[c].coeffs[start] * quant;
> +                    ++start;
> +                }

for(start= s->cur_sfb_offsets[b]; start < end; ++start)

[...]
> +/**
> + *@brief Decode a single WMA packet.
> + *@param avctx codec context
> + *@param data the output buffer
> + *@param data_size number of bytes that were written to the output buffer
> + *@param avpkt input packet
> + *@return number of bytes that were read from the input buffer
> + */
> +static int wma_decode_packet(AVCodecContext *avctx,
> +                             void *data, int *data_size, AVPacket* avpkt)
> +{
> +    GetBitContext gb;
> +    WMA3DecodeContext *s = avctx->priv_data;
> +    const uint8_t* buf = avpkt->data;
> +    int buf_size = avpkt->size;
> +    int more_frames=1;
> +    int num_bits_prev_frame;
> +    int packet_sequence_number;
> +
> +    s->samples = data;
> +    s->samples_end = (int16_t*)((int8_t*)data + *data_size);
> +    s->buf_bit_size = buf_size << 3;
> +
> +
> +    *data_size = 0;
> +
> +    /** sanity check for the buffer length */
> +    if (buf_size < avctx->block_align)
> +        return 0;
> +
> +    buf_size = avctx->block_align;
> +
> +    /** parse packet header */
> +    init_get_bits(&gb, buf, s->buf_bit_size);
> +    packet_sequence_number    = get_bits(&gb, 4);
> +    skip_bits(&gb, 2);
> +
> +    /** get number of bits that need to be added to the previous frame */
> +    num_bits_prev_frame = get_bits(&gb, s->log2_frame_size);
> +    av_log(avctx, AV_LOG_DEBUG, "packet[%d]: nbpf %x\n", avctx->frame_number,
> +                  num_bits_prev_frame);
> +
> +    /** check for packet loss */
> +    if (!s->packet_loss &&
> +        ((s->packet_sequence_number + 1)&0xF) != packet_sequence_number) {
> +        s->packet_loss = 1;
> +        av_log(avctx, AV_LOG_ERROR, "Packet loss detected! seq %x vs %x\n",
> +                      s->packet_sequence_number,packet_sequence_number);
> +    }
> +    s->packet_sequence_number = packet_sequence_number;
> +
> +    if (num_bits_prev_frame > 0) {
> +        /** append the previous frame data to the remaining data from the
> +            previous packet to create a full frame */
> +        wma_save_bits(s, &gb, num_bits_prev_frame, 1);
> +        av_log(avctx, AV_LOG_DEBUG, "accumulated %x bits of frame data\n",
> +                      s->num_saved_bits - s->frame_offset);
> +
> +        /** decode the cross packet frame if it is valid */
> +        if (!s->packet_loss)
> +            wma_decode_frame(s);
> +    }else if (s->num_saved_bits - s->frame_offset) {
> +        av_log(avctx, AV_LOG_DEBUG, "ignoring %x previously saved bits\n",
> +                      s->num_saved_bits - s->frame_offset);
> +    }
> +
> +    s->packet_loss = 0;
> +    /** decode the rest of the packet */
> +    while (!s->packet_loss && more_frames &&
> +          wma_remaining_bits(s,&gb) > s->log2_frame_size) {
> +        int frame_size = show_bits(&gb, s->log2_frame_size);
> +
> +        /** there is enough data for a full frame */
> +        if (wma_remaining_bits(s,&gb) >= frame_size && frame_size > 0) {
> +            wma_save_bits(s, &gb, frame_size, 0);
> +
> +            /** decode the frame */
> +            more_frames = wma_decode_frame(s);
> +
> +            if (!more_frames) {
> +                av_log(avctx, AV_LOG_DEBUG, "no more frames\n");
> +            }
> +        }else
> +            more_frames = 0;
> +    }
> +

> +    if (!s->packet_loss) {
> +        /** save the rest of the data so that it can be decoded
> +            with the next packet */
> +        wma_save_bits(s, &gb, wma_remaining_bits(s,&gb), 0);
> +    }

And if a packet is lost, is the data simply thrown away?
Is that data always useless, or could it belong to a correctly decodable frame?

[...]
> +/**
> + * @brief decoder context for a single channel
> + */
> +typedef struct {
> +    int16_t  prev_block_len;                          ///< length of the previous block
> +    uint8_t  transmit_coefs;                          ///< transmit coefficients
> +    uint8_t  num_subframes;                           ///< number of subframes
> +    uint16_t subframe_len[MAX_SUBFRAMES];             ///< subframe length in samples
> +    uint16_t subframe_offset[MAX_SUBFRAMES];          ///< subframe position
> +    uint8_t  cur_subframe;                            ///< subframe index
> +    uint16_t channel_len;                             ///< channel length in samples
> +    uint16_t decoded_samples;                         ///< already processed samples
> +    uint8_t  grouped;                                 ///< channel is part of a group
> +    int8_t   quant_step_modifier;                     ///< deviation from the main quantization step
> +    int8_t   transmit_sf;                             ///< transmit scale factors
> +    int8_t   reuse_sf;                                ///< share scale factors between subframes
> +    int8_t   scale_factor_step;                       ///< scaling step
> +    int      max_scale_factor;                        ///< maximum scale factor
> +    int      scale_factors[MAX_BANDS];                ///< scale factor values
> +    int      resampled_scale_factors[MAX_BANDS];      ///< scale factors from a previous block
> +    int16_t  scale_factor_block_len;                  ///< scale factor reference block length
> +    float*   coeffs;                                  ///< pointer to the decode buffer
> +    DECLARE_ALIGNED_16(float, out[2*BLOCK_MAX_SIZE]); ///< output buffer
> +} WMA3ChannelCtx;
> +

> +/**
> + * @brief channel group for channel transformations
> + */
> +typedef struct {
> +    uint8_t num_channels;                                     ///< number of channels in the group
> +    char    transform;                                        ///< controls the type of the transform
> +    char    transform_band[MAX_BANDS];                        ///< controls if the transform is enabled for a certain band

You use int8_t/uint8_t in some places and char here ...
Besides that, I wonder why intXY_t is used instead of int in some cases.

[...]
> +static const uint32_t ff_wma3_scale_huffcodes[FF_WMA3_HUFF_SCALE_SIZE] = {
> +    0x0E639, 0x0E6C2, 0x0E6C1, 0x0E6C0, 0x0E63F, 0x0E63E, 0x0E63D, 0x0E63C,
> +    0x0E63B, 0x0E63A, 0x0E638, 0x0E637, 0x0E636, 0x0E635, 0x0E634, 0x0E632,
> +    0x0E633, 0x0E620, 0x0737B, 0x0E610, 0x0E611, 0x0E612, 0x0E613, 0x0E614,
> +    0x0E615, 0x0E616, 0x0E617, 0x0E618, 0x0E619, 0x0E61A, 0x0E61B, 0x0E61C,
> +    0x0E61D, 0x0E61E, 0x0E61F, 0x0E6C3, 0x0E621, 0x0E622, 0x0E623, 0x0E624,
> +    0x0E625, 0x0E626, 0x0E627, 0x0E628, 0x0E629, 0x0E62A, 0x0E62B, 0x0E62C,
> +    0x0E62D, 0x0E62E, 0x0E62F, 0x0E630, 0x0E631, 0x01CDF, 0x00E60, 0x00399,
> +    0x000E7, 0x0001D, 0x00000, 0x00001, 0x00001, 0x00001, 0x00002, 0x00006,
> +    0x00002, 0x00007, 0x00006, 0x0000F, 0x00038, 0x00072, 0x0039A, 0x0E6C4,
> +    0x0E6C5, 0x0E6C6, 0x0E6C7, 0x0E6C8, 0x0E6C9, 0x0E6CA, 0x0E6CB, 0x0E6CC,
> +    0x0E6CD, 0x0E6CE, 0x0E6CF, 0x0E6D0, 0x0E6D1, 0x0E6D2, 0x0E6D3, 0x0E6D4,
> +    0x0E6D5, 0x0E6D6, 0x0E6D7, 0x0E6D8, 0x0E6D9, 0x0E6DA, 0x0E6DB, 0x0E6DC,
> +    0x0E6DD, 0x0E6DE, 0x0E6DF, 0x0E6E0, 0x0E6E1, 0x0E6E2, 0x0E6E3, 0x0E6E4,
> +    0x0E6E5, 0x0E6E6, 0x0E6E7, 0x0E6E8, 0x0E6E9, 0x0E6EA, 0x0E6EB, 0x0E6EC,
> +    0x0E6ED, 0x0E6EE, 0x0E6EF, 0x0E6F0, 0x0E6F1, 0x0E6F2, 0x0E6F3, 0x0E6F4,
> +    0x0E6F5,
> +};

This one fits in 16 bits, and it's not the only such table.

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Complexity theory is the science of finding the exact solution to an
approximation. Benchmarking OTOH is finding an approximation of the exact
solution.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090607/45b57884/attachment.pgp>