[FFmpeg-devel] [PATCH] ALS decoder

Sun Aug 23 20:29:54 CEST 2009

On Sun, Aug 23, 2009 at 02:00:22PM +0200, Thilo Borgmann wrote:
> Revision 6 attached.
> 
> Depends on ceillog2.rev1.patch.

[...]
> +/** Reads an ALSSpecificConfig from a buffer into the output struct.
> + */
> +static av_cold int read_specific_config(ALSDecContext *ctx)
> +{
> +    GetBitContext gb;
> +    uint64_t ht_size;
> +    int i, config_offset, crc_enabled;
> +    MPEG4AudioConfig m4ac;
> +    ALSSpecificConfig *sconf = &ctx->sconf;
> +    AVCodecContext *avctx    = ctx->avctx;
> +    const uint8_t *buffer    = avctx->extradata;
> +    int buffer_size          = avctx->extradata_size;
> +    uint32_t samples;
> +
> +    init_get_bits(&gb, buffer, buffer_size * 8);
> +
> +    config_offset = ff_mpeg4audio_get_config(&m4ac, buffer, buffer_size);
> +
> +    if (config_offset < 0)
> +        return -1;
> +
> +    skip_bits_long(&gb, config_offset);
> +    buffer_size -= config_offset >> 3;
> +
> +    if (buffer_size < 22)
> +        return -1;
> +
> +    // read the fixed items
> +    sconf->als_id               = get_bits_long(&gb, 32);
> +    avctx->sample_rate          = m4ac.sample_rate;
> +    skip_bits_long(&gb, 32); // sample rate already known
> +    samples                     = get_bits_long(&gb, 32);
> +    avctx->channels             = m4ac.channels;
> +    skip_bits(&gb, 16);      // number of channels already knwon
> +    skip_bits(&gb, 3);       // skip file_type
> +    sconf->resolution           = get_bits(&gb, 3);
> +    sconf->floating             = get_bits1(&gb);
> +    sconf->msb_first            = get_bits1(&gb);
> +    sconf->frame_length         = get_bits(&gb, 16) + 1;
> +    sconf->ra_distance          = get_bits(&gb, 8);
> +    sconf->ra_flag              = get_bits(&gb, 2);
> +    sconf->adapt_order          = get_bits1(&gb);
> +    sconf->coef_table           = get_bits(&gb, 2);
> +    sconf->long_term_prediction = get_bits1(&gb);
> +    sconf->max_order            = get_bits(&gb, 10);
> +    sconf->block_switching      = get_bits(&gb, 2);
> +    sconf->bgmc                 = get_bits1(&gb);
> +    sconf->sb_part              = get_bits1(&gb);
> +    sconf->joint_stereo         = get_bits1(&gb);
> +    sconf->mc_coding            = get_bits1(&gb);
> +    sconf->chan_config          = get_bits1(&gb);
> +    sconf->chan_sort            = get_bits1(&gb);
> +    crc_enabled                 = get_bits1(&gb);
> +    sconf->rlslms               = get_bits1(&gb);
> +    skip_bits(&gb, 5);       // skip 5 reserved bits
> +    sconf->aux_data_enabled     = get_bits1(&gb);
> +    buffer_size -= 22;
> +
> +
> +    // check for ALSSpecificConfig struct
> +    if (sconf->als_id != MKBETAG('A','L','S','\0'))
> +        return -1;
> +
> +    ctx->cur_frame_length = sconf->frame_length;
> +

> +    // allocate quantized parcor coefficient buffer
> +    if (!(ctx->quant_cof = av_malloc(sizeof(int64_t) * sconf->max_order)) ||
> +        !(ctx->lpc_cof = av_malloc(sizeof(int64_t) * sconf->max_order))) {

use sizeof(*ctx->quant_cof) / sizeof(*ctx->lpc_cof) here instead of sizeof(int64_t)


[...]
> +
> +/** Parses the bs_info field to extract the block partitioning used in block switching mode,
> + *  refer to ISO/IEC 14496-3, section 11.6.2.
> + */
> +static void parse_bs_info(uint32_t bs_info, unsigned int n, unsigned int div,
> +                          unsigned int **div_blocks, unsigned int *num_blocks)
> +{

> +    if (n < 31 && ((bs_info >> (30 - n)) & 1)) {
> +        // if the level is valid and the investigated bit n is set
> +        // then recursively check both children at bits (2n+1) and (2n+2)
> +        n   *= 2;
> +        div += 1;

this sounds like it could use get_bits()


> +        parse_bs_info(bs_info, n + 1, div, div_blocks, num_blocks);
> +        parse_bs_info(bs_info, n + 2, div, div_blocks, num_blocks);
> +    } else {
> +        // else the bit is not set or the last level has been reached
> +        // (bit implicitly not set)
> +        **div_blocks = div;
> +        (*div_blocks)++;
> +        (*num_blocks)++;
> +    }
> +}
> +
> +

> +/** Reads and decodes a Rice codeword.
> + */
> +static int64_t decode_rice(GetBitContext *gb, unsigned int k)
> +{
> +    int     max = gb->size_in_bits - get_bits_count(gb) - k;
> +
> +    if (k > 1) {
[...]
> +        q         = get_unary(gb, 0, max);
[...]
> +    } else {
> +        int q;
> +        q = get_unary(gb, 0, max);
[...]

please factorize this out (the get_unary() call is identical in both
branches); also please factorize out anything else that can be factored out


> +/** Converts PARCOR coefficient k to direct filter coefficient.
> + */
> +static void parcor_to_lpc(unsigned int k, int64_t *par, int64_t *cof)
> +{
> +    int i;
> +
> +    for (i = 0; i < (k+1) >> 1; i++) {
> +        int32_t tmp1, tmp2;
> +
> +        tmp1 = cof[    i    ] + ((par[k] * cof[k - i - 1] + (1 << 19)) >> 20);
> +        tmp2 = cof[k - i - 1] + ((par[k] * cof[    i    ] + (1 << 19)) >> 20);
> +        cof[k - i - 1] = tmp2;
> +        cof[    i    ] = tmp1;

as the intermediate tmp* are 32bit, cof can likely be 32bit as well,
making the multiplications faster


> +    }
> +
> +    cof[k] = par[k];
> +}
> +
> +
> +/** Reformat block sizes from log2 format to direct form. Also assure that the
> + *  block sizes of the last frame correspond to the actual number of samples.
> + */
> +static void reconstruct_block_sizes(ALSDecContext *ctx, uint32_t *div_blocks)
> +{
> +    unsigned int b;
> +
> +    // The last frame may have an overdetermined block structure given in
> +    // the bitstream. In that case the defined block structure would need
> +    // more samples than available to be consistent.
> +    // The block structure is actually used but the block sizes are adapted
> +    // to fit the actual number of available samples.
> +    // Example: 5 samples, 2nd level block sizes: 2 2 2 2.
> +    // This results in the actual block sizes:    2 2 1 0.
> +    // This is not specified in 14496-3 but actually done by the reference
> +    // codec RM22 revision 2.
> +    // This appears to happen in case of an odd number of samples in the last
> +    // frame which is actually not allowed by the block length switching part
> +    // of 14496-3.
> +    // The ALS conformance files feature an odd number of samples in the last
> +    // frame.

> +    if (ctx->cur_frame_length == ctx->last_frame_length) {
> +        unsigned int remaining = ctx->cur_frame_length;
> +
> +        for (b = 0; b < ctx->num_blocks; b++) {
> +            div_blocks[b] = ctx->sconf.frame_length >> div_blocks[b];
[...]
> +        }
> +    } else {
> +        for (b = 0; b < ctx->num_blocks; b++)
> +            div_blocks[b] = ctx->sconf.frame_length >> div_blocks[b];

duplicate


[...]
> +/** Reads the block data for a non-constant block
> + */
> +static int read_var_block(ALSDecContext *ctx, unsigned int ra_block,
> +                          int64_t *raw_samples, unsigned int block_length,
> +                          unsigned int *js_blocks, int64_t *raw_other,
> +                          unsigned int *shift_lsbs)
> +{
> +    ALSSpecificConfig *sconf = &ctx->sconf;
> +    AVCodecContext *avctx    = ctx->avctx;
> +    GetBitContext *gb        = &ctx->gb;
> +    unsigned int k;
> +    unsigned int s[8];
> +    unsigned int sub_blocks, sb_length;
> +    unsigned int opt_order  = 1;
> +    int64_t      *quant_cof = ctx->quant_cof;
> +    int64_t      *lpc_cof   = ctx->lpc_cof;
> +    unsigned int start      = 0;
> +    int          sb, smp;
> +    int64_t      y;
> +
> +    *js_blocks  = get_bits1(gb);
> +
> +    // determine the number of sub blocks for entropy decoding
> +    if (!sconf->bgmc && !sconf->sb_part)
> +        sub_blocks = 1;
> +    else if (sconf->bgmc && sconf->sb_part)
> +        sub_blocks = 1 << get_bits(gb, 2);
> +    else
> +        sub_blocks = 1 << (2 * get_bits1(gb));
> +
> +    // do not continue in case of a damaged stream since
> +    // block_length must be evenly divisible by sub_blocks
> +    if (block_length % sub_blocks) {
> +        av_log(avctx, AV_LOG_WARNING,
> +               "Block length is not evenly divisible by the number of sub blocks.\n");
> +        return -1;
> +    }
> +
> +    sb_length = block_length / sub_blocks;
> +
> +
> +    if (sconf->bgmc) {
> +        // TODO: BGMC mode
> +    } else {
> +        s[0] = get_bits(gb, (sconf->resolution > 1) ? 5 : 4);
> +        for (k = 1; k < sub_blocks; k++)
> +            s[k] = s[k - 1] + decode_rice(gb, 0);
> +    }
> +
> +    if (get_bits1(gb)) {
> +        *shift_lsbs = get_bits(gb, 4) + 1;
> +    }
> +
> +
> +    if (!sconf->rlslms) {
> +        int64_t quant_index;
> +
> +        if (sconf->adapt_order) {
> +            int opt_order_length =
> +                    FFMIN(
> +                    av_ceil_log2(sconf->max_order+1),
> +                    FFMAX(av_ceil_log2((block_length >> 3) - 1), 1)
> +                    );
> +            opt_order = get_bits(gb, opt_order_length);
> +        } else {
> +            opt_order = sconf->max_order;
> +        }
> +
> +        if (opt_order) {
> +            if (sconf->coef_table == 3) {
> +
> +                // read coefficient 0
> +                quant_index = get_bits(gb, 7);
> +                quant_cof[0] = parcor_scaled_values[quant_index];;
> +
> +                // read coefficient 1
> +                quant_index = get_bits(gb, 7);
> +                quant_cof[1] = -parcor_scaled_values[quant_index];
> +
> +                // read coefficients 2 to opt_order
> +                for (k = 2; k < opt_order; k++) {
> +                    quant_index = get_bits(gb, 7);
> +                    quant_cof[k] = (quant_index << 14) - (0x7F << 13);
> +                }
> +            } else {
> +                int offset, rice_param, k_max;
> +

> +                // read coefficient 0
> +                offset       = parcor_rice_table[sconf->coef_table][0][0];
> +                rice_param   = parcor_rice_table[sconf->coef_table][0][1];
> +                quant_index  = decode_rice(gb, rice_param) + offset;
> +                quant_cof[0] = parcor_scaled_values[quant_index + 64];
> +
> +                // read coefficient 1
> +                offset       = parcor_rice_table[sconf->coef_table][1][0];
> +                rice_param   = parcor_rice_table[sconf->coef_table][1][1];
> +                quant_index  = decode_rice(gb, rice_param) + offset;
> +                quant_cof[1] = -parcor_scaled_values[quant_index + 64];
> +
> +                // read coefficients 2 to 19
> +                k_max = FFMIN(20, opt_order);
> +                for (k = 2; k < k_max; k++) {
> +                    offset       = parcor_rice_table[sconf->coef_table][k][0];
> +                    rice_param   = parcor_rice_table[sconf->coef_table][k][1];
> +                    quant_index  = decode_rice(gb, rice_param) + offset;
> +                    quant_cof[k] = (quant_index << 14) + (1 << 13);
> +                }

the first 3 lines of these 3 blocks of code are duplicated and can be factorized


[...]
> +/** Reads the block data.
> + */
> +static int read_block_data(ALSDecContext *ctx, unsigned int ra_block,
> +                            int64_t *raw_samples, unsigned int block_length,
> +                            unsigned int *js_blocks, int64_t *raw_other)
> +{
> +    ALSSpecificConfig *sconf = &ctx->sconf;
> +    GetBitContext *gb        = &ctx->gb;
> +    unsigned int shift_lsbs  = 0;

> +    unsigned int block_type;
> +    unsigned int k;
> +
> +    block_type = get_bits1(gb);
> +
> +    if (block_type == 0) {

the temporary variable block_type is unneeded


> +        read_const_block(ctx, raw_samples, block_length, js_blocks);
> +    } else {
> +        if (read_var_block(ctx, ra_block, raw_samples, block_length, js_blocks,
> +                           raw_other, &shift_lsbs))
> +            return -1;
> +    }
> +
> +    if (sconf->rlslms) {
> +        // TODO: read RLSLMS extension data
> +    }
> +
> +    if (!sconf->mc_coding || ctx->js_switch) {
> +        align_get_bits(gb);
> +    }
> +
> +    if (shift_lsbs) {
> +        for (k = 0; k < block_length; k++)
> +            raw_samples[k] <<= shift_lsbs;
> +    }
> +
> +    return 0;
> +}
> +
> +

> +/** Decodes blocks independently.
> + */
> +static int decode_blocks_ind(ALSDecContext *ctx, unsigned int ra_frame,
> +                             unsigned int c, unsigned int *div_blocks,
> +                             unsigned int *js_blocks)
> +{
> +    ALSSpecificConfig *sconf = &ctx->sconf;
> +    int64_t *raw_sample;
> +    unsigned int b, ra_block;
> +    raw_sample = ctx->raw_samples[c];
> +
> +    for (b = 0; b < ctx->num_blocks; b++) {
> +        ra_block = !b && ra_frame;

I'd use ra_frame directly: rename it to ra_block and set it to 0 at the end
of the loop. This simplification can likely be done for more code.


> +        if (read_block_data(ctx, ra_block, raw_sample,
> +                            div_blocks[b], &js_blocks[0], NULL)) {
> +            // damaged block, write zero for the rest of the frame
> +            while (b < ctx->num_blocks) {
> +                memset(raw_sample, 0, div_blocks[b]);
> +                raw_sample += div_blocks[b];
> +                b++;
> +            }
> +            return -1;
> +        }
> +        raw_sample += div_blocks[b];
> +    }
> +

> +    // store carryover raw samples
> +    memmove((ctx->raw_samples[c]) - sconf->max_order,
> +            (ctx->raw_samples[c]) - sconf->max_order + sconf->frame_length,
> +            sizeof(int64_t) * sconf->max_order);

sizeof(an entry of the array) is much better because if the type changes
you don't have to update all that code


> +
> +    return 0;
> +}
> +
> +
> +/** Decodes blocks dependently.
> + */
> +static int decode_blocks(ALSDecContext *ctx, unsigned int ra_frame,
> +                         unsigned int c, unsigned int *div_blocks,
> +                         unsigned int *js_blocks)
> +{
> +    ALSSpecificConfig *sconf = &ctx->sconf;
> +    unsigned int offset = 0;
> +    int64_t *raw_samples_R;
> +    int64_t *raw_samples_L;
> +    unsigned int b, ra_block;
> +
> +    // decode all blocks
> +    for (b = 0; b < ctx->num_blocks; b++) {
> +        unsigned int s;
> +        raw_samples_L = ctx->raw_samples[c    ] + offset;
> +        raw_samples_R = ctx->raw_samples[c + 1] + offset;
> +        ra_block = !b && ra_frame;
> +        if (read_block_data(ctx, ra_block, raw_samples_L, div_blocks[b],
> +                            &js_blocks[0], raw_samples_R) ||
> +            read_block_data(ctx, ra_block, raw_samples_R, div_blocks[b],
> +                            &js_blocks[1], raw_samples_L)) {
> +            // damaged block, write zero for the rest of the frame
> +            while (b < ctx->num_blocks) {
> +                memset(raw_samples_L, 0, div_blocks[b]);
> +                memset(raw_samples_R, 0, div_blocks[b]);
> +                raw_samples_L += div_blocks[b];
> +                raw_samples_R += div_blocks[b];
> +                b++;
> +            }
> +            return -1;
> +        }
> +
> +        // reconstruct joint-stereo blocks
> +        if (js_blocks[0]) {
> +            if (js_blocks[1])
> +                av_log(ctx->avctx, AV_LOG_WARNING, "Invalid channel pair!\n");
> +
> +            for (s = 0; s < div_blocks[b]; s++)
> +                raw_samples_L[s] = raw_samples_R[s] - raw_samples_L[s];
> +        } else if (js_blocks[1]) {
> +            for (s = 0; s < div_blocks[b]; s++)
> +                raw_samples_R[s] = raw_samples_R[s] + raw_samples_L[s];
> +        }
> +
> +        offset += div_blocks[b];
> +    }
> +
> +    // store carryover raw samples
> +    memmove((ctx->raw_samples[c]) - sconf->max_order,
> +            (ctx->raw_samples[c]) - sconf->max_order + sconf->frame_length,
> +            sizeof(int64_t) * sconf->max_order);
> +
> +    memmove((ctx->raw_samples[c + 1]) - sconf->max_order,
> +            (ctx->raw_samples[c + 1]) - sconf->max_order + sconf->frame_length,
> +            sizeof(int64_t) * sconf->max_order);
> +
> +    return 0;
> +}

this looks similar to decode_blocks_ind() i guess some parts could be
factored


[...]
> +/** Initializes the ALS decoder.
> + */
> +static av_cold int decode_init(AVCodecContext *avctx)
> +{
> +    unsigned int c;
> +    unsigned int channel_size;
> +    ALSDecContext *ctx = avctx->priv_data;
> +    ALSSpecificConfig *sconf = &ctx->sconf;
> +    ctx->avctx = avctx;
> +
> +    if (!avctx->extradata) {
> +        av_log(avctx, AV_LOG_ERROR, "Missing required ALS extradata.\n");
> +        return -1;
> +    }
> +
> +    if (read_specific_config(ctx)) {
> +        av_log(avctx, AV_LOG_ERROR, "Reading ALSSpecificConfig failed.\n");
> +        decode_end(avctx);
> +        return -1;
> +    }
> +
> +    if (check_specific_config(ctx)) {
> +        decode_end(avctx);
> +        return -1;
> +    }
> +
> +    if (sconf->floating) {
> +        avctx->sample_fmt          = SAMPLE_FMT_FLT;

> +        avctx->bits_per_raw_sample = 32;

why is this not set to 24 to simplify that if()? I don't think
bits_per_raw_sample currently has a meaning for floats, but maybe I
forgot something


[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Asymptotically faster algorithms should always be preferred if you have
asymptotical amounts of data
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090823/651897c3/attachment.pgp>