[FFmpeg-devel] AMR-NB decoder

Michael Niedermayer michaelni
Fri Aug 14 01:10:19 CEST 2009


On Mon, Aug 10, 2009 at 09:54:53PM +0100, Colin McQuillan wrote:
> 2009/8/6 Michael Niedermayer <michaelni at gmx.at>:
> > On Wed, Aug 05, 2009 at 05:51:36PM +0100, Colin McQuillan wrote:
> >> Attached is a patch for an AMR-NB decoder.
> >>
> >> It is not bit-exact. This makes it tricky to verify, but I have been
> >> checking that internal parameters match the 3GPP decoder for the AMR
> >> test sequences. The PSNR between the input and output is 3.90 to 8.42
> >> which is about the same as the reference decoder. The PSNR between the
> >> two outputs is between 8.50 and 18.16, which seems quite good.
> >
> [...]
> 
> >> +
> >> +
> >> +// The following order* tables are used to convert AMR frame parameters to and
> >> +// from a bitstream. See 3GPP TS 26.101 for more information.
> >> +
> >> +#define AMR_BIT(field, bit)                  {offsetof(AMRNBFrame, field) >> 1, bit}
> >
> >> +/** Specify an LSF parameter bit */
> >> +#define AMR_LSF(variable, bit)               AMR_BIT(lsf[variable], bit)
> >> +/** Specify a subframe-specific bit */
> >> +#define AMR_OF(frame_num, variable, bit)     AMR_BIT(info.subframe[frame_num].variable, bit)
> >> +/** Specify a pitch gain bit */
> >> +#define AMR_PGAIN(frame_num, bit)            AMR_OF(frame_num, p_gain, bit)
> >> +/** Specify a fixed gain bit */
> >> +#define AMR_FIXED_GAIN(frame_num, bit)       AMR_OF(frame_num, fixed_gain, bit)
> >> +/** Specify a pitch lag bit */
> >> +#define AMR_PLAG(frame_num, bit)             AMR_OF(frame_num, p_lag, bit)
> >> +/** Specify a pulse bit */
> >> +#define AMR_PULSES(frame_num, pulse_id, bit) AMR_OF(frame_num, pulses[pulse_id], bit)
> >> +/** Specify an SID reference vector bit */
> >> +#define AMR_SVECTOR(bit)                     AMR_BIT(info.sid.ref_vector, bit)
> >> +/** Specify an SID energy index bit */
> >> +#define AMR_SENERGY(bit)                     AMR_BIT(info.sid.energy, bit)
> >
> > are these macros really useful?
> 
> Yes, very useful for the bit order tables. They allow four bits per
> line instead of just two if I used offsetof.

I did not mean AMR_BIT / AMR_OF, but the others.


> 
> [...]
> 
> >> +{
> >> +    enum Mode mode;
> >> +
> >> +    init_get_bits(&p->gb, buf, buf_size * 8);
> >> +
> >> +    // Decode the first octet.
> >> +    skip_bits(&p->gb, 1);                        // padding bit
> >> +    mode = get_bits(&p->gb, 4);                  // frame type
> >> +    p->bad_frame_indicator = !get_bits1(&p->gb); // quality bit
> >> +    skip_bits(&p->gb, 2);                        // two padding bits
> >> +
> >> +    if (mode <= MODE_DTX) {
> >> +        uint16_t *data = (uint16_t *)&p->frame;
> >> +        const AMROrder *order = amr_unpacking_bitmaps_per_mode[mode];
> >> +        int i;
> >> +
> >> +        memset(&p->frame, 0, sizeof(AMRNBFrame));
> >
> >> +        for (i = 0; i < mode_bits[mode]; i++)
> >> +            data[order[i].index] += get_bits1(&p->gb) << order[i].bit;
> >
> > it might reduce code size and improve speed if more than 1 bit would
> > be read where possible
> 
> The bits are very scattered. (I guess because it's not really designed
> as a file format - it's just a wire protocol dumped to a file.) There
> are 1240 bits overall, and 874 sequences of consecutive bits (about
> two hundred runs in either direction). I've tried reading more than
> one bit at a time and it was slower.

:/
Still, the reorder tables are not stored efficiently.
Even if the bit order were totally random, they should only need half the space.

for example the following should need less space:

/*
 * Sketch of a more compact unpacking loop.
 *
 * `tab` holds, for each 16-bit field of the destination struct in turn, the
 * source bit positions of that field (most significant bit first) followed
 * by a 0 terminator. Each position encodes byte index (pos >> 3) and bit
 * index within that byte (pos & 7).
 *
 * NOTE(review): 0 doubles as the terminator, so bit 0 of src[0] cannot be
 * referenced by the table. Presumably fine if src[0] is the header octet
 * whose low bits are padding -- confirm.
 */
uint16_t *dst;
for (dst = our_struct; dst < end; dst++) {
    int v = 0;
    while (*tab) {
        /* shift the accumulated value left by one and append the next bit */
        v += v + ((src[*tab >> 3] >> (*tab & 7)) & 1);
        tab++;
    }
    tab++;      /* step over the 0 terminator so the next field starts fresh */
    *dst = v;   /* the for header already advances dst; do not advance twice */
}




[...]
> +/** Frame type (Table 1a in 3GPP TS 26.101) */
> +enum Mode {
> +    MODE_475 = 0,                         ///< 4.75 kbit/s
> +    MODE_515,                             ///< 5.15 kbit/s
> +    MODE_59,                              ///< 5.90 kbit/s
> +    MODE_67,                              ///< 6.70 kbit/s
> +    MODE_74,                              ///< 7.40 kbit/s
> +    MODE_795,                             ///< 7.95 kbit/s
> +    MODE_102,                             ///< 10.2 kbit/s
> +    MODE_122,                             ///< 12.2 kbit/s

I'd call them like
MODE_4k75
MODE_12k2

This is more readable and less ambiguous to someone who does not know the rates.


[...]
> +/**
> + * AMRNB SID frame parameters
> + */
> +typedef struct {
> +    uint16_t ref_vector; ///< index of reference vector
> +    uint16_t energy;     ///< index of logarithmic frame energy
> +} AMRNBSIDFrame;
> +
> +/**
> + * AMRNB unpacked data frame
> + */
> +typedef struct {
> +    uint16_t lsf[5];           ///< lsf parameters: 5 parameters for MODE_122, only 3 for other modes
> +    AMRNBSubframe subframe[4]; ///< unpacked data for each subframe

> +    AMRNBSIDFrame sid;

Is there any point in having this as a separate struct?


> +} AMRNBFrame;
> +
> +
> +// The following order* tables are used to convert AMR frame parameters to and
> +// from a bitstream. See 3GPP TS 26.101 for more information.
> +
> +#define AMR_BIT(field, bit)                  {offsetof(AMRNBFrame, field) >> 1, bit}
> +/** Specify an LSF parameter bit */
> +#define AMR_LSF(variable, bit)               AMR_BIT(lsf[variable], bit)
> +/** Specify a subframe-specific bit */
> +#define AMR_OF(frame_num, variable, bit)     AMR_BIT(subframe[frame_num].variable, bit)
> +/** Specify a pitch gain bit */
> +#define AMR_PGAIN(frame_num, bit)            AMR_OF(frame_num, p_gain, bit)
> +/** Specify a fixed gain bit */
> +#define AMR_FGAIN(frame_num, bit)       AMR_OF(frame_num, fixed_gain, bit)
> +/** Specify a pitch lag bit */
> +#define AMR_PLAG(frame_num, bit)             AMR_OF(frame_num, p_lag, bit)
> +/** Specify a pulse bit */
> +#define AMR_PULSE(frame_num, pulse_id, bit) AMR_OF(frame_num, pulses[pulse_id], bit)
> +/** Specify an SID reference vector bit */
> +#define AMR_SVECTOR(bit)                     AMR_BIT(sid.ref_vector, bit)
> +/** Specify an SID energy index bit */
> +#define AMR_SENERGY(bit)                     AMR_BIT(sid.energy, bit)
> +
> +static const AMROrder order_MODE_475[95] = {
> + AMR_LSF  (  0, 7), AMR_LSF  (  0, 6), AMR_LSF  (  0, 5), AMR_LSF  (  0, 4),
> + AMR_LSF  (  0, 3), AMR_LSF  (  0, 2), AMR_LSF  (  0, 1), AMR_LSF  (  0, 0),
> + AMR_LSF  (  1, 7), AMR_LSF  (  1, 6), AMR_LSF  (  1, 5), AMR_LSF  (  1, 4),
> + AMR_LSF  (  1, 3), AMR_LSF  (  1, 2), AMR_LSF  (  1, 1), AMR_LSF  (  1, 0),
> + AMR_PLAG (0,   7), AMR_PLAG (0,   6), AMR_PLAG (0,   5), AMR_PLAG (0,   4),
> + AMR_PLAG (0,   3), AMR_PLAG (0,   2), AMR_PLAG (1,   3), AMR_PLAG (1,   2),
> + AMR_PLAG (2,   3), AMR_PLAG (2,   2), AMR_PLAG (3,   3), AMR_PLAG (3,   2),
> + AMR_PGAIN(0,   0), AMR_PGAIN(0,   1), AMR_PGAIN(0,   2), AMR_PGAIN(0,   3),
> + AMR_PGAIN(2,   0), AMR_PGAIN(2,   1), AMR_PGAIN(2,   2), AMR_PGAIN(2,   3),
> + AMR_LSF  (  2, 5), AMR_LSF  (  2, 4), AMR_LSF  (  2, 2), AMR_LSF  (  2, 0),
> + AMR_PGAIN(2,   4), AMR_PGAIN(2,   5), AMR_PGAIN(2,   6), AMR_PGAIN(2,   7),
> + AMR_PLAG (0,   1), AMR_PLAG (0,   0), AMR_PGAIN(0,   4), AMR_PGAIN(0,   5),
> + AMR_PGAIN(0,   6), AMR_PGAIN(0,   7), AMR_PULSE(0,1, 1), AMR_PULSE(0,1, 0),
> + AMR_LSF  (  2, 6), AMR_LSF  (  2, 3), AMR_LSF  (  2, 1), AMR_PLAG (1,   1),
> + AMR_PLAG (1,   0), AMR_PULSE(1,1, 1), AMR_PULSE(1,1, 0), AMR_PLAG (2,   1),
> + AMR_PLAG (2,   0), AMR_PULSE(2,1, 1), AMR_PULSE(2,1, 0), AMR_PLAG (3,   1),
> + AMR_PLAG (3,   0), AMR_PULSE(3,1, 1), AMR_PULSE(3,1, 0), AMR_PULSE(0,0, 5),
> + AMR_PULSE(0,0, 4), AMR_PULSE(0,0, 2), AMR_PULSE(0,0, 1), AMR_PULSE(1,0, 5),
> + AMR_PULSE(1,0, 4), AMR_PULSE(1,0, 2), AMR_PULSE(1,0, 1), AMR_PULSE(2,0, 5),
> + AMR_PULSE(2,0, 4), AMR_PULSE(2,0, 2), AMR_PULSE(2,0, 1), AMR_PULSE(3,0, 5),
> + AMR_PULSE(3,0, 4), AMR_PULSE(3,0, 2), AMR_PULSE(3,0, 1), AMR_PULSE(0,0, 3),
> + AMR_PULSE(1,0, 3), AMR_PULSE(2,0, 3), AMR_PULSE(3,0, 3), AMR_PULSE(0,0, 0),
> + AMR_PULSE(1,0, 0), AMR_PULSE(2,0, 0), AMR_PULSE(3,0, 0), AMR_PULSE(0,0, 6),
> + AMR_PULSE(1,0, 6), AMR_PULSE(2,0, 6), AMR_PULSE(3,0, 6)
> +};
> +
> +static const AMROrder order_MODE_515[103] = {
> + AMR_LSF  (  0, 0), AMR_LSF  (  0, 1), AMR_LSF  (  0, 2), AMR_LSF  (  0, 3),
> + AMR_LSF  (  0, 4), AMR_LSF  (  0, 5), AMR_LSF  (  0, 6), AMR_LSF  (  0, 7),
> + AMR_LSF  (  1, 0), AMR_LSF  (  1, 1), AMR_LSF  (  1, 2), AMR_LSF  (  1, 3),
> + AMR_LSF  (  1, 4), AMR_LSF  (  1, 5), AMR_LSF  (  1, 6), AMR_LSF  (  1, 7),
> + AMR_PLAG (0,   7), AMR_PLAG (0,   6), AMR_PLAG (0,   5), AMR_PLAG (0,   4),
> + AMR_PLAG (0,   3), AMR_PLAG (1,   3), AMR_PLAG (2,   3), AMR_PLAG (3,   3),

the placing of spaces is inconsistent

[...]




> +// LSF tables
> +
> +// These are stored as integers to save space. The values are taken from
> +// q_plsf_3.tab and q_plsf_5.tab in 3GPP TS 26.090.
...

Can be committed if they can't be stored more efficiently.



[...]

> +/** In 12.2kbit/s mode, positions are divided into TRACKS classes. */
> +#define TRACKS          5
> +/** In 10.2kbit/s mode, positions are divided into TRACKS_MODE_102 classes. */
> +#define TRACKS_MODE_102 4

Shouldn't the first be TRACKS_MODE_122?


[...]
> +typedef struct AMRContext {
> +    AMRNBFrame                        frame; ///< decoded AMR parameters (lsf coefficients, codebook indexes, etc)
> +    uint8_t             bad_frame_indicator; ///< bad frame ? 1 : 0
> +    enum Mode                cur_frame_mode;
> +
> +    int16_t     prev_lsf_r[LP_FILTER_ORDER]; ///< residual LSF vector from previous subframe
> +    float           lsp[4][LP_FILTER_ORDER]; ///< lsp vectors from current frame
> +    float    prev_lsp_sub4[LP_FILTER_ORDER]; ///< lsp vector for the 4th subframe of the previous frame
> +
> +    float         lsf_q[4][LP_FILTER_ORDER]; ///< Interpolated LSF vector for fixed gain smoothing
> +    float          lsf_avg[LP_FILTER_ORDER]; ///< vector of averaged lsf vector
> +
> +    float           lpc[4][LP_FILTER_ORDER]; ///< lpc coefficient vectors for 4 subframes
> +
> +    uint8_t                   pitch_lag_int; ///< integer part of pitch lag from current subframe
> +
> +    float excitation_buf[PITCH_LAG_MAX + LP_FILTER_ORDER + 1 + AMR_SUBFRAME_SIZE]; ///< current excitation and all necessary excitation history
> +    float                       *excitation; ///< pointer to the current excitation vector in excitation_buf
> +
> +    float   pitch_vector[AMR_SUBFRAME_SIZE]; ///< adaptive code book (pitch) vector
> +    float   fixed_vector[AMR_SUBFRAME_SIZE]; ///< algebraic codebook (fixed) vector (must be kept zero between frames)
> +
> +    float               prediction_error[4]; ///< quantified prediction errors {20log10(^gamma_gc)} for previous four subframes
> +    float                     pitch_gain[5]; ///< quantified pitch gains for the current and previous four subframes
> +    float                     fixed_gain[5]; ///< quantified fixed gains for the current and previous four subframes
> +
> +    float                              beta; ///< previous pitch_gain, bounded by [0.0,SHARP_MAX]
> +    uint8_t                      diff_count; ///< the number of subframes for which diff has been above 0.65
> +    uint8_t                      hang_count; ///< the number of subframes since a hangover period started
> +
> +    float            prev_sparse_fixed_gain; ///< previous fixed gain; used by anti-sparseness processing to determine "onset"

> +    uint8_t         prev_ir_filter_strength; ///< previous impulse response filter strength; 0 - strong, 1 - medium, 2 - none

This differs from the English semantics of "strength"; I mean,
one would intuitively assume larger numbers to be stronger ...


[...]
> +/**
> + * Convert an lsf vector into an lsp vector.
> + *
> + * @param lsf               input lsf vector
> + * @param lsp               output lsp vector
> + */
> +static void lsf2lsp(float *lsf, float *lsp)

const missing


> +{
> +    int i;
> +
> +    for (i = 0; i < LP_FILTER_ORDER; i++)
> +        lsp[i] = cos(lsf[i] * 2.0 * M_PI / 8000.0);

Slightly reordering that could help some compilers merge the 3
constants, e.g. lsf[i] * (2.0 * M_PI / 8000.0). As is, a compiler cannot
safely do it, as the associative law does not strictly hold for floats.

[...]
> +/**
> + * Adjust the quantized LSFs so they are increasing and not too close.
> + *
> + * This step isn't mentioned in the spec but is in the reference C decoder.
> + * Omitting this step creates audible distortion on the sinusoidal sweep
> + * test vectors in 3GPP TS 26.074.
> + *
> + * @param[in,out] lsf    LSFs in Hertz
> + */
> +static void adjust_lsf(float *lsf)

make_increasing() or something like that; "adjust" is a pretty generic term.


[...]

> +/**
> + * Decode a set of 3 split-matrix quantized lsf indexes into an lsp vector.
> + *
> + * @param p                 pointer to the AMRContext
> + */
> +static void lsf2lsp_3(AMRContext *p)
> +{
> +    const uint16_t *lsf_param = p->frame.lsf;
> +    int16_t lsf_r[LP_FILTER_ORDER]; // residual LSF vector
> +    float lsf_q[LP_FILTER_ORDER]; // quantified LSF vector
> +    const int16_t *lsf_quantizer;
> +    int i;
> +
> +    lsf_quantizer = (p->cur_frame_mode == MODE_795 ? lsf_3_1_MODE_795 : lsf_3_1)[lsf_param[0]];
> +    memcpy(lsf_r, lsf_quantizer, 3 * sizeof(*lsf_r));
> +
> +    lsf_quantizer = lsf_3_2[lsf_param[1] << (p->cur_frame_mode <= MODE_515)];
> +    memcpy(lsf_r + 3, lsf_quantizer, 3 * sizeof(*lsf_r));
> +
> +    lsf_quantizer = (p->cur_frame_mode <= MODE_515 ? lsf_3_3_MODE_515 : lsf_3_3)[lsf_param[2]];
> +    memcpy(lsf_r + 6, lsf_quantizer, 4 * sizeof(*lsf_r));
> +
> +    // calculate mean-removed LSF vector and add mean
> +    for (i = 0; i < LP_FILTER_ORDER; i++)
> +        lsf_q[i] = (lsf_r[i] + p->prev_lsf_r[i] * pred_fac[i]) * LSF_R_FAC + lsf_3_mean[i];
> +
> +    adjust_lsf(lsf_q);
> +
> +    // store data for computing the next frame's LSFs
> +    interpolate_lsf(p->lsf_q, lsf_q);
> +    memcpy(p->prev_lsf_r, lsf_r, LP_FILTER_ORDER * sizeof(*lsf_r));

There are a lot of avoidable memcpy() calls, and I think not only here.


[...]
> +/**
> + * Convert an lsp vector to lpc coefficients.
> + *
> + * @param lsp                 input lsp vector
> + * @param lpc_coeffs          output lpc coefficients
> + */
> +static void lsp2lpc(float *lsp, float *lpc_coeffs)

const


[...]
> +/**
> + * Decode the adaptive codebook index to the integer and fractional parts
> + * of the pitch lag for one subframe at 1/6 resolution for MODE_122,
> + * 1/3 for other modes.
> + *
> + * The choice of pitch lag is described in 3GPP TS 26.090 section 5.6.1.
> + *
> + * @param lag_int             integer part of pitch lag of the current subframe
> + * @param lag_frac            fractional part of pitch lag of the current subframe
> + * @param pitch_index         parsed adaptive codebook (pitch) index
> + * @param prev_lag_int        integer part of pitch lag for the previous subframe
> + * @param subframe            current subframe number
> + * @param mode                mode of the current frame
> + */
> +static void decode_pitch_lag(int *lag_int, int *lag_frac, int pitch_index,
> +                             const int prev_lag_int, const int subframe,
> +                             const enum Mode mode)
> +{
> +    /* Note n * 10923 >> 15 is floor(x/3) for 0 <= n <= 32767 */
> +    if (subframe == 0 ||
> +        (subframe == 2 && mode != MODE_475 && mode != MODE_515)) {
> +        if (mode == MODE_122) {
> +            if (pitch_index < 463) {
> +                *lag_int  = (pitch_index + 107) * 10923 >> 16;
> +                *lag_frac = pitch_index - *lag_int * 6 + 105;
> +            } else {
> +                *lag_int  = pitch_index - 368;
> +                *lag_frac = 0;
> +            }
> +        } else if (pitch_index < 197) {
> +            *lag_int  = (pitch_index + 59) * 10923 >> 15;
> +            *lag_frac = pitch_index - *lag_int * 3 + 58;
> +        } else {
> +            *lag_int  = pitch_index - 112;
> +            *lag_frac = 0;
> +        }

Somehow I think this can be substantially simplified:
first calculate a single int pitch_lag in 1/6 resolution, then, in a second
factorized step, split it into frac & int.
Also, if it works here, it likely works below as well.


> +    } else {
> +        if (mode == MODE_122) {

> +            *lag_int  = ((pitch_index + 5) * 10923 >> 16) - 1;
> +            *lag_frac = pitch_index - *lag_int * 6 - 3;
> +            *lag_int += av_clip(prev_lag_int - 5, PITCH_LAG_MIN_MODE_122,
> +                                PITCH_LAG_MAX - 9);

Several of these additions and subtractions can be merged, I think.

[...]
> +/**
> + * Calculate the pitch vector by interpolating the past excitation at the pitch
> + * lag using a b60 hamming windowed sinc function.
> + *
> + * @param pitch_vector buffer that must hold for the previous state of the filter in
> + *                     pitch_vector[-PITCH_LAG_MAX-LP_FILTER_ORDER-1, -1]
> + * @param lag_int             integer part of pitch lag
> + * @param lag_frac            fractional part of pitch lag
> + * @param mode                current frame mode
> + */
> +static void interp_pitch_vector(float *pitch_vector, int lag_int,
> +                                int lag_frac, enum Mode mode)
> +{
> +    int n, i;
> +    const float *b60_idx1, *b60_idx2;
> +    float *exc_idx;
> +
> +    lag_frac *= -1;
> +    if (mode != MODE_122) {
> +        lag_frac <<= 1;
> +    }
> +
> +    if (lag_frac < 0) {
> +        lag_frac += 6;
> +        lag_int++;
> +    }

Is it possible to keep lag_frac > 0 from the start?
Representing a scalar as an integer and a fractional part, where the fractional
part can be both < 0 and > 0, feels very odd to me, but I don't know the code
well enough to say whether such a change is possible and would simplify the code
or not.


[...]
> +/**
> + * Decode the algebraic codebook index to pulse positions and signs and
> + * construct the algebraic codebook vector for MODE_102.
> + *
> + * @param fixed_index          positions of the eight pulses
> + * @param fixed_sparse         pointer to the algebraic codebook vector
> + */
> +static void decode_8_pulses_31bits(const int16_t *fixed_index,
> +                                   AMRFixed *fixed_sparse)
> +{
> +    int pulse_position[8];
> +    int i, temp;
> +
> +    // coded using 7+3 bits with the 3 LSBs being, individually, the LSB of 1 of
> +    // the 3 pulses and the upper 7 bits being coded in base 5
> +    temp = fixed_index[4] >> 3;
> +    pulse_position[0] = (( temp       % 5) << 1) + ( fixed_index[4]       & 1);
> +    pulse_position[4] = (((temp /  5) % 5) << 1) + ((fixed_index[4] >> 1) & 1);
> +    pulse_position[1] = (((temp / 25) % 5) << 1) + ((fixed_index[4] >> 2) & 1);

These % and / are slow. I believe we do have some % / tables somewhere (AAC or (E)AC3
maybe, I didn't check) that could perhaps be used; if not, it may be worth adding
some tables, unless the speed of this doesn't matter ...


> +
> +    // coded using 7+3 bits with the 3 LSBs being, individually, the LSB of 1 of
> +    // the 3 pulses and the upper 7 bits being coded in base 5
> +    temp = fixed_index[5] >> 3;
> +    pulse_position[2] = (( temp       % 5) << 1) + ( fixed_index[5]       & 1);
> +    pulse_position[6] = (((temp /  5) % 5) << 1) + ((fixed_index[5] >> 1) & 1);
> +    pulse_position[5] = (((temp / 25) % 5) << 1) + ((fixed_index[5] >> 2) & 1);

looks a little duplicated

> +
> +    // coded using 5+2 bits with the 2 LSBs being, individually, the LSB of 1 of
> +    // the 2 pulses and the upper 5 bits being coded in base 5
> +    temp = ((fixed_index[6] >> 2) * 25 + 12) >> 5;
> +    pulse_position[3] = temp % 5;
> +    pulse_position[7] = temp / 5;
> +    if (pulse_position[7] & 1)
> +        pulse_position[3] = 4 - pulse_position[3];
> +    pulse_position[3] = (pulse_position[3] << 1) + ( fixed_index[6]       & 1);
> +    pulse_position[7] = (pulse_position[7] << 1) + ((fixed_index[6] >> 1) & 1);
> +
> +    fixed_sparse->n = 8;
> +    for (i = 0; i < TRACKS_MODE_102; i++) {
> +        const int pos1   = (pulse_position[i]     << 2) + i;
> +        const int pos2   = (pulse_position[i + 4] << 2) + i;
> +        const float sign = fixed_index[i] ? -1.0 : 1.0;
> +        fixed_sparse->x[i    ] = pos1;
> +        fixed_sparse->x[i + 4] = pos2;
> +        fixed_sparse->y[i    ] = sign;
> +        fixed_sparse->y[i + 4] = pos2 < pos1 ? -sign : sign;
> +    }

The stuff could be put in its final place immediately, skipping this copy
(of course only if faster/cleaner).


> +}
> +
> +/**
> + * Decode the algebraic codebook index to pulse positions and signs and
> + * construct the algebraic codebook vector for MODE_122.
> + *
> + * @note: The positions and signs are explicitly coded in MODE_122.
> + *
> + * @param fixed_index          positions of the ten pulses
> + * @param fixed_sparse         pointer to the algebraic codebook vector
> + */
> +static void decode_10_pulses_35bits(const int16_t *fixed_index,
> +                                    AMRFixed *fixed_sparse)
> +{
> +    int i;
> +
> +    fixed_sparse->n = 10;
> +    for (i = 0; i < TRACKS; i++) {
> +        const int pos1   = gray_decode[fixed_index[i    ] & 7] * TRACKS + i;
> +        const int pos2   = gray_decode[fixed_index[i + 5] & 7] * TRACKS + i;
> +        const float sign = (fixed_index[i] & 8) ? -1.0 : 1.0;
> +        fixed_sparse->x[i    ] = pos1;
> +        fixed_sparse->x[i + 5] = pos2;
> +        fixed_sparse->y[i    ] = sign;
> +        fixed_sparse->y[i + 5] = pos2 < pos1 ? -sign : sign;
> +    }
> +}
> +
> +/**
> + * Decode the algebraic codebook index to pulse positions and signs,
> + * then construct the algebraic codebook vector.
> + *
> + *                           nb of pulses | bits encoding pulses
> + * For MODE_475 or MODE_515,            2 | 1-3, 4-6, 7
> + *                  MODE_59,            2 | 1,   2-4, 5-6, 7-9
> + *                  MODE_67,            3 | 1-3, 4,   5-7, 8,  9-11
> + *      MODE_74 or MODE_795,            4 | 1-3, 4-6, 7-9, 10, 11-13
> + *
> + * @param fixed_sparse pointer to the algebraic codebook vector
> + * @param pulses       algebraic codebook indexes
> + * @param mode         mode of the current frame
> + * @param subframe     current subframe number
> + */
> +static void decode_fixed_sparse(AMRFixed *fixed_sparse, const uint16_t *pulses,
> +                                const enum Mode mode, const int subframe)
> +{
> +    assert(MODE_475 <= mode && mode <= MODE_122);
> +
> +    if (mode == MODE_122) {
> +        decode_10_pulses_35bits(pulses, fixed_sparse);
> +    } else if (mode == MODE_102) {
> +        decode_8_pulses_31bits(pulses, fixed_sparse);
> +    } else {
> +        int *pulse_position = fixed_sparse->x;
> +        int i, pulse_subset;
> +        const int fixed_index = pulses[0];
> +
> +        if (mode <= MODE_515) {
> +            pulse_subset      = ((fixed_index >> 3) & 8)     + (subframe << 1);
> +            pulse_position[0] = ( fixed_index       & 7) * 5 + track_position[pulse_subset];
> +            pulse_position[1] = ((fixed_index >> 3) & 7) * 5 + track_position[pulse_subset + 1];
> +            fixed_sparse->n = 2;
> +        } else if (mode == MODE_59) {
> +            pulse_subset      = ((fixed_index & 1) << 1) + 1;
> +            pulse_position[0] = ((fixed_index >> 1) & 7) * 5 + pulse_subset;
> +            pulse_subset      = (fixed_index  >> 4) & 3;
> +            pulse_position[1] = ((fixed_index >> 6) & 7) * 5 + pulse_subset + (pulse_subset == 3 ? 1 : 0);
> +            fixed_sparse->n = pulse_position[0] == pulse_position[1] ? 1 : 2;
> +        } else if (mode == MODE_67) {
> +            pulse_position[0] = (fixed_index        & 7) * 5;
> +            pulse_subset      = (fixed_index  >> 2) & 2;
> +            pulse_position[1] = ((fixed_index >> 4) & 7) * 5 + pulse_subset + 1;
> +            pulse_subset      = (fixed_index  >> 6) & 2;
> +            pulse_position[2] = ((fixed_index >> 8) & 7) * 5 + pulse_subset + 2;
> +            fixed_sparse->n = 3;
> +        } else { // mode <= MODE_795
> +            pulse_position[0] = gray_decode[ fixed_index        & 7] * 5;
> +            pulse_position[1] = gray_decode[(fixed_index >> 3)  & 7] * 5 + 1;
> +            pulse_position[2] = gray_decode[(fixed_index >> 6)  & 7] * 5 + 2;
> +            pulse_subset      = (fixed_index >> 9) & 1;
> +            pulse_position[3] = gray_decode[(fixed_index >> 10) & 7] * 5 + pulse_subset + 3;
> +            fixed_sparse->n = 4;
> +        }
> +        for (i = 0; i < fixed_sparse->n; i++)
> +            fixed_sparse->y[i] = (pulses[1] >> i) & 1 ? 1.0 : -1.0;
> +    }
> +}

can some of this be merged with ff_acelp_fc_pulse_per_track() ?


[...]
> +/**
> + * Add fixed vector to an array from a sparse representation
> + *
> + * @param out fixed vector with pitch sharpening
> + * @param in sparse fixed vector
> + * @param scale number to multiply the fixed vector by
> + */
> +static void set_fixed_vector(float *out, const AMRFixed *in, float scale) {
> +    int i;
> +    int x;
> +    float y;
> +    if (in->pitch_lag >= AMR_SUBFRAME_SIZE) {
> +        for (i = 0; i < in->n; i++) {
> +            x = in->x[i];
> +            y = in->y[i] * scale;
> +            out[x] += y;
> +        }
> +    } else if (in->pitch_lag >= AMR_SUBFRAME_SIZE >> 1) {
> +        for (i = 0; i < in->n; i++) {
> +            x = in->x[i];
> +            y = in->y[i] * scale;
> +            out[x] += y;
> +
> +            x += in->pitch_lag;
> +            if (x < AMR_SUBFRAME_SIZE)
> +                out[x] += y * in->pitch_fac;
> +        }
> +    } else {
> +        for (i = 0; i < in->n; i++) {
> +            x = in->x[i];
> +            y = in->y[i] * scale;
> +            out[x] += y;
> +
> +            x += in->pitch_lag;
> +            if (x < AMR_SUBFRAME_SIZE) {
> +                y *= in->pitch_fac;
> +                out[x] += y;
> +
> +                x += in->pitch_lag;
> +                if (x < AMR_SUBFRAME_SIZE)
> +                    out[x] += y * in->pitch_fac;
> +            }
> +        }
> +    }
> +}
> +
> +/**
> + * Clear array values set by set_fixed_vector
> + *
> + * @param out fixed vector to be cleared
> + * @param in sparse fixed vector
> + */
> +static void clear_fixed_vector(float *out, const AMRFixed *in) {
> +    int i;
> +    int x;
> +    if (in->pitch_lag >= AMR_SUBFRAME_SIZE) {
> +        for (i = 0; i < in->n; i++)
> +            out[in->x[i]] = 0.0;
> +    } else if (in->pitch_lag >= AMR_SUBFRAME_SIZE >> 1) {
> +        for (i = 0; i < in->n; i++) {
> +            x = in->x[i];
> +            out[x] = 0.0;
> +
> +            x += in->pitch_lag;
> +            if (x < AMR_SUBFRAME_SIZE)
> +                out[x] = 0.0;
> +        }
> +    } else {
> +        for (i = 0; i < in->n; i++) {
> +            x = in->x[i];
> +            out[x] = 0.0;
> +
> +            x += in->pitch_lag;
> +            if (x < AMR_SUBFRAME_SIZE) {
> +                out[x] = 0.0;
> +
> +                x += in->pitch_lag;
> +                if (x < AMR_SUBFRAME_SIZE)
> +                    out[x] = 0.0;
> +            }
> +        }
> +    }
> +}

i guess factorizing these makes them slower?

[...]
> +/**
> + * Decode pitch gain and fixed gain factor (part of section 6.1.3).
> + *
> + * @param p the context
> + * @param amr_subframe unpacked amr subframe
> + * @param mode mode of the current frame
> + * @param subframe current subframe number
> + * @param fixed_gain_factor decoded gain correction factor
> + */
> +static void decode_gains(AMRContext *p, const AMRNBSubframe *amr_subframe,
> +                         const enum Mode mode, const int subframe,
> +                         float *fixed_gain_factor)
> +{
> +    if (mode == MODE_122 || mode == MODE_795) {
> +        p->pitch_gain[4]   = qua_gain_pit [amr_subframe->p_gain    ] / 16384.0;
> +        *fixed_gain_factor = qua_gain_code[amr_subframe->fixed_gain] / 2048.0;
> +    } else {
> +        const uint16_t *gains =
> +            mode >= MODE_67  ? gains_high[amr_subframe->p_gain] :
> +            mode >= MODE_515 ? gains_low [amr_subframe->p_gain] :
> +                // gain index is only coded in subframes 0,2 for MODE_475
> +                gains_MODE_475[(p->frame.subframe[subframe & 2].p_gain << 1) +
> +                               (subframe & 1)];

this is messy, please use if/else unless you benchmarked it and this is faster


[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Those who are too smart to engage in politics are punished by being
governed by those who are dumber. -- Plato 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090814/d7768c3c/attachment.pgp>



More information about the ffmpeg-devel mailing list