[FFmpeg-devel] [PATCH 2/2] Add SIPR decoder for 5k0, 6k5 and 8k5 modes

Michael Niedermayer michaelni
Mon Jan 11 15:24:55 CET 2010


On Sun, Jan 10, 2010 at 04:43:18PM -0500, Vitor Sessak wrote:
> Vitor Sessak wrote:
>> Ronald S. Bultje wrote:
>>> Hi Vitor,
>>>
>>> On Jan 7, 2010, at 10:23 PM, Vitor Sessak <vitor1001 at gmail.com> wrote:
>>>> in:                      { 2, 2.2,  7,  9.5, 10  }
>>>> out:                     { 2,   4,  7,  9.5, 10  }
>>>> ff_acelp_reorder_lsf():  { 2,   4,  7,  9.5, 11.5}
>>>> sane:                    { 2,   4,  7,  9.5, 11.5}
>>>>
>>>> here we see a problem
>>>
>>> Ah, I see what you mean, min_dist is not enforced for the last lsf. Very 
>>> interesting. For WMAVoice, that's not the case, it's exactly like the int 
>>> version. OK, I guess that makes the discussion moot then.
>>>
>>> Does output quality audibly differ when adding in this "feature"? (It 
>>> might just be a decoder bug, WMAVoice is full of them.) Deviating from 
>>> binary can sometimes be OK...
>>>
>>> If the output doesn't change or worsens, patch OK (but please explain 
>>> this difference in a doxy); if it improves, it might be worth using that 
>>> instead.
>> No, the output gets completely garbled if I do like in 
>> ff_acelp_reorder_lsf().
>> Does anyone else wants to comment/review this codec? If nobody comment in 
>> a couple of days I'll commit it.
>
> Committed.
>
> Now the patch that adds 16k support. I'm undecided if I should put this 
> code in a separated file...

Considering that files always grow and that later spliting is not always
easy, id say yes you should unless theres some disadvantage (like speed loss)
also if split the interface between them should be documented


>
> -Vitor

>  sipr.c     |  257 ++++++++++++++++++++++++++++--
>  siprdata.h |  520 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 761 insertions(+), 16 deletions(-)
> daa9f55138b73a225f13548e38a5b1504fc39283  sipr_16k.diff
> Index: libavcodec/siprdata.h
> ===================================================================
> --- libavcodec/siprdata.h	(revision 21125)
> +++ libavcodec/siprdata.h	(working copy)

[...]

I assume the tables arent duplicates of anything else?


[...]
> @@ -159,25 +194,57 @@
>      int gc_index[5];           ///< fixed-codebook gain indexes
>  } SiprParameters;
>  
> +/**
> + * Convert an lsf vector into an lsp vector.
> + *
> + * @param lsf               input lsf vector
> + * @param lsp               output lsp vector
> + */
> +static void lsf2lsp(float *lsf, double *lsp)
> +{
> +    int i;
>  
> +    for (i = 0; i < LP_FILTER_ORDER_16k; i++)
> +        lsp[i] = cos(lsf[i]);
> +}
> +

shouldnt that go into some common code?
also i assume cosf() is worse quality wise?


> -static void dequant(float *out, const int *idx, const float *cbs[])
> +static void dequant(float *out, const int *idx, const float *cbs[],
> +                    int mode_16k)
>  {
>      int i;
> -    int stride  = 2;
> -    int num_vec = 5;

> +    int stride  = mode_16k ? 3 : 2;
> +    int num_vec = 7 - stride;
>  
>      for (i = 0; i < num_vec; i++)
>          memcpy(out + stride*i, cbs[i] + stride*idx[i], stride*sizeof(float));
>  
> +    if (mode_16k)
> +        memcpy(out + 12, cbs[4] + 4*idx[4], 4*sizeof(float));

this is probably more efficient and readable like:
if(mode_16k){
    for (i = 0; i < 4; i++)
        memcpy(out + 3*i, cbs[i] + 3*idx[i], 3*sizeof(float));
    memcpy(out + 12, cbs[4] + 4*idx[4], 4*sizeof(float));
}else
    for (i = 0; i < 5; i++)
        memcpy(out + 2*i, cbs[i] + 2*idx[i], 2*sizeof(float));



>  }
>  
> +static void lsf_decode_fp_16k(float* lsf_history, float* isp_new,
> +                              const int* parm, int ma_pred)
> +{
> +    int i;
> +    float isp_q[LP_FILTER_ORDER_16k];
> +
> +    dequant(isp_q, parm, lsf_codebooks_16k, 1);
> +
> +    for (i = 0; i < LP_FILTER_ORDER_16k; i++) {

> +        isp_new[i] = (1 - qu[ma_pred]) * isp_q[i] +
> +            qu[ma_pred] * lsf_history[i] + mean_lsf_16k[i];

cosmetic suggestion:
isp_new[i] =  (1 - qu[ma_pred]) * isp_q[i]
            + qu[ma_pred] * lsf_history[i] + mean_lsf_16k[i];
or

isp_new[i] =  (1 - qu[ma_pred]) * isp_q[i]
            + qu[ma_pred] * lsf_history[i] 
            + mean_lsf_16k[i];

or even
isp_new[i] = (1 - qu[ma_pred]) * isp_q[i]
                               +
                   qu[ma_pred] * lsf_history[i]
                               +
                         mean_lsf_16k[i];




> +    }
> +
> +    memcpy(lsf_history, isp_q, LP_FILTER_ORDER_16k * sizeof(float));
> +}
> +
>  static void lsf_decode_fp(float *lsfnew, float *lsf_history,
>                            const SiprParameters *parm)
>  {
>      int i;
>      float lsf_tmp[LP_FILTER_ORDER];
>  
> -    dequant(lsf_tmp, parm->vq_indexes, lsf_codebooks);
> +    dequant(lsf_tmp, parm->vq_indexes, lsf_codebooks, 0);
>  
>      for (i = 0; i < LP_FILTER_ORDER; i++)
>          lsfnew[i] = lsf_history[i] * 0.33 + lsf_tmp[i] + mean_lsf[i];

> @@ -196,6 +263,26 @@
>      lsfnew[LP_FILTER_ORDER - 1] *= 6.153848 / M_PI;
>  }
>  
> +static int dec_delay3_1st(int index)
> +{
> +    if (index < 390)
> +        return index + 88;
> +    else

{}
(this applies to more cases, it makes future patches smaller ...)


> +        return 3 * index - 690;
> +}
> +

> +static int dec_delay3_2nd(int index, int pit_min, int pit_max,
> +                          int pitch_lag_prev)
> +{

> +    int pitch_delay_min =
> +        FFMIN(FFMAX(pitch_lag_prev - 10, pit_min), pit_max - 19) ;

av_clip()


> +
> +    if (index < 62)
> +        return 3 * pitch_delay_min + index - 2;
> +    else
> +        return 3 * pitch_lag_prev;

the calculation of pitch_delay_min should be under the if()


> +}
> +
>  /** Apply pitch lag to the fixed vector (AMR section 6.1.2). */
>  static void pitch_sharpening(int pitch_lag_int, float beta,
>                               float *fixed_vector)
> @@ -206,6 +293,136 @@
>          fixed_vector[i] += beta * fixed_vector[i - pitch_lag_int];
>  }
>  
> +static void postfilter(float* synth, float* iir_mem, float* filt_mem[2],
> +                       float* mem_preemph)
> +{
> +    float buf[30 + LP_FILTER_ORDER_16k];
> +    float *tmpbuf = buf + LP_FILTER_ORDER_16k;
> +    float s;
> +    int i;
> +
> +    for (i = 0; i < LP_FILTER_ORDER_16k; i++)
> +        filt_mem[0][i] = iir_mem[i] * pow_0_5[i];
> +
> +    memcpy(tmpbuf - LP_FILTER_ORDER_16k, mem_preemph,
> +           LP_FILTER_ORDER_16k*sizeof(*buf));
> +
> +    ff_celp_lp_synthesis_filterf(tmpbuf, filt_mem[1], synth, 30,
> +                                 LP_FILTER_ORDER_16k);
> +
> +    memcpy(synth - LP_FILTER_ORDER_16k, mem_preemph,
> +           LP_FILTER_ORDER_16k * sizeof(*synth));
> +
> +    ff_celp_lp_synthesis_filterf(synth, filt_mem[0], synth, 2*L_SUBFR_16k,
> +                                 LP_FILTER_ORDER_16k);
> +
> +    memcpy(mem_preemph, synth + 2*L_SUBFR_16k - LP_FILTER_ORDER_16k,
> +           LP_FILTER_ORDER_16k * sizeof(*synth));
> +
> +    FFSWAP(float *, filt_mem[0], filt_mem[1]);
> +    for (i = 0, s = 0; i < 30; i++, s += 1.0/30)

> +        synth[i] = (1.0 - s) * tmpbuf[i] + s * synth[i];

synth[i] = tmpbuf[i] + s * (synth[i] - tmpbuf[i]);


> +}
> +
> +static void decode_frame_16k(SiprContext *ctx, SiprParameters *params,
> +                             float *out_data)
> +{
> +    int frame_size = ctx->m.subframe_count * L_SUBFR_16k;
> +    float *synth = ctx->synth_buf + LP_FILTER_ORDER_16k;
> +    float lsf_new[LP_FILTER_ORDER_16k];
> +    double lsp_new[LP_FILTER_ORDER_16k];
> +    float Az[2][LP_FILTER_ORDER_16k];
> +    float fixed_vector[L_SUBFR_16k];
> +    float pitch_fac, gain_code;
> +
> +    int i;
> +    int pitch_delay_3x, pitch_lag;
> +    int pitch_delay_frac;
> +
> +    float *excitation = ctx->excitation + 292;
> +
> +    lsf_decode_fp_16k(ctx->lsf_history, lsf_new, params->vq_indexes,
> +                      params->ma_pred_switch);
> +
> +    ff_set_min_dist_lsf(lsf_new, LSFQ_DIFF_MIN / 2, LP_FILTER_ORDER_16k);
> +
> +    lsf2lsp(lsf_new, lsp_new);
> +
> +    ff_acelp_lp_decodef(Az[0], Az[1], lsp_new, ctx->lsp_history_16k,
> +                        LP_FILTER_ORDER_16k);
> +
> +    memcpy(ctx->lsp_history_16k, lsp_new, LP_FILTER_ORDER_16k * sizeof(double));
> +
> +    memcpy(synth - LP_FILTER_ORDER_16k, ctx->synth,
> +           LP_FILTER_ORDER_16k * sizeof(*synth));
> +
> +    for (i = 0; i < ctx->m.subframe_count; i++) {
> +        int i_subfr = i * L_SUBFR_16k;
> +        AMRFixed f;
> +        float gain_corr_factor;
> +
> +        if (!i)
> +            pitch_delay_3x = dec_delay3_1st(params->pitch_delay[i]);
> +        else
> +            pitch_delay_3x = dec_delay3_2nd(params->pitch_delay[i],
> +                                            PITCH_MIN, PITCH_MAX,
> +                                            ctx->pitch_lag_prev);
> +
> +        pitch_delay_frac = (pitch_delay_3x+1)%3 - 1;
> +        pitch_lag = (pitch_delay_3x+1)/3;
> +
> +        pitch_fac = gain_pitch_cb_16k[params->gp_index[i]];
> +
> +        ff_acelp_interpolatef(&excitation[i_subfr],
> +                              &excitation[i_subfr] - (pitch_delay_3x+2)/3 + 1,
> +                              sinc_win, 3, (pitch_delay_3x+2)%3 + 1,
> +                              LP_FILTER_ORDER, L_SUBFR_16k);

i would suspect that the code can be changed so it doesnt need pitch_delay_3x
split twice in different ways, but i might be wrong.
At least % and / are slow and should be avoided, a multiply and >> could
likely replace the /3 accurately for the limited range of input and the
% can be done from - pitch_lag*3 


> +
> +
> +        memset(fixed_vector, 0, sizeof(fixed_vector));
> +
> +        ff_decode_10_pulses_35bits(params->fc_indexes[i], &f,
> +                                   ff_fc_4pulses_8bits_tracks_13, 5, 4);
> +
> +        f.pitch_lag = pitch_lag;
> +        f.pitch_fac = FFMIN(pitch_fac, 1.0);
> +
> +        ff_set_fixed_vector(fixed_vector, &f, 1.0, L_SUBFR_16k);
> +
> +        gain_corr_factor = gain_cb_16k[params->gc_index[i]];
> +        gain_code = gain_corr_factor *
> +            ff_acelp_decode_gain_codef(sqrt(L_SUBFR_16k), fixed_vector,

> +                                       19.0 - 15.0/(log2f(10.0)*0.05),

you dont need log2fm log2 should be fine for a constant in case there is
no standard #defined one


> +                                       pred_16k, ctx->energy_history,
> +                                       L_SUBFR_16k, 2);
> +
> +        ctx->energy_history[1] = ctx->energy_history[0];
> +        ctx->energy_history[0] = 20.0*log10(gain_corr_factor);

log10 not log10f, is that intended?


[...]
> @@ -533,20 +752,21 @@
>      ctx->m = modes[ctx->mode];
>      av_log(avctx, AV_LOG_DEBUG, "Mode: %s\n", ctx->m.mode_name);
>  
> +    for (i = 0; i < LP_FILTER_ORDER_16k; i++)
> +        ctx->lsp_history_16k[i] = cos((i+1) * M_PI / (LP_FILTER_ORDER_16k + 1));
> +
>      for (i = 0; i < LP_FILTER_ORDER; i++)
>          ctx->lsp_history[i] = cos((i+1) * M_PI / (LP_FILTER_ORDER + 1));

cant these use the same array?

remaining review and approval left to our celp expert team

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Good people do not need laws to tell them to act responsibly, while bad
people will find a way around the laws. -- Plato
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20100111/e606ecdf/attachment.pgp>



More information about the ffmpeg-devel mailing list