[FFmpeg-devel] [PATCH] Add a G.722 encoder

Tue Sep 14 23:53:34 CEST 2010

On Sat, Sep 11, 2010 at 10:18:38PM +0300, Martin Storsjö wrote:
> On Fri, 10 Sep 2010, Martin Storsjö wrote:
> 
> > On Fri, 10 Sep 2010, Martin Storsjö wrote:
> > 
> > > On Fri, 10 Sep 2010, Michael Niedermayer wrote:
> > > 
> > > > On Fri, Sep 10, 2010 at 04:24:00PM +0300, Martin Storsjö wrote:
> > > > > Hi,
> > > > > 
> > > > > As in $subj, the first patch adds a straightforward encoder that produces 
> > > > > bitexact output matching the reference test vectors. The second patch adds 
> > > > > trellis support to the encoder, closely modelled after the code in 
> > > > > libavcodec/adpcm.c.
> > > > > 
> > > > > I'm only doing trellis on the lower sub-band, since the higher one is only 
> > > > > encoded with 2 bits. And if I wanted to do trellis for both of them at the 
> > > > > same time, I'd have to run the QMF on the output from both subbands in 
> > > > > order to get a single decoded sample value, so as to have one difference 
> > > > > instead of two to compare.
> > > > 
> > > > why not compare 2 ?
> > > > 
> > > > diff= (a0-a1)^2 + C*(b0-b1)^2
> > > 
> > > I guess that's doable, too, I'll try that.
> > 
> > This actually turned out to work quite well, thanks! New version attached 
> > that does trellis for both of them at the same time.
> 
> Updated patches attached - I tuned the testing range for the lower subband 
> a bit to achieve even better results.
> 
> // Martin
>  Changelog              |    2 -
>  doc/general.texi       |    2 -
>  libavcodec/Makefile    |    1 
>  libavcodec/allcodecs.c |    2 -
>  libavcodec/g722.c      |   82 +++++++++++++++++++++++++++++++++++++++++++++++--
>  5 files changed, 84 insertions(+), 5 deletions(-)
> bec600bc0d23482505cc14b4147d6408dc0b1829  0001-Add-a-G.722-encoder.patch
> From 2ba47a682860c43d342ee2053a3daae3498cdd20 Mon Sep 17 00:00:00 2001
> From: Martin Storsjo <martin at martin.st>
> Date: Fri, 6 Aug 2010 23:06:37 +0300
> Subject: [PATCH 1/2] Add a G.722 encoder
> 
> ---
>  Changelog              |    2 +-
>  doc/general.texi       |    2 +-
>  libavcodec/Makefile    |    1 +
>  libavcodec/allcodecs.c |    2 +-
>  libavcodec/g722.c      |   82 ++++++++++++++++++++++++++++++++++++++++++++++-
>  5 files changed, 84 insertions(+), 5 deletions(-)
> 
> diff --git a/Changelog b/Changelog
> index 70849ea..59dabbf 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -33,7 +33,7 @@ version <next>:
>  - Apple HTTP Live Streaming demuxer
>  - a64 codec
>  - MMS-HTTP support
> -- G.722 ADPCM audio decoder
> +- G.722 ADPCM audio encoder/decoder
>  
>  
>  version 0.6:
> diff --git a/doc/general.texi b/doc/general.texi
> index a692e04..42b9e92 100644
> --- a/doc/general.texi
> +++ b/doc/general.texi
> @@ -535,7 +535,7 @@ following image formats are supported:
>  @item ADPCM Electronic Arts R2  @tab     @tab  X
>  @item ADPCM Electronic Arts R3  @tab     @tab  X
>  @item ADPCM Electronic Arts XAS @tab     @tab  X
> -@item ADPCM G.722            @tab     @tab  X
> +@item ADPCM G.722            @tab  X  @tab  X
>  @item ADPCM G.726            @tab  X  @tab  X
>  @item ADPCM IMA AMV          @tab     @tab  X
>      @tab Used in AMV files
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index e0d7028..a4ee523 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -476,6 +476,7 @@ OBJS-$(CONFIG_ADPCM_EA_R2_DECODER)        += adpcm.o
>  OBJS-$(CONFIG_ADPCM_EA_R3_DECODER)        += adpcm.o
>  OBJS-$(CONFIG_ADPCM_EA_XAS_DECODER)       += adpcm.o
>  OBJS-$(CONFIG_ADPCM_G722_DECODER)         += g722.o
> +OBJS-$(CONFIG_ADPCM_G722_ENCODER)         += g722.o
>  OBJS-$(CONFIG_ADPCM_G726_DECODER)         += g726.o
>  OBJS-$(CONFIG_ADPCM_G726_ENCODER)         += g726.o
>  OBJS-$(CONFIG_ADPCM_IMA_AMV_DECODER)      += adpcm.o
> diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
> index 31cfadd..ab2dcd3 100644
> --- a/libavcodec/allcodecs.c
> +++ b/libavcodec/allcodecs.c
> @@ -317,7 +317,7 @@ void avcodec_register_all(void)
>      REGISTER_DECODER (ADPCM_EA_R2, adpcm_ea_r2);
>      REGISTER_DECODER (ADPCM_EA_R3, adpcm_ea_r3);
>      REGISTER_DECODER (ADPCM_EA_XAS, adpcm_ea_xas);
> -    REGISTER_DECODER (ADPCM_G722, adpcm_g722);
> +    REGISTER_ENCDEC  (ADPCM_G722, adpcm_g722);
>      REGISTER_ENCDEC  (ADPCM_G726, adpcm_g726);
>      REGISTER_DECODER (ADPCM_IMA_AMV, adpcm_ima_amv);
>      REGISTER_DECODER (ADPCM_IMA_DK3, adpcm_ima_dk3);
> diff --git a/libavcodec/g722.c b/libavcodec/g722.c
> index 8707d16..b893d22 100644
> --- a/libavcodec/g722.c
> +++ b/libavcodec/g722.c
> @@ -1,5 +1,5 @@
>  /*
> - * G.722 ADPCM audio decoder
> + * G.722 ADPCM audio encoder/decoder
>   *
>   * Copyright (c) CMU 1993 Computer Science, Speech Group
>   *                        Chengxiang Lu and Alex Hauptmann
> @@ -213,12 +213,13 @@ static av_cold int g722_init(AVCodecContext * avctx)
>      c->band[1].scale_factor = 2;
>      c->prev_samples_pos = 22;
>  
> -    if (avctx->lowres)
> +    if (avctx->lowres && avctx->codec->decode)
>          avctx->sample_rate /= 2;
>  
>      return 0;
>  }
>  
> +#if CONFIG_ADPCM_G722_DECODER
>  static const int16_t low_inv_quant5[32] = {
>       -35,   -35, -2919, -2195, -1765, -1458, -1219, -1023,
>      -858,  -714,  -587,  -473,  -370,  -276,  -190,  -110,
> @@ -301,4 +302,81 @@ AVCodec adpcm_g722_decoder = {
>      .long_name      = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
>      .max_lowres     = 1,
>  };
> +#endif
> +
> +#if CONFIG_ADPCM_G722_ENCODER
> +static const int16_t low_quant[29] = {
> +      35,   72,  110,  150,  190,  233,  276,  323,
> +     370,  422,  473,  530,  587,  650,  714,  786,
> +     858,  940, 1023, 1121, 1219, 1339, 1458, 1612,
> +    1765, 1980, 2195, 2557, 2919
> +};
> +
> +static inline void filter_samples(G722Context *c, const int16_t *samples,
> +                                  int *xlow, int *xhigh)
> +{
> +    int xout1, xout2;
> +    c->prev_samples[c->prev_samples_pos++] = samples[0];
> +    c->prev_samples[c->prev_samples_pos++] = samples[1];
> +    apply_qmf(c->prev_samples + c->prev_samples_pos - 24, &xout1, &xout2);
> +    *xlow  = xout1 + xout2 >> 13;
> +    *xhigh = xout1 - xout2 >> 13;
> +    if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
> +        memmove(c->prev_samples,
> +                c->prev_samples + c->prev_samples_pos - 22,
> +                22 * sizeof(c->prev_samples[0]));
> +        c->prev_samples_pos = 22;
> +    }
> +}
> +
> +static inline int encode_high(G722Context *c, int xhigh)
> +{
> +    int diff = av_clip_int16(xhigh - c->band[1].s_predictor);
> +    int pred = 564 * c->band[1].scale_factor >> 10;

this can be simplified to *141 >> 8 (564/1024 == 141/256)


> +    int index = diff >= 0 ? (diff < pred) + 2 : diff >= -pred;
> +
> +    update_high_predictor(&c->band[1], c->band[1].scale_factor *
> +                          high_inv_quant[index] >> 10, index);
> +    return index;
> +}
> +
> +static inline int encode_low(const struct G722Band* state, int xlow)
> +{
> +    int diff  = av_clip_int16(xlow - state->s_predictor);
> +    int limit = diff >= 0 ? diff : -(diff + 1);

> +    int i = 0;
> +    while (i < 29 && limit >= (low_quant[i] * state->scale_factor) >> 10)
> +        i++;

that doesn't look efficient
limit >= (low_quant[i] * state->scale_factor) >> 10
can be changed to
C > low_quant[i]
with C computed once before the loop from limit and state->scale_factor

also a LUT could be tried if this matters speed-wise

trellis patch not reviewed yet

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Complexity theory is the science of finding the exact solution to an
approximation. Benchmarking OTOH is finding an approximation of the exact
solution.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20100914/917e56dc/attachment.pgp>