[FFmpeg-devel] [PATCH] avcodec/dcaenc: Use ffmpeg mdct instead of own implementation

Sat Jan 13 20:41:09 EET 2018

On 13 January 2018 at 00:10, Даниил Чередник <dan.cherednik at gmail.com>
wrote:

> New patch attached.
>
> Thanks.
>
> On Sat, Jan 13, 2018 at 2:37 AM, James Almer <jamrial at gmail.com> wrote:
>
> > On 1/12/2018 8:12 PM, Даниил Чередник wrote:
> > > Historically, dcaenc has used its own implementation of the
> > > time->frequency transform for the psychoacoustic model. However, the
> > > function named fft in the original dcaenc code is not actually an FFT.
> > > Its power spectrum looks similar to that of an MDCT, and
> > > Alexander E. Patrakov told me it is an MDCT. To me that is still a bit
> > > strange, because of the output size and the absence of phase shift
> > > sensitivity. I also considered MCLT, but again the result of the
> > > original function was different. So I decided to use the ffmpeg MDCT
> > > here.
> > >
> > >
> > > Results:
> > >
> > > I could not hear any difference between the original and modified
> > > versions.
> > >
> > > I got approximately a 10% performance boost.
> >
> >
> > > From 39e7f15886f1c083f3a3d37d52778882c8949a93 Mon Sep 17 00:00:00 2001
> > > From: Daniil Cherednik <dan.cherednik at gmail.com>
> > > Date: Sun, 7 Jan 2018 22:39:22 +0000
> > > Subject: [PATCH] avcodec/dcaenc: Use ffmpeg mdct instead of own
> > implementation
> > >
> > > Signed-off-by: Daniil Cherednik <dan.cherednik at gmail.com>
> > > ---
> > >  libavcodec/dcaenc.c   | 107 ++++++++++++++----------------
> > --------------------
> > >  tests/fate/acodec.mak |   4 +-
> > >  2 files changed, 32 insertions(+), 79 deletions(-)
> > >
> > > diff --git a/libavcodec/dcaenc.c b/libavcodec/dcaenc.c
> > > index dd601ffae0..b924c58185 100644
> > > --- a/libavcodec/dcaenc.c
> > > +++ b/libavcodec/dcaenc.c
> > > @@ -21,6 +21,9 @@
> > >   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> > 02110-1301 USA
> > >   */
> > >
> > > +#define FFT_FLOAT 0
> > > +#define FFT_FIXED_32 1
> > > +
> > >  #include "libavutil/avassert.h"
> > >  #include "libavutil/channel_layout.h"
> > >  #include "libavutil/common.h"
> > > @@ -33,6 +36,7 @@
> > >  #include "dca_core.h"
> > >  #include "dcadata.h"
> > >  #include "dcaenc.h"
> > > +#include "fft.h"
> > >  #include "internal.h"
> > >  #include "mathops.h"
> > >  #include "put_bits.h"
> > > @@ -56,6 +60,7 @@ typedef struct DCAEncContext {
> > >      AVClass *class;
> > >      PutBitContext pb;
> > >      DCAADPCMEncContext adpcm_ctx;
> > > +    FFTContext mdct;
> > >      CompressionOptions options;
> > >      int frame_size;
> > >      int frame_bits;
> > > @@ -154,6 +159,7 @@ static int encode_init(AVCodecContext *avctx)
> > >      DCAEncContext *c = avctx->priv_data;
> > >      uint64_t layout = avctx->channel_layout;
> > >      int i, j, min_frame_bits;
> > > +    int rv;
> >
> > We normally use ret for variables meant to hold a return value.
> >
> > >
> > >      if (subband_bufer_alloc(c))
> > >          return AVERROR(ENOMEM);
> > > @@ -231,6 +237,9 @@ static int encode_init(AVCodecContext *avctx)
> > >
> > >      avctx->frame_size = 32 * SUBBAND_SAMPLES;
> > >
> > > +    if ((rv = ff_mdct_init(&c->mdct, 9, 0, 1.0)) < 0)
> > > +        return rv;
> > > +
> > >      if (!cos_table[0]) {
> > >          int j, k;
> > >
> > > @@ -297,6 +306,7 @@ static av_cold int encode_close(AVCodecContext
> > *avctx)
> > >  {
> > >      if (avctx->priv_data) {
> > >          DCAEncContext *c = avctx->priv_data;
> > > +        ff_mdct_end(&c->mdct);
> > >          subband_bufer_free(c);
> > >          ff_dcaadpcm_free(&c->adpcm_ctx);
> > >      }
> > > @@ -398,78 +408,6 @@ static void lfe_downsample(DCAEncContext *c, const
> > int32_t *input)
> > >      }
> > >  }
> > >
> > > -typedef struct {
> > > -    int32_t re;
> > > -    int32_t im;
> > > -} cplx32;
> > > -
> > > -static void fft(const int32_t in[2 * 256], cplx32 out[256])
> > > -{
> > > -    cplx32 buf[256], rin[256], rout[256];
> > > -    int i, j, k, l;
> > > -
> > > -    /* do two transforms in parallel */
> > > -    for (i = 0; i < 256; i++) {
> > > -        /* Apply the Hann window */
> > > -        rin[i].re = mul32(in[2 * i], 0x3fffffff - (cos_t(8 * i + 2) >>
> > 1));
> > > -        rin[i].im = mul32(in[2 * i + 1], 0x3fffffff - (cos_t(8 * i +
> 6)
> > >> 1));
> > > -    }
> > > -    /* pre-rotation */
> > > -    for (i = 0; i < 256; i++) {
> > > -        buf[i].re = mul32(cos_t(4 * i + 2), rin[i].re)
> > > -                  - mul32(sin_t(4 * i + 2), rin[i].im);
> > > -        buf[i].im = mul32(cos_t(4 * i + 2), rin[i].im)
> > > -                  + mul32(sin_t(4 * i + 2), rin[i].re);
> > > -    }
> > > -
> > > -    for (j = 256, l = 1; j != 1; j >>= 1, l <<= 1) {
> > > -        for (k = 0; k < 256; k += j) {
> > > -            for (i = k; i < k + j / 2; i++) {
> > > -                cplx32 sum, diff;
> > > -                int t = 8 * l * i;
> > > -
> > > -                sum.re = buf[i].re + buf[i + j / 2].re;
> > > -                sum.im = buf[i].im + buf[i + j / 2].im;
> > > -
> > > -                diff.re = buf[i].re - buf[i + j / 2].re;
> > > -                diff.im = buf[i].im - buf[i + j / 2].im;
> > > -
> > > -                buf[i].re = half32(sum.re);
> > > -                buf[i].im = half32(sum.im);
> > > -
> > > -                buf[i + j / 2].re = mul32(diff.re, cos_t(t))
> > > -                                  - mul32(diff.im, sin_t(t));
> > > -                buf[i + j / 2].im = mul32(diff.im, cos_t(t))
> > > -                                  + mul32(diff.re, sin_t(t));
> > > -            }
> > > -        }
> > > -    }
> > > -    /* post-rotation */
> > > -    for (i = 0; i < 256; i++) {
> > > -        int b = ff_reverse[i];
> > > -        rout[i].re = mul32(buf[b].re, cos_t(4 * i))
> > > -                   - mul32(buf[b].im, sin_t(4 * i));
> > > -        rout[i].im = mul32(buf[b].im, cos_t(4 * i))
> > > -                   + mul32(buf[b].re, sin_t(4 * i));
> > > -    }
> > > -    for (i = 0; i < 256; i++) {
> > > -        /* separate the results of the two transforms */
> > > -        cplx32 o1, o2;
> > > -
> > > -        o1.re =  rout[i].re - rout[255 - i].re;
> > > -        o1.im =  rout[i].im + rout[255 - i].im;
> > > -
> > > -        o2.re =  rout[i].im - rout[255 - i].im;
> > > -        o2.im = -rout[i].re - rout[255 - i].re;
> > > -
> > > -        /* combine them into one long transform */
> > > -        out[i].re = mul32( o1.re + o2.re, cos_t(2 * i + 1))
> > > -                  + mul32( o1.im - o2.im, sin_t(2 * i + 1));
> > > -        out[i].im = mul32( o1.im + o2.im, cos_t(2 * i + 1))
> > > -                  + mul32(-o1.re + o2.re, sin_t(2 * i + 1));
> > > -    }
> > > -}
> > > -
> > >  static int32_t get_cb(int32_t in)
> > >  {
> > >      int i, res;
> > > @@ -494,21 +432,36 @@ static int32_t add_cb(int32_t a, int32_t b)
> > >      return a + cb_to_add[a - b];
> > >  }
> > >
> > > -static void adjust_jnd(int samplerate_index,
> > > +static void calc_power(DCAEncContext *c,
> > > +                       const int32_t in[2 * 256], int32_t power[256])
> > > +{
> > > +    int i;
> > > +    DECLARE_ALIGNED(32, int32_t, data)[512];
> > > +    DECLARE_ALIGNED(32, int32_t, coeff)[256];
> >
> > LOCAL_ALIGNED_32(int32_t, data,  [512]);
> > LOCAL_ALIGNED_32(int32_t, coeff, [256]);
> >
> > > +    for (i = 0; i < 512; i++) {
> > > +        data[i] = norm__(mul32(in[i], 0x3fffffff - (cos_t(4 * i + 2)
> >>
> > 1)), 4);
> > > +    }
> > > +    c->mdct.mdct_calc(&c->mdct, coeff, data);
> > > +    for (i = 0; i < 256; i++) {
> > > +        const int32_t cb = get_cb(coeff[i]);
> > > +        power[i] = add_cb(cb, cb);
> > > +    }
> > > +}
> > > +
> > > +static void adjust_jnd(DCAEncContext *c,
> > >                         const int32_t in[512], int32_t out_cb[256])
> > >  {
> > >      int32_t power[256];
> > > -    cplx32 out[256];
> > >      int32_t out_cb_unnorm[256];
> > >      int32_t denom;
> > >      const int32_t ca_cb = -1114;
> > >      const int32_t cs_cb = 928;
> > > +    const int samplerate_index = c->samplerate_index;
> > >      int i, j;
> > >
> > > -    fft(in, out);
> > > +    calc_power(c, in, power);
> > >
> > >      for (j = 0; j < 256; j++) {
> > > -        power[j] = add_cb(get_cb(out[j].re), get_cb(out[j].im));
> > >          out_cb_unnorm[j] = -2047; /* and can only grow */
> > >      }
> > >
> > > @@ -586,7 +539,7 @@ static void calc_masking(DCAEncContext *c, const
> > int32_t *input)
> > >                  data[i] = c->history[ch][k];
> > >              for (k -= 512; i < 512; i++, k++)
> > >                  data[i] = input[k * c->channels + chi];
> > > -            adjust_jnd(c->samplerate_index, data,
> > c->masking_curve_cb[ssf]);
> > > +            adjust_jnd(c, data, c->masking_curve_cb[ssf]);
> > >          }
> > >      for (i = 0; i < 256; i++) {
> > >          int32_t m = 2048;
> > > diff --git a/tests/fate/acodec.mak b/tests/fate/acodec.mak
> > > index 5c3fea90c5..80d26de0f9 100644
> > > --- a/tests/fate/acodec.mak
> > > +++ b/tests/fate/acodec.mak
> > > @@ -104,14 +104,14 @@ fate-acodec-dca: tests/data/asynth-44100-2.wav
> > >  fate-acodec-dca: SRC = tests/data/asynth-44100-2.wav
> > >  fate-acodec-dca: CMD = md5 -i $(TARGET_PATH)/$(SRC) -c:a dca -strict
> -2
> > -f dts -flags +bitexact
> > >  fate-acodec-dca: CMP = oneline
> > > -fate-acodec-dca: REF = 7cd79a3717943a06b217f1130223a86f
> > > +fate-acodec-dca: REF = 2aa580ac67820fce4f581b96ebb34acc
> > >
> > >  FATE_ACODEC-$(call ENCDEC, DCA, WAV) += fate-acodec-dca2
> > >  fate-acodec-dca2: CMD = enc_dec_pcm dts wav s16le $(SRC) -c:a dca
> > -strict -2 -flags +bitexact
> > >  fate-acodec-dca2: REF = $(SRC)
> > >  fate-acodec-dca2: CMP = stddev
> > >  fate-acodec-dca2: CMP_SHIFT = -2048
> > > -fate-acodec-dca2: CMP_TARGET = 527
> > > +fate-acodec-dca2: CMP_TARGET = 535
> > >  fate-acodec-dca2: SIZE_TOLERANCE = 1632
> > >
> > >  FATE_ACODEC-$(call ENCDEC, FLAC, FLAC) += fate-acodec-flac
> > fate-acodec-flac-exact-rice
> > > --
> > > 2.13.5
> > >
> > _______________________________________________
> > ffmpeg-devel mailing list
> > ffmpeg-devel at ffmpeg.org
> > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> >
>
>
>
> --
> Daniil Cherednik
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
>
Applied (I also removed the cos_t, sin_t and half32 functions, since they
were unneeded).
I also cleaned up the encoder; I hope you don't mind, as there were dozens of
code style issues and things that were not where they should have been.
Thanks