[FFmpeg-devel] [PATCH] Chinese AVS encoder

Sun Jan 23 16:24:06 CET 2011

On 07/25/2007 07:50 AM, Stefan Gehrer wrote:
> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA1
>
> Hi,
>
> Michael Niedermayer wrote:
>>> Michael, regarding your question on cvslog (sorry for crossposting,
>>> I wasn't subscribed to cvslog at the time):
>>>
>>>>> Author: stefang
>>>>> Date: Sat Jul  7 09:14:58 2007
>>>>> New Revision: 9518
>>>>>
>>>>> Log:
>>>>> move dequantization into its own inline function
>>>> why inline? same question for the other inline functions
>>>> is there some speed gain from duplicating them in the object files
>>>> between encoder and decoder?
>>> To be honest I did not think much about object size, it just seemed
>>> to be a convenient way to share functions this way instead of having
>>> to create ff_cavs_foobar names and without creating function call
>>> overhead that was not there before.  If you think object size would be a
>>> problem I can of course move some functions back from cavs.h to cavs.c.
>>
>> yes please do unless there's a real speed loss from doing so
>>
>
> done
>
>>
>> [...]
>>> +/**
>>> + * eliminate residual blocks that only have insignificant coefficients,
>>> + * inspired from x264 and JVT-B118
>>> + */
>>> +static inline int decimate_block(uint8_t *run, DCTELEM *level, int count) {
>>> +    static const uint8_t run_score[30] = {
>>> +        0,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0 };
>>> +    int i;
>>> +    int score = 0;
>>> +
>>> +    if(count>4)
>>> +        return 9;
>>> +    for(i=0;i<count;i++) {
>>> +        int abslevel = FFABS(level[i]);
>>> +        if(abslevel>  1)
>>> +            return 9;
>>> +        score += run_score[FFMIN(run[i],29)];
>>> +    }
>>> +    return score;
>>> +}
>>
>> how much psnr/bitrate is gained by this? if none then please drop it
>
> I tested on the popular foreman (300 CIF frames). When encoding at
> around 800kbps, roughly 0.1dB are gained by this. When encoding at
> around 200kbps, this increases to a gain of around 0.15dB.
> So I would like to keep it.
>
>>
>> [...]
>>
>>> +    av_reduce(&frame_rate.den,&frame_rate.num,
>>> +              s->avctx->time_base.num, s->avctx->time_base.den, 60000);
>>
>> if the exact one isn't supported then the code should fail; it's the user
>> app's job to choose a supported one
>>
>
> removed
>
>
>>
>>> +    for(i=0;i<15;i++)
>>> +        if((ff_frame_rate_tab[i].den == frame_rate.den)&&
>>> +           (ff_frame_rate_tab[i].num == frame_rate.num))
>>> +            frame_rate_code = i;
>>> +    if(frame_rate_code<  0) {
>>> +        av_log(h->s.avctx, AV_LOG_ERROR, "unsupported framerate %d/%d\n",
>>> +               frame_rate.num, frame_rate.den);
>>> +        return -1;
>>> +    }
>>
>>
>> [...]
>>> +        put_bits(&s->pb,16,0);
>>> +        put_bits(&s->pb,16,CAVS_START_CODE);
>>
>> add a put_bits_long() to bitstream.c, document the issue with 32 bits and
>> fix vorbis_enc.c :)
>
> I looked a bit into this and I think a better way would be to fix
> put_bits() to support up to 32 bits instead of up to 31 bits.
> I am not so sure, but after a bit of checking in vorbis_enc.c it
> seems there is never any writing with more than 32 bits even
> though the local put_bits() function takes a 64bit argument.
>
> Attached a new encoder patch and a proposed fix for put_bits().


Git-friendly patches attached so that patchwork will pick them up.

-Vitor

>From stefan.gehreratgmx.de  Sun Jan 23 16:19:30 2011
From: stefan.gehreratgmx.de (Stefan Gehrer)
Date: Sun, 23 Jan 2011 16:19:30 +0100
Subject: [PATCH 1/2] Make put_bits() support 32 bits instead of 31
Message-ID: <mailman.461.1295796257.1307.ffmpeg-devel at mplayerhq.hu>

---
 libavcodec/bitstream.h |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/libavcodec/bitstream.h b/libavcodec/bitstream.h
index fd1b4a9..04942a0 100644
--- a/libavcodec/bitstream.h
+++ b/libavcodec/bitstream.h
@@ -226,7 +226,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
         bit_buf = (bit_buf<<n) | value;
         bit_left-=n;
     } else {
-        bit_buf<<=bit_left;
+        bit_buf = (uint64_t)bit_buf << bit_left;
         bit_buf |= value >> (n - bit_left);
 #ifdef UNALIGNED_STORES_ARE_BAD
         if (3 & (intptr_t) s->buf_ptr) {
-- 
1.7.1


--------------010605070306040906030502
Content-Type: text/x-patch;
 name="0002-Chinese-AVS-encoder.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
 filename="0002-Chinese-AVS-encoder.patch"


>From stefan.gehreratgmx.de  Sun Jan 23 16:21:20 2011
From: stefan.gehreratgmx.de (Stefan Gehrer)
Date: Sun, 23 Jan 2011 16:21:20 +0100
Subject: [PATCH 2/2] Chinese AVS encoder
Message-ID: <mailman.462.1295796257.1307.ffmpeg-devel at mplayerhq.hu>

---
 Changelog              |    1 +
 doc/ffmpeg-doc.texi    |    1 +
 libavcodec/Makefile    |    1 +
 libavcodec/allcodecs.c |    2 +-
 libavcodec/allcodecs.h |    1 +
 libavcodec/cavs.c      |    4 +-
 libavcodec/cavs.h      |   27 ++-
 libavcodec/cavsdsp.c   |   99 +++++++-
 libavcodec/cavsenc.c   |  687 ++++++++++++++++++++++++++++++++++++++++++++++++
 libavcodec/dsputil.h   |    3 +
 10 files changed, 821 insertions(+), 5 deletions(-)
 create mode 100644 libavcodec/cavsenc.c

diff --git a/Changelog b/Changelog
index 6ecbdb7..d8d177b 100644
--- a/Changelog
+++ b/Changelog
@@ -90,6 +90,7 @@ version <next>
 - RoQ video encoder
 - QTRLE encoder
 - OS/2 support removed
+- Chinese AVS encoder
 
 version 0.4.9-pre1:
 
diff --git a/doc/ffmpeg-doc.texi b/doc/ffmpeg-doc.texi
index fca90cc..0e99c29 100644
--- a/doc/ffmpeg-doc.texi
+++ b/doc/ffmpeg-doc.texi
@@ -1072,6 +1072,7 @@ following image formats are supported:
 @item THP                    @tab     @tab  X @tab Used on the Nintendo GameCube.
 @item Bethsoft VID           @tab     @tab  X @tab Used in some games from Bethesda Softworks.
 @item Renderware TXD         @tab     @tab  X @tab Texture dictionaries used by the Renderware Engine.
+@item Chinese AVS            @tab  X  @tab  X @tab JiZhun Profile
 @end multitable
 
 @code{X} means that encoding (resp. decoding) is supported.
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index e1685fe..47ffdae 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -45,6 +45,7 @@ OBJS-$(CONFIG_BMP_DECODER)             += bmp.o
 OBJS-$(CONFIG_BMP_ENCODER)             += bmpenc.o
 OBJS-$(CONFIG_C93_DECODER)             += c93.o
 OBJS-$(CONFIG_CAVS_DECODER)            += cavs.o cavsdec.o cavsdsp.o golomb.o
+OBJS-$(CONFIG_CAVS_ENCODER)            += cavs.o cavsenc.o cavsdsp.o golomb.o
 OBJS-$(CONFIG_CINEPAK_DECODER)         += cinepak.o
 OBJS-$(CONFIG_CLJR_DECODER)            += cljr.o
 OBJS-$(CONFIG_CLJR_ENCODER)            += cljr.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 8153e41..5a8f69b 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -63,7 +63,7 @@ void avcodec_register_all(void)
     REGISTER_DECODER(BETHSOFTVID, bethsoftvid);
     REGISTER_ENCDEC (BMP, bmp);
     REGISTER_DECODER(C93, c93);
-    REGISTER_DECODER(CAVS, cavs);
+    REGISTER_ENCDEC (CAVS, cavs);
     REGISTER_DECODER(CINEPAK, cinepak);
     REGISTER_DECODER(CLJR, cljr);
     REGISTER_DECODER(CSCD, cscd);
diff --git a/libavcodec/allcodecs.h b/libavcodec/allcodecs.h
index 3e18ca0..3d32945 100644
--- a/libavcodec/allcodecs.h
+++ b/libavcodec/allcodecs.h
@@ -27,6 +27,7 @@ extern AVCodec ac3_encoder;
 extern AVCodec asv1_encoder;
 extern AVCodec asv2_encoder;
 extern AVCodec bmp_encoder;
+extern AVCodec cavs_encoder;
 extern AVCodec dvvideo_encoder;
 extern AVCodec ffv1_encoder;
 extern AVCodec ffvhuff_encoder;
diff --git a/libavcodec/cavs.c b/libavcodec/cavs.c
index 31b7e58..323989b 100644
--- a/libavcodec/cavs.c
+++ b/libavcodec/cavs.c
@@ -1,5 +1,5 @@
 /*
- * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
+ * Chinese AVS video (AVS1-P2, JiZhun profile) codec
  * Copyright (c) 2006  Stefan Gehrer <stefan.gehrer at gmx.de>
  *
  * This file is part of FFmpeg.
@@ -21,7 +21,7 @@
 
 /**
  * @file cavs.c
- * Chinese AVS video (AVS1-P2, JiZhun profile) decoder
+ * Chinese AVS video (AVS1-P2, JiZhun profile) codec
  * @author Stefan Gehrer <stefan.gehrer at gmx.de>
  */
 
diff --git a/libavcodec/cavs.h b/libavcodec/cavs.h
index adc9c24..fad084a 100644
--- a/libavcodec/cavs.h
+++ b/libavcodec/cavs.h
@@ -55,6 +55,13 @@
 #define MV_BWD_OFFS                     12
 #define MV_STRIDE                        4
 
+#define CAVS_MAX_RUN                    25 /* highest run index of enc_2dvlc_t.rlcode */
+#define CAVS_MAX_LEVEL                  26 /* highest level index of enc_2dvlc_t.rlcode */
+#define INTRA_BIAS                   38<<8 /* bias passed to cavs_quant() for intra blocks; NOTE(review): expansion is not parenthesised */
+#define INTER_BIAS                   28<<8 /* bias passed to cavs_quant() for inter blocks; NOTE(review): expansion is not parenthesised */
+#define ME_THRES                     77000 /* a P macroblock is coded as intra when its best motion cost is not below this */
+#define ME_ITER                         20 /* iteration budget of the hex motion search */
+
 enum mb_t {
   I_8X8 = 0,
   P_SKIP,
@@ -104,7 +111,8 @@ enum mv_pred_t {
   MV_PRED_TOP,
   MV_PRED_TOPRIGHT,
   MV_PRED_PSKIP,
-  MV_PRED_BSKIP
+  MV_PRED_BSKIP,
+  MV_PRED_ENC
 };
 
 enum block_t {
@@ -153,9 +161,16 @@ typedef struct dec_2dvlc_t {
 } dec_2dvlc_t;
 
 typedef struct {
+    int8_t rlcode[CAVS_MAX_RUN+1][CAVS_MAX_LEVEL+1]; /* code number per [run][level]; negative entries make the encoder use escape coding */
+    int end_code; /* code number written after the last coefficient of a block */
+    const dec_2dvlc_t *dec; /* decoder table paired with this one (golomb_order, max_run, level_add, inc_limit, rltab are read from it) */
+} enc_2dvlc_t;
+
+typedef struct {
     MpegEncContext s;
     Picture picture; ///< currently decoded frame
     Picture DPB[2];  ///< reference frames
+    Picture input_picture; ///< encoder input
     int dist[2];     ///< temporal distances from current frame to ref frames
     int profile, level;
     int aspect_ratio;
@@ -171,6 +186,7 @@ typedef struct {
     int flags;         ///< availability flags of neighbouring macroblocks
     int stc;           ///< last start code
     uint8_t *cy, *cu, *cv; ///< current MB sample pointers
+    uint8_t *ey, *eu, *ev; ///< encoded MB sample pointers
     int left_qp;
     uint8_t *top_qp;
 
@@ -222,6 +238,15 @@ typedef struct {
 
     int got_keyframe;
     DCTELEM *block;
+
+    /* encoder only */
+    DCTELEM *levels[6];
+    uint8_t *runs[6];
+    int total_coeff[6];
+    int lambda;
+    int mrefs;         ///< flag: one reference frame (0) or two (1)
+    int skip_count;
+    int poc;           ///< picture order count
 } AVSContext;
 
 extern const uint8_t     ff_cavs_dequant_shift[64];
diff --git a/libavcodec/cavsdsp.c b/libavcodec/cavsdsp.c
index fd744cc..5db9abd 100644
--- a/libavcodec/cavsdsp.c
+++ b/libavcodec/cavsdsp.c
@@ -1,5 +1,5 @@
 /*
- * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
+ * Chinese AVS video (AVS1-P2, JiZhun profile) codec
  *
  * DSP functions
  *
@@ -177,6 +177,101 @@ static void cavs_filter_ch_c(uint8_t *d, int stride, int alpha, int beta, int tc
 
 /*****************************************************************************
  *
+ * quantization
+ *
+ ****************************************************************************/
+
+static void cavs_quant_c(DCTELEM *block, const uint16_t *norm, int mul,
+                         int bias) { /* quantises the 64 coefficients of block in place */
+    int i;
+
+    for(i=0;i<64;i++) {
+        if(block[i] > 0) /* positive: scale by norm[i] with rounding (>>19), then multiply by mul, add bias, >>15 */
+            block[i] =   ((((1<<18) + block[i]*norm[i])>>19)*mul+bias)>>15;
+        else /* zero or negative: same computation on the magnitude, result negated */
+            block[i] = -(((((1<<18) - block[i]*norm[i])>>19)*mul+bias)>>15);
+    }
+}
+
+/*****************************************************************************
+ *
+ * forward transform
+ *
+ ****************************************************************************/
+
+static void cavs_sub_dct8_c(uint8_t *src1, uint8_t *src2, DCTELEM *block, int stride1, int stride2) {
+    int i,j; /* writes the 8x8 difference src1 - src2 into block, then transforms it in place */
+    DCTELEM (*dst)[8] = (DCTELEM(*)[8])block; /* view block as 8 rows of 8 coefficients */
+
+    for(j=0;j<8;j++) { /* residual: one row of each source per iteration */
+        for(i=0;i<8;i++)
+            dst[j][i] = src1[i] - src2[i];
+        src1 += stride1;
+        src2 += stride2;
+    }
+
+    for(i = 0; i < 8; i++) { /* first pass: transform along each row, no scaling */
+        const int a0 =  dst[i][0] + dst[i][7];
+        const int a1 =  dst[i][1] + dst[i][6];
+        const int a2 =  dst[i][2] + dst[i][5];
+        const int a3 =  dst[i][3] + dst[i][4];
+        const int a4 =  dst[i][0] - dst[i][7];
+        const int a5 =  dst[i][1] - dst[i][6];
+        const int a6 =  dst[i][2] - dst[i][5];
+        const int a7 =  dst[i][3] - dst[i][4];
+
+        const int b0 = a0 + a3;
+        const int b1 = a1 + a2;
+        const int b2 = a0 - a3;
+        const int b3 = a1 - a2;
+
+        const int b4 = ((a4 - a7)<<1) + a4;
+        const int b5 = ((a6 + a5)<<1) + a5;
+        const int b6 = ((a6 - a5)<<1) + a6;
+        const int b7 = ((a4 + a7)<<1) + a7;
+
+        dst[i][0] = (b0 + b1)<<3;
+        dst[i][1] = ((b4 + b5 + b7)<<1) + b5;
+        dst[i][2] = b2*10 + b3*4;
+        dst[i][3] = ((b4 - b5 - b6)<<1) + b4;
+        dst[i][4] = (b0 - b1)<<3;
+        dst[i][5] = ((b7 - b5 + b6)<<1) + b7;
+        dst[i][6] = b2*4 - b3*10;
+        dst[i][7] = ((b4 + b6 - b7)<<1) + b6;
+    }
+    for(i = 0; i < 8; i++) { /* second pass: transform along each column, round and scale down by >>5 */
+        const int a0 =  dst[0][i] + dst[7][i];
+        const int a1 =  dst[1][i] + dst[6][i];
+        const int a2 =  dst[2][i] + dst[5][i];
+        const int a3 =  dst[3][i] + dst[4][i];
+        const int a4 =  dst[0][i] - dst[7][i];
+        const int a5 =  dst[1][i] - dst[6][i];
+        const int a6 =  dst[2][i] - dst[5][i];
+        const int a7 =  dst[3][i] - dst[4][i];
+
+        const int b0 = a0 + a3 + 2; /* the +2 becomes the +16 rounding term after <<3 for outputs 0 and 4 */
+        const int b1 = a1 + a2;
+        const int b2 = a0 - a3;
+        const int b3 = a1 - a2;
+
+        const int b4 = ((a4 - a7)<<1) + a4;
+        const int b5 = ((a6 + a5)<<1) + a5;
+        const int b6 = ((a6 - a5)<<1) + a6;
+        const int b7 = ((a4 + a7)<<1) + a7;
+
+        dst[0][i] = (((b0 + b1)<<3)            + 0)>>5;
+        dst[1][i] = ((((b4 + b5 + b7)<<1) + b5)+16)>>5;
+        dst[2][i] = ((b2*10 + b3*4)            +16)>>5;
+        dst[3][i] = ((((b4 - b5 - b6)<<1) + b4)+16)>>5;
+        dst[4][i] = (((b0 - b1)<<3)            + 0)>>5;
+        dst[5][i] = ((((b7 - b5 + b6)<<1) + b7)+16)>>5;
+        dst[6][i] = ((b2*4 - b3*10)            +16)>>5;
+        dst[7][i] = ((((b4 + b6 - b7)<<1) + b6)+16)>>5;
+    }
+}
+
+/*****************************************************************************
+ *
  * inverse transform
  *
  ****************************************************************************/
@@ -543,4 +638,6 @@ void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx) {
     c->cavs_filter_cv = cavs_filter_cv_c;
     c->cavs_filter_ch = cavs_filter_ch_c;
     c->cavs_idct8_add = cavs_idct8_add_c;
+    c->cavs_sub_dct8  = cavs_sub_dct8_c;
+    c->cavs_quant = cavs_quant_c;
 }
diff --git a/libavcodec/cavsenc.c b/libavcodec/cavsenc.c
new file mode 100644
index 0000000..d2d2743
--- /dev/null
+++ b/libavcodec/cavsenc.c
@@ -0,0 +1,687 @@
+/*
+ * Chinese AVS video (AVS1-P2, JiZhun profile) encoder
+ * Copyright (c) 2006  Stefan Gehrer <stefan.gehrer at gmx.de>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file cavsenc.c
+ * Chinese AVS video (AVS1-P2, JiZhun profile) encoder
+ * @author Stefan Gehrer <stefan.gehrer at gmx.de>
+ */
+
+#include "avcodec.h"
+#include "golomb.h"
+#include "cavs.h"
+
+static const uint8_t cbp_enc_tab[64][2] = { /* indexed by h->cbp; column 0 is written by encode_mb_i(), column 1 by encode_mb_p() */
+    { 4, 0},{16,19},{17,16},{19,15},{14,18},{ 9,11},{22,31},{ 8,13},
+    {11,17},{21,30},{10,12},{ 7, 9},{12,10},{ 6, 7},{ 5, 8},{ 1, 1},
+    {35, 4},{47,42},{48,38},{38,27},{46,39},{36,33},{50,59},{26,26},
+    {45,40},{52,58},{41,35},{28,25},{37,29},{23,24},{31,28},{ 2, 3},
+    {43, 5},{51,51},{56,52},{39,37},{55,50},{33,43},{62,63},{27,44},
+    {54,53},{60,62},{40,48},{32,47},{42,34},{24,45},{29,49},{ 3, 6},
+    {49,14},{53,55},{57,56},{25,36},{58,54},{30,41},{59,60},{15,21},
+    {61,57},{63,61},{44,46},{18,22},{34,32},{13,20},{20,23},{ 0, 2}
+};
+
+/*
+ * taken from the AVS paper by Lu Yu/Feng Yi/Jie Dong/Cixun Zhang;
+ * this is used to post-normalise the rounding errors of the
+ * forward transform in the encoder as well as pre-normalise the
+ * rounding errors of the inverse transform in the decoder,
+ * both due to using non-orthogonal integer matrices
+ */
+static const uint16_t quant_norm[64] = { /* norm argument of cavs_quant() in recon_block(): one factor per coefficient position */
+    32768, 37958, 36158, 37958, 32768, 37958, 36158, 37958,
+    37958, 43969, 41884, 43969, 37958, 43969, 41884, 43969,
+    36158, 41884, 39898, 41884, 36158, 41884, 39898, 41884,
+    37958, 43969, 41884, 43969, 37958, 43969, 41884, 43969,
+    32768, 37958, 36158, 37958, 32768, 37958, 36158, 37958,
+    37958, 43969, 41884, 43969, 37958, 43969, 41884, 43969,
+    36158, 41884, 39898, 41884, 36158, 41884, 39898, 41884,
+    37958, 43969, 41884, 43969, 37958, 43969, 41884, 43969,
+};
+
+static const uint16_t quant_mul[64] = { /* indexed by qp in recon_block(); supplies the mul argument of cavs_quant() */
+    32768,29775,27554,25268,23170,21247,19369,17770,
+    16302,15024,13777,12634,11626,10624, 9742, 8958,
+     8192, 7512, 6889, 6305, 5793, 5303, 4878, 4467,
+     4091, 3756, 3444, 3161, 2894, 2654, 2435, 2235,
+     2048, 1878, 1722, 1579, 1449, 1329, 1218, 1117,
+     1024,  939,  861,  790,  724,  664,  609,  558,
+      512,  470,  430,  395,  362,  332,  304,  279,
+      256,  235,  215,  197,  181,  166,  152,  140
+};
+
+static enc_2dvlc_t intra_enc[7]; /* passed by encode_residual() for luma blocks of intra macroblocks */
+static enc_2dvlc_t inter_enc[7]; /* passed by encode_residual() for luma blocks of inter macroblocks */
+static enc_2dvlc_t chroma_enc[5]; /* passed by encode_residual() for both chroma blocks */
+
+/*****************************************************************************
+ *
+ * residual data encoding
+ *
+ ****************************************************************************/
+
+/** kth-order exponential golomb code: value>>order via set_ue_golomb(), then the low order bits verbatim */
+static inline void put_ue_code(PutBitContext *pb, int order, int value) {
+    set_ue_golomb(pb, value>>order);
+    put_bits(pb, order, value & ((1<<order)-1)); /* NOTE(review): order is 0 for some callers, giving a zero-length write - confirm put_bits() accepts n == 0 */
+}
+
+/**
+ * entropy coding of one residual 8x8 block taken from h->levels/h->runs
+ * @param enc pointer to 2D VLC encoding table (stepped forward while coding)
+ * @param esc_golomb_order escape codes are k-golomb with this order k
+ * @param qp quantizer (not read anywhere in this function)
+ * @param block block number [0..5]
+ */
+static void encode_residual_block(AVSContext *h, PutBitContext *pb,
+                                  const enc_2dvlc_t *enc, int esc_golomb_order,
+                                  int qp, int block) {
+    int run = 0;
+    int coeff_num, level_code;
+    DCTELEM *level_buf = h->levels[block];
+    uint8_t *run_buf = h->runs[block];
+    const dec_2dvlc_t *dec = enc->dec;
+
+    for(coeff_num=0;coeff_num<h->total_coeff[block];coeff_num++) {
+        int level = abs(level_buf[coeff_num])-1; /* magnitude minus one, used as rlcode index */
+        int sign = (level_buf[coeff_num]>>31)&1; /* 1 for negative levels; NOTE(review): relies on arithmetic right shift of a negative value */
+        run = run_buf[coeff_num] - 1; /* recon_block() stores runs as run+1 */
+        if((level > CAVS_MAX_LEVEL)||(run > CAVS_MAX_RUN)
+           ||((level_code = enc->rlcode[run][level])<0)) { /* outside the table or no entry: escape coding */
+            put_ue_code(pb,dec->golomb_order,run*2+ESCAPE_CODE+1-sign);
+            if(run >= dec->max_run)
+                put_ue_code(pb,esc_golomb_order, level);
+            else
+                put_ue_code(pb,esc_golomb_order, level + 1 -
+                            dec->level_add[run+1]);
+            while(level >= dec->inc_limit) { /* step both tables forward in lockstep */
+                dec++;
+                enc++;
+            }
+        } else {
+            put_ue_code(pb,dec->golomb_order,level_code+sign);
+            enc += dec->rltab[level_code][2]; /* table advance comes from the third rltab column */
+            dec += dec->rltab[level_code][2];
+        }
+    }
+    put_ue_code(pb,dec->golomb_order,enc->end_code); /* terminate the block with the current table's end code */
+}
+
+/**
+ * entropy coding of one macroblock: every block whose bit is set in h->cbp
+ * @param intra flag inter=0 intra=1
+ */
+static void encode_residual(AVSContext *h, int intra) {
+    PutBitContext *pb = &h->s.pb;
+    int block;
+
+    for(block=0;block<4;block++) /* blocks 0..3 use the luma tables */
+        if(h->cbp & (1<<block)) {
+            if(intra)
+                encode_residual_block(h,pb,intra_enc,1,h->qp,block); /* intra escapes use golomb order 1 */
+            else
+                encode_residual_block(h,pb,inter_enc,0,h->qp,block); /* inter escapes use golomb order 0 */
+        }
+    if(h->cbp & (1<<4)) /* blocks 4 and 5 use the chroma table and the chroma qp */
+        encode_residual_block(h,pb,chroma_enc,0,ff_cavs_chroma_qp[h->qp], 4);
+    if(h->cbp & (1<<5))
+        encode_residual_block(h,pb,chroma_enc,0,ff_cavs_chroma_qp[h->qp], 5);
+}
+
+/**
+ * eliminate residual blocks that only have insignificant coefficients,
+ * inspired by x264 and JVT-B118; returns a score, 9 meaning "keep the block"
+ */
+static inline int decimate_block(uint8_t *run, DCTELEM *level, int count) {
+    static const uint8_t run_score[30] = { /* score contribution per run value, runs above 29 are clamped */
+        0,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0 };
+    int i;
+    int score = 0;
+
+    if(count>4) /* more than four coefficients: never decimated */
+        return 9;
+    for(i=0;i<count;i++) {
+        int abslevel = FFABS(level[i]);
+        if(abslevel > 1) /* any magnitude above 1: never decimated */
+            return 9;
+        score += run_score[FFMIN(run[i],29)];
+    }
+    return score;
+}
+
+/*****************************************************************************
+ *
+ * reconstruction as the decoder sees it
+ *
+ ****************************************************************************/
+
+static int recon_block(AVSContext *h, uint8_t *src1, uint8_t *src2, int stride,
+                       int block, int qp, int intra) { /* returns 0 for an empty block, else the decimation score (9 when intra) */
+    int i,pos;
+    int score = 9;
+    int run = 0;
+    const uint8_t *scantab = ff_zigzag_direct;
+
+    h->s.dsp.cavs_sub_dct8(src1, src2, h->block, stride, stride); /* transform of src1 - src2 into h->block */
+    h->s.dsp.cavs_quant(h->block, quant_norm, quant_mul[qp],
+                        intra ? INTRA_BIAS : INTER_BIAS);
+    /* scan from the last zigzag position down to the first, collecting levels and runs */
+    for(pos=63,i=0;pos>=0;pos--) {
+        if(h->block[scantab[pos]]) {
+            if(i)
+                h->runs[block][i-1] = run+1; /* run belonging to the previously found level, stored plus one */
+            h->levels[block][i++] = h->block[scantab[pos]];
+            run = 0;
+        } else
+            run++;
+    }
+    /* eliminate empty or insignificant blocks */
+    if(!i)
+        return 0;
+    h->runs[block][i-1] = run+1;
+    if(!intra) { /* only inter blocks are candidates for decimation */
+        score = decimate_block(h->runs[block],h->levels[block],i);
+        h->total_coeff[block] = 0;
+        if(score < 4) /* block dropped: its cbp bit stays clear and src2 is left untouched */
+            return score;
+    }
+    h->cbp |= (1 << block);
+    h->total_coeff[block] = i;
+    /* dequantise and restore blocks */
+    dequant(h , h->levels[block], h->runs[block], h->block,
+            ff_cavs_dequant_mul[qp], ff_cavs_dequant_shift[qp], i);
+    h->s.dsp.cavs_idct8_add(src2, h->block, stride); /* adds the inverse transform onto src2 */
+    return score;
+}
+
+static inline void recon_mb(AVSContext *h, int *lscore, int *cscore) { /* runs recon_block() in inter mode on all six blocks */
+    int block;
+
+    for(block=0;block<4;block++) /* luma scores are added to *lscore */
+        *lscore += recon_block(h, h->ey + h->luma_scan[block],
+                               h->cy + h->luma_scan[block],
+                               h->l_stride, block, h->qp, 0);
+    *cscore += recon_block(h, h->eu, h->cu, h->c_stride, 4, /* chroma scores are added to *cscore */
+                           ff_cavs_chroma_qp[h->qp], 0);
+    *cscore += recon_block(h, h->ev, h->cv, h->c_stride, 5,
+                           ff_cavs_chroma_qp[h->qp], 0);
+}
+
+/*****************************************************************************
+ *
+ * intra encoding
+ *
+ ****************************************************************************/
+
+static inline int modify(int flags, int block, int mode) { /* remaps a luma intra mode when a neighbour is flagged unavailable */
+    if(!(flags & A_AVAIL) && !(block & 1)) /* A_AVAIL clear: applies to blocks 0 and 2 */
+        mode = ff_left_modifier_l[mode];
+    if(!(flags & B_AVAIL) && !(block & 2) && (mode >= 0)) /* B_AVAIL clear: applies to blocks 0 and 1; a negative mode is not looked up again */
+        mode = ff_top_modifier_l[mode];
+    return mode; /* callers treat a negative result as "mode not usable" */
+}
+
+static void encode_mb_i(AVSContext *h) { /* picks luma/chroma intra modes by SSE plus weighted bit cost, reconstructs, then writes the macroblock */
+    PutBitContext *pb = &h->s.pb;
+    int block, i, pred_mode_uv, min_cost, mode, best_mode, diff;
+    int coded_mode[5]; /* [0..3] luma choice per block, [4] chroma choice */
+    uint8_t top[18];
+    uint8_t *left = NULL;
+    uint8_t *d;
+    uint8_t *s;
+
+    assert(h->mbx || (!(h->flags & A_AVAIL))); /* mbx == 0 implies A_AVAIL is clear */
+    h->lambda = h->qp/3; /* weight of mode bits against SSE in the costs below */
+    h->cbp = pred_mode_uv = best_mode = 0;
+    for(block=0;block<4;block++) {
+        int pos = ff_cavs_scan3x3[block];
+        int predpred = FFMIN(h->pred_mode_Y[pos-1], h->pred_mode_Y[pos-3]); /* predicted mode: smaller of the two neighbouring entries */
+
+        min_cost = INT_MAX;
+        s = h->ey + h->luma_scan[block]; /* input samples */
+        d = h->cy + h->luma_scan[block]; /* prediction/reconstruction target */
+        ff_cavs_load_intra_pred_luma(h, top, &left, block);
+        if(predpred == NOT_AVAIL) // if either is not available
+            predpred = INTRA_L_LP;
+
+        /* try all luma intra prediction modes */
+        for(i=0;i<5;i++) {
+            const int bitcost[5] = {3,3,3,3,1};
+
+            mode = (i == 4) ? predpred : i + (i >= predpred); /* i==4 is the predicted mode; i<4 enumerates the others, skipping predpred */
+            mode = modify(h->flags, block, mode);
+            if(mode >= 0) {
+                h->intra_pred_l[mode](d, top, left, h->l_stride);
+                diff = h->s.dsp.sse[1](NULL, s, d, h->l_stride, 8);
+                if((bitcost[i]*h->lambda + diff) < min_cost) {
+                    min_cost = bitcost[i]*h->lambda + diff;
+                    coded_mode[block] = i; /* NOTE(review): stays unset if every candidate is rejected (mode < 0) - confirm this cannot happen */
+                    best_mode = mode;
+                    h->pred_mode_Y[pos] = (i == 4) ? predpred : i + (i >= predpred); /* stores the mode before modify() remapping */
+                }
+            }
+        }
+        /* reconstruct block for next one */
+        h->intra_pred_l[best_mode](d, top, left, h->l_stride);
+        recon_block(h, s, d, h->l_stride, block, h->qp, 1);
+    }
+
+    /* try all chroma intra prediction modes */
+    ff_cavs_load_intra_pred_chroma(h);
+    min_cost = INT_MAX;
+    for(i=0;i<4;i++) {
+        const int bitcost[4] = {1,3,3,5};
+
+        mode = i;
+        if(!(h->flags & A_AVAIL))
+            mode = ff_left_modifier_c[mode];
+        if(!(h->flags & B_AVAIL) && (mode >=0))
+            mode = ff_top_modifier_c[mode];
+        if(mode >= 0) {
+            h->intra_pred_c[mode](h->cu, &h->top_border_u[h->mbx*10],
+                                  h->left_border_u, h->c_stride);
+            diff = h->s.dsp.sse[1](NULL, h->eu, h->cu, h->c_stride, 8);
+            h->intra_pred_c[mode](h->cv, &h->top_border_v[h->mbx*10],
+                                  h->left_border_v, h->c_stride);
+            diff += h->s.dsp.sse[1](NULL, h->ev, h->cv, h->c_stride, 8); /* one mode is shared by U and V, so their errors are summed */
+            if((bitcost[i]*h->lambda + diff) < min_cost) {
+                min_cost = bitcost[i]*h->lambda + diff;
+                pred_mode_uv = mode;
+                coded_mode[4] = i;
+            }
+        }
+    }
+
+    /* reconstruct blocks */
+    h->intra_pred_c[pred_mode_uv](h->cu, &h->top_border_u[h->mbx*10],
+                                  h->left_border_u, h->c_stride);
+    recon_block(h, h->eu, h->cu, h->c_stride, 4, ff_cavs_chroma_qp[h->qp], 1);
+    h->intra_pred_c[pred_mode_uv](h->cv, &h->top_border_v[h->mbx*10],
+                                  h->left_border_v, h->c_stride);
+    recon_block(h, h->ev, h->cv, h->c_stride, 5, ff_cavs_chroma_qp[h->qp], 1);
+
+    ff_cavs_modify_mb_i(h,&pred_mode_uv);
+
+    if(h->pic_type != FF_I_TYPE) /* non-I pictures: cbp code number plus 4 is written before the modes */
+        set_ue_golomb(pb, cbp_enc_tab[h->cbp][0] + 4);
+    for(block=0;block<4;block++)
+        if(coded_mode[block] == 4)
+            put_bits(pb,1,1); /* single 1 bit: the predicted mode was chosen */
+        else
+            put_bits(pb,3,coded_mode[block]); /* 3-bit field holding the index 0..3 */
+    set_ue_golomb(pb,coded_mode[4]);
+    if(h->pic_type == FF_I_TYPE) /* I pictures: cbp code number is written after the modes */
+        set_ue_golomb(pb, cbp_enc_tab[h->cbp][0]);
+    encode_residual(h, 1);
+    ff_cavs_filter(h,I_8X8);
+    set_mv_intra(h);
+}
+
+/*****************************************************************************
+ *
+ * inter P encoding
+ *
+ ****************************************************************************/
+
+static inline int mv_cost(vector_t *mv, vector_t *pmv) { /* cost estimate for coding mv relative to its predictor pmv */
+  int x = FFABS(mv->x - pmv->x);
+  int y = FFABS(mv->y - pmv->y);
+  return  2*(av_log2(x*2)+av_log2(y*2))+2; /* NOTE(review): av_log2() is called with 0 when a component equals its predictor - confirm its result for 0 */
+}
+
+static inline int check_mv(AVSContext *h, int x, int y, vector_t *pmv,
+                           vector_t *mvmin, int *min_cost) { /* tests candidate (x,y); returns 1 and updates *mvmin / *min_cost when it is cheaper */
+  int diff;
+
+  h->mv[MV_FWD_X0].x = x; /* the candidate is written into h->mv and is left there on return */
+  h->mv[MV_FWD_X0].y = y;
+  ff_cavs_inter(h, P_16X16);
+  diff = h->s.dsp.sse[0](NULL, h->ey, h->cy, h->l_stride, 16); /* error between input (ey) and the prediction in cy */
+  if((mv_cost(&h->mv[MV_FWD_X0],pmv)*h->lambda + diff) < *min_cost) {
+      *mvmin = h->mv[MV_FWD_X0];
+      *min_cost = mv_cost(&h->mv[MV_FWD_X0],pmv)*h->lambda + diff;
+      return 1;
+  }
+  return 0;
+}
+
+static void encode_mb_p(AVSContext *h) { /* codes one macroblock of a P picture as skipped, P_16X16 or intra */
+    PutBitContext *pb = &h->s.pb;
+    int lscore = 0;
+    int cscore = 0;
+
+    h->lambda = h->qp/8; /* weight of motion vector cost against SSE */
+
+    /* test if we can skip */
+    ff_cavs_mv(h, MV_FWD_X0, MV_FWD_C2, MV_PRED_PSKIP, BLK_16X16, 0);
+    ff_cavs_inter(h, P_SKIP);
+    recon_mb(h, &lscore, &cscore);
+    if(lscore < 6 && cscore < 7) { /* luma and chroma decimation scores both under their limits: count the MB as skipped */
+        h->skip_count++;
+        if(h->cbp) //there can be an undecimated block
+            ff_cavs_inter(h, P_SKIP); /* redo the prediction, presumably to discard residual that recon_block() added - confirm */
+        set_intra_mode_default(h);
+        ff_cavs_filter(h,P_SKIP);
+    } else {
+        int hex[6][2] = {{-4,-8},{4,-8},{8,0},{4,8},{-4,8},{-8,0}}; /* offsets of the six hex search points */
+        vector_t pmv[2], mv[2], mvmin;
+        int i, sub, x, y, min_cost[2], ref, count;
+        int newmv = 1; /* NOTE(review): set only once; if the ref 0 search ends with newmv == 0, the hex loop for ref 1 is never entered */
+
+        set_ue_golomb(pb,h->skip_count); /* number of skipped macroblocks preceding this one */
+        h->skip_count = h->cbp = 0;
+
+        for(ref=0;ref<=h->mrefs;ref++) {
+            /* get predicted mv */
+            h->mv[MV_FWD_X0].ref=mv[ref].ref=pmv[ref].ref=mvmin.ref = ref;
+            ff_cavs_mv(h, MV_FWD_X0, MV_FWD_C2, MV_PRED_ENC, BLK_16X16, ref);
+            pmv[ref] = h->mv[MV_FWD_X0];
+
+            /* predicted full-pel mv as reference */
+            h->mv[MV_FWD_X0].x = mv[ref].x = mvmin.x = (pmv[ref].x+2) & ~3; /* rounds the predictor to a multiple of 4 */
+            h->mv[MV_FWD_X0].y = mv[ref].y = mvmin.y = (pmv[ref].y+2) & ~3;
+            ff_cavs_inter(h, P_16X16);
+            min_cost[ref] = h->s.dsp.sse[0](NULL,h->ey,h->cy,h->l_stride,16) +
+                mv_cost(&mv[ref],&pmv[ref])*h->lambda;
+
+            /* iterative hex search */
+            count = ME_ITER;
+            while(newmv && count--) { /* repeat while some hex point improved the cost, at most ME_ITER times */
+                newmv = 0;
+                for(i=0;i<6;i++)
+                    if(check_mv(h, mv[ref].x+hex[i][0], mv[ref].y+hex[i][1],
+                                &pmv[ref], &mvmin, &min_cost[ref]))
+                        newmv = 1;
+                mv[ref] = mvmin; /* recentre on the best point found so far */
+            }
+
+            /* refinement (full,half,quarter) */
+            for(sub=4;sub>0;sub>>=1) { /* step sizes 4, 2, 1 */
+                for(x=-sub;x<=sub;x+=sub)
+                    for(y=-sub;y<=sub;y+=sub)
+                        if(x|y) /* skip the centre position */
+                            check_mv(h, mv[ref].x+x, mv[ref].y+y,
+                                     &pmv[ref], &mvmin, &min_cost[ref]);
+                mv[ref] = mvmin;
+            }
+        }
+        ref = (h->mrefs && (min_cost[1] < min_cost[0])); /* ref 1 is chosen only when two references are in use and it is cheaper */
+        if(min_cost[ref] < ME_THRES) {
+            h->mv[MV_FWD_X0] = mv[ref];
+            set_mvs(&h->mv[MV_FWD_X0],BLK_16X16);
+            ff_cavs_inter(h, P_16X16);
+            recon_mb(h,&lscore,&cscore);
+            set_intra_mode_default(h);
+            set_ue_golomb(pb,0); //mb_type is P_16X16
+            if(h->mrefs)
+                put_bits(pb,1,ref);
+            set_se_golomb(pb,mv[ref].x - pmv[ref].x); //mvd
+            set_se_golomb(pb,mv[ref].y - pmv[ref].y);
+            set_ue_golomb(pb, cbp_enc_tab[h->cbp][1]);
+            encode_residual(h, 0);
+            ff_cavs_filter(h,P_16X16);
+        } else { /* best motion cost reached ME_THRES: code the macroblock as intra instead */
+            encode_mb_i(h);
+        }
+    }
+}
+
+/*****************************************************************************
+ *
+ * header encoding
+ *
+ ****************************************************************************/
+
+/**
+ * Write the picture header for the current picture and encode all of its
+ * macroblocks into h->s.pb.
+ *
+ * The picture start code is written by the caller. On the first call the
+ * MpegEncContext is initialised with the CAVS IDCT selected. After all
+ * macroblocks are coded, the reconstructed picture is moved to DPB[0] and
+ * the previous DPB[0] to DPB[1]; the old DPB[1] buffer is released.
+ *
+ * @param h encoder context; h->pic_type, h->qp and h->poc are read
+ * @param p source picture whose three planes are read per macroblock
+ * @return 0 on success, -1 if MPV_common_init() or get_buffer() fails
+ */
+static int encode_pic(AVSContext *h, Picture *p) {
+    MpegEncContext *s = &h->s;
+    int deblock = 1;
+
+    if (!s->context_initialized) {
+        s->avctx->idct_algo = FF_IDCT_CAVS;
+        if (MPV_common_init(s) < 0)
+            return -1;
+        ff_init_scantable(s->dsp.idct_permutation,&h->scantable,ff_zigzag_direct);
+    }
+    put_bits(&s->pb,16,0xdeaf); //TODO correct bbv_delay
+    if(h->pic_type == FF_I_TYPE)
+        put_bits(&s->pb,1,0); //time_code not present
+    else
+        put_bits(&s->pb,2,1); //picture type P
+    /* the reconstruction buffer is dereferenced by every macroblock, so a
+     * failed allocation must stop the picture here */
+    if(s->avctx->get_buffer(s->avctx, (AVFrame *)&h->picture) < 0) {
+        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+    ff_cavs_init_pic(h);
+    put_bits(&s->pb,8,h->poc);
+    h->picture.poc = h->poc*2;
+    /* distances to both reference slots, taken modulo 512 */
+    h->dist[0] = (h->picture.poc - h->DPB[0].poc  + 512) % 512;
+    h->dist[1] = (h->picture.poc - h->DPB[1].poc  + 512) % 512;
+    /* 512/dist, or 0 when the distance itself is 0 */
+    h->scale_den[0] = h->dist[0] ? 512/h->dist[0] : 0;
+    h->scale_den[1] = h->dist[1] ? 512/h->dist[1] : 0;
+    h->poc = (h->poc + 1) & 0x7F; //picture counter wraps at 128
+
+    if(s->low_delay)
+        set_ue_golomb(&s->pb,0); //TODO correct bbv_check_times?
+    put_bits(&s->pb,1,1); //set progressive
+
+    put_bits(&s->pb,1,1); //top_field_first
+    put_bits(&s->pb,1,0); //repeat_first_field
+    put_bits(&s->pb,1,1); //set qp_fixed
+    put_bits(&s->pb,6,h->qp);
+    if(h->pic_type == FF_I_TYPE) {
+        put_bits(&s->pb,4,0); //reserved bits
+    } else {
+        /* a second reference is used only when DPB[1] holds a picture and
+         * more than one reference frame was requested */
+        h->mrefs = !!h->DPB[1].data[0] && (s->avctx->refs > 1);
+        if(!(h->pic_type == FF_B_TYPE))
+            put_bits(&s->pb,1,!h->mrefs); //0 when multiple references are used
+        put_bits(&s->pb,4,0);  //reserved bits
+        put_bits(&s->pb,1,1); //skip_mode_flag
+    }
+    put_bits(&s->pb,1,!deblock);
+    if(deblock) {
+        if(s->avctx->deblockalpha || s->avctx->deblockbeta) {
+            put_bits(&s->pb,1,1); //deblocking params coded
+            set_se_golomb(&s->pb, s->avctx->deblockalpha);
+            set_se_golomb(&s->pb, s->avctx->deblockbeta);
+        } else {
+            put_bits(&s->pb,1,0); //deblocking params not coded
+        }
+    }
+    h->skip_count = 0;
+    do {
+        /* source pointers of the current macroblock: 16x16 luma, 8x8 chroma */
+        h->ey = p->data[0] + h->mby*16*h->l_stride + h->mbx*16;
+        h->eu = p->data[1] + h->mby*8*h->c_stride + h->mbx*8;
+        h->ev = p->data[2] + h->mby*8*h->c_stride + h->mbx*8;
+        ff_cavs_init_mb(h);
+        if(h->pic_type == FF_I_TYPE)
+            encode_mb_i(h);
+        else
+            encode_mb_p(h);
+    } while(ff_cavs_next_mb(h));
+    /* shift the reference pictures: DPB[0] -> DPB[1], current -> DPB[0] */
+    if(h->DPB[1].data[0])
+        s->avctx->release_buffer(s->avctx, (AVFrame *)&h->DPB[1]);
+    memcpy(&h->DPB[1], &h->DPB[0], sizeof(Picture));
+    memcpy(&h->DPB[0], &h->picture, sizeof(Picture));
+    return 0;
+}
+
+/**
+ * Write the sequence header fields (the start code is written by the
+ * caller) to h->s.pb and set up the per-sequence encoder state.
+ *
+ * Picture dimensions are taken from the codec context. The frame rate code
+ * is the index in ff_frame_rate_tab whose entry matches the inverted codec
+ * time base. The top line buffers and six 64-entry run/level scratch
+ * arrays are allocated on first use.
+ *
+ * @param h encoder context
+ * @return 0 on success, -1 if the frame rate is not in the table or a
+ *         scratch buffer cannot be allocated
+ */
+static int encode_seq_header(AVSContext *h) {
+    MpegEncContext *s = &h->s;
+    int frame_rate_code = -1;
+    int i;
+
+    s->width = h->s.avctx->width;
+    s->height = h->s.avctx->height;
+    put_bits(&s->pb,8,h->profile);
+    put_bits(&s->pb,8,h->level);
+    put_bits(&s->pb,1,1); //progressive sequence only
+    put_bits(&s->pb,14,s->width);
+    put_bits(&s->pb,14,s->height);
+    put_bits(&s->pb,2,1); // 1 = YUV 4:2:0, 2 = YUV 4:2:2
+    put_bits(&s->pb,3,1); //sample_precision 8bits
+    put_bits(&s->pb,4,h->aspect_ratio);
+    /* time_base is the inverse of the frame rate, hence the crossed compare;
+     * the last matching table entry wins */
+    for(i=0;i<15;i++)
+        if((ff_frame_rate_tab[i].den == s->avctx->time_base.num) &&
+           (ff_frame_rate_tab[i].num == s->avctx->time_base.den))
+            frame_rate_code = i;
+    if(frame_rate_code < 0) {
+        av_log(h->s.avctx, AV_LOG_ERROR, "unsupported framerate %d/%d\n",
+               s->avctx->time_base.den, s->avctx->time_base.num);
+        return -1;
+    }
+    put_bits(&s->pb,4,frame_rate_code & 0xF);
+    put_bits(&s->pb,18,0); //bit_rate_lower
+    put_bits(&s->pb,1,1); //marker_bit
+    put_bits(&s->pb,12,1); //bit_rate_upper
+    put_bits(&s->pb,1,s->low_delay);
+    put_bits(&s->pb,1,1); //marker_bit
+    put_bits(&s->pb,18,225); //bbv_buffer_size, enough for level 6.2
+    put_bits(&s->pb,3,0); //reserved_bits
+
+    h->mb_width  = (s->width  + 15) >> 4;
+    h->mb_height = (s->height + 15) >> 4;
+    if(!h->top_qp)
+        ff_cavs_init_top_lines(h);
+    /* guarded by the buffer itself so that a failed attempt is retried on
+     * the next sequence header instead of leaving NULL pointers behind;
+     * assumes the context starts out zeroed, as the top_qp test does */
+    if(!h->levels[0]) {
+        h->levels[0] = av_mallocz(6*64*sizeof(DCTELEM));
+        h->runs[0]   = av_mallocz(6*64);
+        if(!h->levels[0] || !h->runs[0]) {
+            av_freep(&h->levels[0]);
+            av_freep(&h->runs[0]);
+            av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate run/level buffers\n");
+            return -1;
+        }
+        /* carve the two allocations into six consecutive 64-entry slots */
+        for(i=1;i<6;i++) {
+            h->levels[i] = h->levels[i-1] + 64;
+            h->runs[i]   = h->runs[i-1]   + 64;
+        }
+    }
+    return 0;
+}
+
+/**
+ * Encode one input frame into buf.
+ *
+ * The three input planes are copied into h->input_picture (the chroma
+ * planes at half width and height). A frame is coded as an I picture,
+ * preceded by a sequence header, when gop_size is 0 or h->poc is a multiple
+ * of gop_size; otherwise it is coded as a P picture. With CODEC_FLAG_PSNR
+ * set, the squared error between the reconstruction and the input is
+ * accumulated per plane.
+ *
+ * @param avctx    codec context
+ * @param buf      output buffer handed to init_put_bits()
+ * @param buf_size size passed along with buf
+ * @param data     input picture, used as an AVFrame
+ * @return number of bytes written, or -1 if the sequence header or the
+ *         picture could not be encoded
+ */
+static int cavs_encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
+    AVSContext *h = avctx->priv_data;
+    MpegEncContext * const s = &h->s;
+    AVFrame *pict = data;
+    const int width= s->avctx->width;
+    const int height= s->avctx->height;
+    int size, i, y;
+
+    for(i=0; i<3; i++){
+        int shift= !!i; //chroma planes are subsampled by 2 in both directions
+        for(y=0; y<(height>>shift); y++)
+            memcpy(&h->input_picture.data[i][y * h->input_picture.linesize[i]],
+                   &pict->data[i][y * pict->linesize[i]],
+                   width>>shift);
+    }
+    if(avctx->gop_size == 0 || h->poc % avctx->gop_size == 0) {
+        h->input_picture.pict_type= I_TYPE;
+        h->pic_type= FF_I_TYPE;
+        h->input_picture.key_frame= 1;
+    } else {
+        h->input_picture.pict_type= P_TYPE;
+        h->pic_type= FF_P_TYPE;
+    }
+
+    init_put_bits(&s->pb, buf, buf_size);
+    if(h->pic_type == FF_I_TYPE) {
+        /* an I picture drops both reference pictures */
+        if(h->DPB[0].data[0])
+            avctx->release_buffer(avctx, (AVFrame *)&h->DPB[0]);
+        if(h->DPB[1].data[0])
+            avctx->release_buffer(avctx, (AVFrame *)&h->DPB[1]);
+        put_bits(&s->pb,16,0);
+        put_bits(&s->pb,16,CAVS_START_CODE);
+        if(encode_seq_header(h) < 0)
+            return -1;
+        align_put_bits(&s->pb);
+        put_bits(&s->pb,16,0);
+        put_bits(&s->pb,16,PIC_I_START_CODE);
+    } else {
+        put_bits(&s->pb,16,0);
+        put_bits(&s->pb,16,PIC_PB_START_CODE);
+    }
+    /* do not report a byte count for a picture that failed to encode */
+    if(encode_pic(h, &h->input_picture) < 0)
+        return -1;
+    align_put_bits(&s->pb);
+    flush_put_bits(&s->pb);
+    size = put_bits_count(&s->pb)/8;
+    if(avctx->flags&CODEC_FLAG_PSNR) {
+        int x;
+        for(i=0;i<3;i++) {
+            int wi = s->width >> (!!i);
+            int he = s->height >> (!!i);
+            int64_t error = 0;
+            for(y=0;y<he;y++)
+                for(x=0;x<wi;x++) {
+                    int d = h->picture.data[i][y*h->picture.linesize[i] + x]
+                               - pict->data[i][y*     pict->linesize[i] + x];
+                    error += d*d;
+                }
+            s->avctx->error[i] += error;
+            h->picture.error[i] = error;
+        }
+    }
+    /* encode_pic() copied the picture into DPB[0]; clear the working copy */
+    memset(&h->picture,0,sizeof(Picture));
+    return size;
+}
+
+/**
+ * Build the encoder-side lookup tables from the decoder 2D-VLC tables.
+ *
+ * For each of the count table pairs, every rlcode slot is first filled
+ * with -1 bytes. Then each decoder entry below ESCAPE_CODE with a non-zero
+ * level stores its own index at rlcode[run-1][level-1], and the entry that
+ * follows it is skipped. Entries with a zero level set end_code.
+ *
+ * @param enc   array of count encoder tables to fill
+ * @param dec   array of count decoder tables to read
+ * @param count number of tables in both arrays
+ */
+static void init_enc_vlc(enc_2dvlc_t *enc, const dec_2dvlc_t *dec, int count) {
+    int t;
+
+    for(t=0;t<count;t++) {
+        enc_2dvlc_t *e = &enc[t];
+        const dec_2dvlc_t *d = &dec[t];
+        int code = 0;
+
+        memset(e, -1, sizeof(*e));
+        while(code < ESCAPE_CODE) {
+            int level = d->rltab[code][0];
+            if(level) {
+                int run = d->rltab[code][1];
+                e->rlcode[run-1][level-1] = code;
+                code += 2; //also step over the negative level entry
+            } else {
+                e->end_code = code;
+                code += 1;
+            }
+        }
+        e->dec = d;
+    }
+}
+
+/**
+ * Initialise the encoder.
+ *
+ * The encoder refuses to open unless strict_std_compliance is at or below
+ * FF_COMPLIANCE_EXPERIMENTAL, and it needs a fixed quantiser given in
+ * avctx->cqp. The three encoder VLC table sets are derived from the decoder
+ * tables, and a buffer for the copied input picture is requested.
+ *
+ * @param avctx codec context
+ * @return 0 on success, -1 on a configuration error or if the input
+ *         picture buffer cannot be obtained
+ */
+static int cavs_encode_init(AVCodecContext * avctx) {
+    AVSContext *h = avctx->priv_data;
+    MpegEncContext * const s = &h->s;
+
+    if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
+        av_log(avctx, AV_LOG_ERROR, "Chinese AVS encoder has not been tested for standard compliance.\n"
+               "use vstrict=-2 / -strict -2 to use it anyway\n");
+        return -1;
+    }
+    if(avctx->cqp <= -1) {
+        av_log(avctx, AV_LOG_ERROR, "fixed qp encoding only, use -cqp\n");
+        return -1;
+    }
+    h->qp = avctx->cqp;
+    ff_cavs_init(avctx);
+    init_enc_vlc(intra_enc,  ff_cavs_intra_dec,  7);
+    init_enc_vlc(inter_enc,  ff_cavs_inter_dec,  7);
+    init_enc_vlc(chroma_enc, ff_cavs_chroma_dec, 5);
+    s->avctx = avctx;
+    /* every frame is copied into input_picture, so opening must fail if
+     * its planes cannot be allocated */
+    if(s->avctx->get_buffer(s->avctx, (AVFrame *)&h->input_picture) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+    h->poc = 0;
+    return 0;
+}
+
+/* Codec table entry for the Chinese AVS encoder. The initializer is
+ * positional; presumably the slots are name, type, id, private context
+ * size and the init, encode and close callbacks -- TODO confirm against
+ * the AVCodec declaration. */
+AVCodec cavs_encoder = {
+    "cavs",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_CAVS,
+    sizeof(AVSContext),
+    cavs_encode_init,
+    cavs_encode_frame,
+    ff_cavs_end,
+};
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 2d312e5..d5ede49 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -297,6 +297,9 @@ typedef struct DSPContext {
     void (*cavs_filter_cv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
     void (*cavs_filter_ch)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
     void (*cavs_idct8_add)(uint8_t *dst, DCTELEM *block, int stride);
+    void (*cavs_sub_dct8)(uint8_t *src1, uint8_t *src2, DCTELEM *block,
+                          int stride1, int stride2);
+    void (*cavs_quant)(DCTELEM *block, const uint16_t *norm,int mul, int bias);
 
     me_cmp_func pix_abs[2][4];
 
-- 
1.7.1


--------------010605070306040906030502--