[FFmpeg-cvslog] Add weighted motion compensation for RV40 B-frames

Kostya Shishkov git at videolan.org
Sat Aug 13 03:18:01 CEST 2011


ffmpeg | branch: master | Kostya Shishkov <kostya.shishkov at gmail.com> | Wed Aug 10 11:26:39 2011 +0200| [b86ab38137be34376c90d45d08d49dbd28f2a72f] | committer: Ronald S. Bultje

Add weighted motion compensation for RV40 B-frames

Signed-off-by: Ronald S. Bultje <rsbultje at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b86ab38137be34376c90d45d08d49dbd28f2a72f
---

 libavcodec/rv34.c        |   76 ++++++++++++++++---
 libavcodec/rv34.h        |    5 +
 libavcodec/rv34dsp.h     |    6 ++
 libavcodec/rv40dsp.c     |   20 +++++
 tests/ref/fate/real-rv40 |  186 +++++++++++++++++++++++-----------------------
 5 files changed, 189 insertions(+), 104 deletions(-)

diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c
index 58e4552..cdc559f 100644
--- a/libavcodec/rv34.c
+++ b/libavcodec/rv34.c
@@ -717,7 +717,7 @@ static const int chroma_coeffs[3] = { 0, 3, 5 };
 static inline void rv34_mc(RV34DecContext *r, const int block_type,
                           const int xoff, const int yoff, int mv_off,
                           const int width, const int height, int dir,
-                          const int thirdpel,
+                          const int thirdpel, int weighted,
                           qpel_mc_func (*qpel_mc)[16],
                           h264_chroma_mc_func (*chroma_mc))
 {
@@ -781,9 +781,15 @@ static inline void rv34_mc(RV34DecContext *r, const int block_type,
         srcU = uvbuf;
         srcV = uvbuf + 16;
     }
-    Y = s->dest[0] + xoff      + yoff     *s->linesize;
-    U = s->dest[1] + (xoff>>1) + (yoff>>1)*s->uvlinesize;
-    V = s->dest[2] + (xoff>>1) + (yoff>>1)*s->uvlinesize;
+    if(!weighted){
+        Y = s->dest[0] + xoff      + yoff     *s->linesize;
+        U = s->dest[1] + (xoff>>1) + (yoff>>1)*s->uvlinesize;
+        V = s->dest[2] + (xoff>>1) + (yoff>>1)*s->uvlinesize;
+    }else{
+        Y = r->tmp_b_block_y [dir]     +  xoff     +  yoff    *s->linesize;
+        U = r->tmp_b_block_uv[dir*2]   + (xoff>>1) + (yoff>>1)*s->uvlinesize;
+        V = r->tmp_b_block_uv[dir*2+1] + (xoff>>1) + (yoff>>1)*s->uvlinesize;
+    }
 
     if(block_type == RV34_MB_P_16x8){
         qpel_mc[1][dxy](Y, srcY, s->linesize);
@@ -804,33 +810,70 @@ static void rv34_mc_1mv(RV34DecContext *r, const int block_type,
                         const int xoff, const int yoff, int mv_off,
                         const int width, const int height, int dir)
 {
-    rv34_mc(r, block_type, xoff, yoff, mv_off, width, height, dir, r->rv30,
+    rv34_mc(r, block_type, xoff, yoff, mv_off, width, height, dir, r->rv30, 0,
             r->rdsp.put_pixels_tab,
             r->rdsp.put_chroma_pixels_tab);
 }
 
+static void rv4_weight(RV34DecContext *r)
+{
+    r->rdsp.rv40_weight_pixels_tab[0](r->s.dest[0],
+                                      r->tmp_b_block_y[0],
+                                      r->tmp_b_block_y[1],
+                                      r->weight1,
+                                      r->weight2,
+                                      r->s.linesize);
+    r->rdsp.rv40_weight_pixels_tab[1](r->s.dest[1],
+                                      r->tmp_b_block_uv[0],
+                                      r->tmp_b_block_uv[2],
+                                      r->weight1,
+                                      r->weight2,
+                                      r->s.uvlinesize);
+    r->rdsp.rv40_weight_pixels_tab[1](r->s.dest[2],
+                                      r->tmp_b_block_uv[1],
+                                      r->tmp_b_block_uv[3],
+                                      r->weight1,
+                                      r->weight2,
+                                      r->s.uvlinesize);
+}
+
 static void rv34_mc_2mv(RV34DecContext *r, const int block_type)
 {
-    rv34_mc(r, block_type, 0, 0, 0, 2, 2, 0, r->rv30,
+    int weighted = !r->rv30 && block_type != RV34_MB_B_BIDIR && r->weight1 != 8192;
+
+    rv34_mc(r, block_type, 0, 0, 0, 2, 2, 0, r->rv30, weighted,
             r->rdsp.put_pixels_tab,
             r->rdsp.put_chroma_pixels_tab);
-    rv34_mc(r, block_type, 0, 0, 0, 2, 2, 1, r->rv30,
-            r->rdsp.avg_pixels_tab,
-            r->rdsp.avg_chroma_pixels_tab);
+    if(!weighted){
+        rv34_mc(r, block_type, 0, 0, 0, 2, 2, 1, r->rv30, 0,
+                r->rdsp.avg_pixels_tab,
+                r->rdsp.avg_chroma_pixels_tab);
+    }else{
+        rv34_mc(r, block_type, 0, 0, 0, 2, 2, 1, r->rv30, 1,
+                r->rdsp.put_pixels_tab,
+                r->rdsp.put_chroma_pixels_tab);
+        rv4_weight(r);
+    }
 }
 
 static void rv34_mc_2mv_skip(RV34DecContext *r)
 {
     int i, j;
+    int weighted = !r->rv30 && r->weight1 != 8192;
+
     for(j = 0; j < 2; j++)
         for(i = 0; i < 2; i++){
              rv34_mc(r, RV34_MB_P_8x8, i*8, j*8, i+j*r->s.b8_stride, 1, 1, 0, r->rv30,
+                     weighted,
                      r->rdsp.put_pixels_tab,
                      r->rdsp.put_chroma_pixels_tab);
              rv34_mc(r, RV34_MB_P_8x8, i*8, j*8, i+j*r->s.b8_stride, 1, 1, 1, r->rv30,
-                     r->rdsp.avg_pixels_tab,
-                     r->rdsp.avg_chroma_pixels_tab);
+                     weighted,
+                     weighted ? r->rdsp.put_pixels_tab : r->rdsp.avg_pixels_tab,
+                     weighted ? r->rdsp.put_chroma_pixels_tab : r->rdsp.avg_chroma_pixels_tab);
         }
+    if(weighted)
+        rv4_weight(r);
 }
 
 /** number of motion vectors in each macroblock type */
@@ -1265,6 +1308,16 @@ static int rv34_decode_slice(RV34DecContext *r, int end, const uint8_t* buf, int
         if(MPV_frame_start(s, s->avctx) < 0)
             return -1;
         ff_er_frame_start(s);
+        if (!r->tmp_b_block_base || s->width != r->si.width || s->height != r->si.height) {
+            int i;
+
+            r->tmp_b_block_base = av_realloc(r->tmp_b_block_base, s->linesize * 48);
+            for (i = 0; i < 2; i++)
+                r->tmp_b_block_y[i] = r->tmp_b_block_base + i * 16 * s->linesize;
+            for (i = 0; i < 4; i++)
+                r->tmp_b_block_uv[i] = r->tmp_b_block_base + 32 * s->linesize
+                                       + (i >> 1) * 8 * s->uvlinesize + (i & 1) * 16;
+        }
         r->cur_pts = r->si.pts;
         if(s->pict_type != AV_PICTURE_TYPE_B){
             r->last_pts = r->next_pts;
@@ -1500,6 +1553,7 @@ av_cold int ff_rv34_decode_end(AVCodecContext *avctx)
 
     av_freep(&r->intra_types_hist);
     r->intra_types = NULL;
+    av_freep(&r->tmp_b_block_base);
     av_freep(&r->mb_type);
     av_freep(&r->cbp_luma);
     av_freep(&r->cbp_chroma);
diff --git a/libavcodec/rv34.h b/libavcodec/rv34.h
index ef19813..12607fb 100644
--- a/libavcodec/rv34.h
+++ b/libavcodec/rv34.h
@@ -116,6 +116,11 @@ typedef struct RV34DecContext{
     /** 8x8 block available flags (for MV prediction) */
     DECLARE_ALIGNED(8, uint32_t, avail_cache)[3*4];
 
+    /** temporary blocks for RV4 weighted MC */
+    uint8_t *tmp_b_block_y[2];
+    uint8_t *tmp_b_block_uv[4];
+    uint8_t *tmp_b_block_base;
+
     int (*parse_slice_header)(struct RV34DecContext *r, GetBitContext *gb, SliceInfo *si);
     int (*decode_mb_info)(struct RV34DecContext *r);
     int (*decode_intra_types)(struct RV34DecContext *r, GetBitContext *gb, int8_t *dst);
diff --git a/libavcodec/rv34dsp.h b/libavcodec/rv34dsp.h
index 771a6c0..e1def7d 100644
--- a/libavcodec/rv34dsp.h
+++ b/libavcodec/rv34dsp.h
@@ -29,11 +29,17 @@
 
 #include "dsputil.h"
 
+typedef void (*rv40_weight_func)(uint8_t *dst/*align width (8 or 16)*/,
+                                 uint8_t *src1/*align width (8 or 16)*/,
+                                 uint8_t *src2/*align width (8 or 16)*/,
+                                 int w1, int w2, int stride);
+
 typedef struct RV34DSPContext {
     qpel_mc_func put_pixels_tab[4][16];
     qpel_mc_func avg_pixels_tab[4][16];
     h264_chroma_mc_func put_chroma_pixels_tab[3];
     h264_chroma_mc_func avg_chroma_pixels_tab[3];
+    rv40_weight_func rv40_weight_pixels_tab[2];
 } RV34DSPContext;
 
 void ff_rv30dsp_init(RV34DSPContext *c, DSPContext* dsp);
diff --git a/libavcodec/rv40dsp.c b/libavcodec/rv40dsp.c
index 132f063..ca620ab 100644
--- a/libavcodec/rv40dsp.c
+++ b/libavcodec/rv40dsp.c
@@ -285,6 +285,23 @@ static void OPNAME ## rv40_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
 RV40_CHROMA_MC(put_, op_put)
 RV40_CHROMA_MC(avg_, op_avg)
 
+#define RV40_WEIGHT_FUNC(size) \
+static void rv40_weight_func_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, int stride)\
+{\
+    int i, j;\
+\
+    for (j = 0; j < size; j++) {\
+        for (i = 0; i < size; i++)\
+            dst[i] = (((w2 * src1[i]) >> 9) + ((w1 * src2[i]) >> 9) + 0x10) >> 5;\
+        src1 += stride;\
+        src2 += stride;\
+        dst  += stride;\
+    }\
+}
+
+RV40_WEIGHT_FUNC(16)
+RV40_WEIGHT_FUNC(8)
+
 av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) {
     c->put_pixels_tab[0][ 0] = dsp->put_h264_qpel_pixels_tab[0][0];
     c->put_pixels_tab[0][ 1] = put_rv40_qpel16_mc10_c;
@@ -356,6 +373,9 @@ av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) {
     c->avg_chroma_pixels_tab[0] = avg_rv40_chroma_mc8_c;
     c->avg_chroma_pixels_tab[1] = avg_rv40_chroma_mc4_c;
 
+    c->rv40_weight_pixels_tab[0] = rv40_weight_func_16;
+    c->rv40_weight_pixels_tab[1] = rv40_weight_func_8;
+
     if (HAVE_MMX)
         ff_rv40dsp_init_x86(c, dsp);
 }
diff --git a/tests/ref/fate/real-rv40 b/tests/ref/fate/real-rv40
index 2a445d0..990a530 100644
--- a/tests/ref/fate/real-rv40
+++ b/tests/ref/fate/real-rv40
@@ -16,106 +16,106 @@
 0, 112500, 276480, 0x5f7a0d4f
 0, 120000, 276480, 0x5f7a0d4f
 0, 127500, 276480, 0x5f7a0d4f
-0, 135000, 276480, 0x2d722f8a
-0, 142500, 276480, 0xebbb3c8f
-0, 150000, 276480, 0x8574c868
+0, 135000, 276480, 0x75641594
+0, 142500, 276480, 0x32ee3526
+0, 150000, 276480, 0x5ce39368
 0, 157500, 276480, 0x4ec1e418
-0, 165000, 276480, 0x95f22651
-0, 172500, 276480, 0x071d897e
-0, 180000, 276480, 0x9f7623f9
-0, 187500, 276480, 0x86d4dedf
-0, 195000, 276480, 0xc0a0be22
-0, 202500, 276480, 0xc5902aec
-0, 210000, 276480, 0xe000f066
-0, 217500, 276480, 0x0b2a48d5
-0, 225000, 276480, 0xa1565256
-0, 232500, 276480, 0x8de3ceb3
-0, 240000, 276480, 0x654b564a
+0, 165000, 276480, 0x85cbc3b5
+0, 172500, 276480, 0x377c7b46
+0, 180000, 276480, 0x756a4a2e
+0, 187500, 276480, 0xcb379547
+0, 195000, 276480, 0x99c085be
+0, 202500, 276480, 0xe479ffed
+0, 210000, 276480, 0x1e4fae19
+0, 217500, 276480, 0x776412ef
+0, 225000, 276480, 0x58ce0f38
+0, 232500, 276480, 0x5ab69b27
+0, 240000, 276480, 0xc3db9706
 0, 247500, 276480, 0xc9c57884
-0, 255000, 276480, 0x89cdcdd4
-0, 262500, 276480, 0x3594fe61
-0, 270000, 276480, 0x9d082a81
-0, 277500, 276480, 0x4e6cd0c3
-0, 285000, 276480, 0xc129765f
-0, 292500, 276480, 0x92a04c99
-0, 300000, 276480, 0x5ca62953
-0, 307500, 276480, 0xb7e478aa
-0, 315000, 276480, 0x932735d5
-0, 322500, 276480, 0xaaa2d7aa
-0, 330000, 276480, 0xd1329996
+0, 255000, 276480, 0x000b5269
+0, 262500, 276480, 0x27ff7a5d
+0, 270000, 276480, 0x70647530
+0, 277500, 276480, 0x97612c4b
+0, 285000, 276480, 0xdf4e04d7
+0, 292500, 276480, 0xbd98f57c
+0, 300000, 276480, 0x5163b29b
+0, 307500, 276480, 0x99170e64
+0, 315000, 276480, 0x8a4e991f
+0, 322500, 276480, 0x6a45425f
+0, 330000, 276480, 0x7bf6b1ef
 0, 337500, 276480, 0x6de1e34b
-0, 345000, 276480, 0x8c963c9b
-0, 352500, 276480, 0xce6eff29
-0, 360000, 276480, 0x25412f7e
-0, 367500, 276480, 0x11a5ad85
-0, 375000, 276480, 0x26ea3248
-0, 382500, 276480, 0x86c35fa4
-0, 390000, 276480, 0xa98a2d38
-0, 397500, 276480, 0xed827333
-0, 405000, 276480, 0x5d44a824
-0, 412500, 276480, 0x46d54d04
-0, 420000, 276480, 0x413fd26a
+0, 345000, 276480, 0xdcaaa99a
+0, 352500, 276480, 0xd1e98808
+0, 360000, 276480, 0x6e2d524e
+0, 367500, 276480, 0x22c50a3d
+0, 375000, 276480, 0x62b76407
+0, 382500, 276480, 0x51e9b3eb
+0, 390000, 276480, 0x441f7afd
+0, 397500, 276480, 0xfb01efc6
+0, 405000, 276480, 0x294bb441
+0, 412500, 276480, 0xe04ac45e
+0, 420000, 276480, 0x58f275ea
 0, 427500, 276480, 0xf0b3b71b
-0, 435000, 276480, 0x459bc06d
-0, 442500, 276480, 0x4199cd45
-0, 450000, 276480, 0xa8d35683
-0, 457500, 276480, 0x9a3e7de0
-0, 465000, 276480, 0x5a30f666
-0, 472500, 276480, 0x40152668
-0, 480000, 276480, 0x90c4d22c
-0, 487500, 276480, 0x5cbaacc9
-0, 495000, 276480, 0x72b658f1
-0, 502500, 276480, 0x0ba3dcc9
-0, 510000, 276480, 0x259ed5c1
+0, 435000, 276480, 0x674e34e4
+0, 442500, 276480, 0x41dda2d9
+0, 450000, 276480, 0xf46ba7fb
+0, 457500, 276480, 0x28b54815
+0, 465000, 276480, 0xaf2b5d89
+0, 472500, 276480, 0x8facba58
+0, 480000, 276480, 0x28a63236
+0, 487500, 276480, 0x1ad43fd7
+0, 495000, 276480, 0x71507bd2
+0, 502500, 276480, 0x35626022
+0, 510000, 276480, 0x7c1139b3
 0, 517500, 276480, 0x7fd73a99
-0, 525000, 276480, 0x488980c5
-0, 532500, 276480, 0x1d4c96a5
-0, 540000, 276480, 0x41ced7f2
-0, 547500, 276480, 0xd62d1837
-0, 555000, 276480, 0xf5fd9d20
-0, 562500, 276480, 0x2af91fda
-0, 570000, 276480, 0x38ce229d
-0, 577500, 276480, 0xf3a712c0
-0, 585000, 276480, 0x57b111d2
-0, 592500, 276480, 0x8556b792
-0, 600000, 276480, 0xb32d0896
+0, 525000, 276480, 0xb52e1aa2
+0, 532500, 276480, 0xd6f82cae
+0, 540000, 276480, 0xf88f75d4
+0, 547500, 276480, 0x04a8e3ee
+0, 555000, 276480, 0xa29f5b01
+0, 562500, 276480, 0x754ceaf5
+0, 570000, 276480, 0x5a38b4af
+0, 577500, 276480, 0xfcebc261
+0, 585000, 276480, 0x3d3ca985
+0, 592500, 276480, 0x94a03c75
+0, 600000, 276480, 0x2f98911c
 0, 607500, 276480, 0x923b9937
-0, 615000, 276480, 0x0da1e7e3
-0, 622500, 276480, 0x7f172382
-0, 630000, 276480, 0x93622b88
-0, 637500, 276480, 0x2599d540
-0, 645000, 276480, 0xed20c105
-0, 652500, 276480, 0x62ce256e
-0, 660000, 276480, 0x286a04bb
-0, 667500, 276480, 0x423f7e7c
-0, 675000, 276480, 0x21fc252a
-0, 682500, 276480, 0xf8a8e8ee
-0, 690000, 276480, 0x770d4a8d
+0, 615000, 276480, 0xefab7ffd
+0, 622500, 276480, 0x6b9fbc80
+0, 630000, 276480, 0xe4bdbd1e
+0, 637500, 276480, 0x225a56c0
+0, 645000, 276480, 0xf58b1b7c
+0, 652500, 276480, 0xbaffcdcc
+0, 660000, 276480, 0xeb6eb88f
+0, 667500, 276480, 0xdb753d35
+0, 675000, 276480, 0xea80a82e
+0, 682500, 276480, 0x2aae902a
+0, 690000, 276480, 0x9b9ee961
 0, 697500, 276480, 0xaa12b6fd
-0, 705000, 276480, 0xdc7221a8
-0, 712500, 276480, 0x487eeb30
-0, 720000, 276480, 0x1e74f2db
-0, 727500, 276480, 0x40ae2bc3
-0, 735000, 276480, 0x9ca9b930
-0, 742500, 276480, 0x9fb19b0f
-0, 750000, 276480, 0x7bdf836c
-0, 757500, 276480, 0x1e607ba7
-0, 765000, 276480, 0xbd96578b
-0, 772500, 276480, 0x2124bf07
-0, 780000, 276480, 0x4895e27a
+0, 705000, 276480, 0x50c31e73
+0, 712500, 276480, 0xdd9fb89f
+0, 720000, 276480, 0xaf82399a
+0, 727500, 276480, 0x7ce5f23c
+0, 735000, 276480, 0x5aaa7519
+0, 742500, 276480, 0xe45a5599
+0, 750000, 276480, 0x704411fb
+0, 757500, 276480, 0x9d7430a1
+0, 765000, 276480, 0x2c230702
+0, 772500, 276480, 0x4a4f76cd
+0, 780000, 276480, 0x27f54854
 0, 787500, 276480, 0x694d76e3
-0, 795000, 276480, 0xe70df513
-0, 802500, 276480, 0xcacafe6b
-0, 810000, 276480, 0x64087748
-0, 817500, 276480, 0x571fda23
-0, 825000, 276480, 0x8c86cbe9
-0, 832500, 276480, 0xc8ea4671
-0, 840000, 276480, 0xbfb74300
-0, 847500, 276480, 0xbe1e3770
-0, 855000, 276480, 0x757a0232
-0, 862500, 276480, 0xa5f50c84
-0, 870000, 276480, 0x6d95f808
+0, 795000, 276480, 0x525463e2
+0, 802500, 276480, 0x819898f9
+0, 810000, 276480, 0xeeed00fc
+0, 817500, 276480, 0xb6f99ee3
+0, 825000, 276480, 0xefc83107
+0, 832500, 276480, 0xbb22e024
+0, 840000, 276480, 0x300f922a
+0, 847500, 276480, 0x826fc3bd
+0, 855000, 276480, 0x679a53f8
+0, 862500, 276480, 0x976c9e93
+0, 870000, 276480, 0xb194656e
 0, 877500, 276480, 0xf002c5ca
-0, 885000, 276480, 0x1a2abb26
-0, 892500, 276480, 0x6cf69bf2
+0, 885000, 276480, 0xb243dda5
+0, 892500, 276480, 0x1700efbb
 0, 900000, 276480, 0x8f316c66



More information about the ffmpeg-cvslog mailing list