[FFmpeg-devel] [PATCH] Use [48]x[48] DCTs for fasttx mode in WMV3

Kostya kostya.shishkov
Sun Dec 2 18:22:03 CET 2007


Here is a patch which adds 8x4, 4x8 and 4x4 reference DCTs
for use in WMV3 with fasttx mode. This fixes some small
artefacts caused by a different transform being used.
-------------- next part --------------
Index: libavcodec/simple_idct.h
===================================================================
--- libavcodec/simple_idct.h	(revision 11125)
+++ libavcodec/simple_idct.h	(working copy)
@@ -43,4 +43,8 @@
 void simple_idct84_add(uint8_t *dest, int line_size, DCTELEM *block);
 void simple_idct48_add(uint8_t *dest, int line_size, DCTELEM *block);
 
+void simple_idct84(DCTELEM *block); /* 8x4 IDCT, transforms block in place */
+void simple_idct48(DCTELEM *block); /* 4x8 IDCT, transforms block in place */
+void simple_idct44(DCTELEM *block); /* 4x4 IDCT, transforms block in place */
+
 #endif /* FFMPEG_SIMPLE_IDCT_H */
Index: libavcodec/simple_idct.c
===================================================================
--- libavcodec/simple_idct.c	(revision 11125)
+++ libavcodec/simple_idct.c	(working copy)
@@ -428,7 +428,7 @@
    and the butterfly must be multiplied by 0.5 * sqrt(2.0) */
 #define C_SHIFT (4+1+12)
 
-static inline void idct4col(uint8_t *dest, int line_size, const DCTELEM *col)
+static inline void idct4col_put(uint8_t *dest, int line_size, const DCTELEM *col)
 {
     int c0, c1, c2, c3, a0, a1, a2, a3;
     const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
@@ -491,8 +491,8 @@
 
     /* IDCT4 and store */
     for(i=0;i<8;i++) {
-        idct4col(dest + i, 2 * line_size, block + i);
-        idct4col(dest + line_size + i, 2 * line_size, block + 8 + i);
+        idct4col_put(dest + i, 2 * line_size, block + i);
+        idct4col_put(dest + line_size + i, 2 * line_size, block + 8 + i);
     }
 }
 
@@ -530,6 +530,25 @@
     dest[0] = cm[dest[0] + ((c0 - c1) >> C_SHIFT)];
 }
 
+static inline void idct4col(DCTELEM *col)
+{
+    int c0, c1, c2, c3, a0, a1, a2, a3;
+    /* In-place 4-point column IDCT: reads col[8*0..8*3] and overwrites them; no clipping, no dest buffer. */
+
+    a0 = col[8*0];
+    a1 = col[8*1];
+    a2 = col[8*2];
+    a3 = col[8*3];
+    c0 = (a0 + a2)*C3 + (1 << (C_SHIFT - 1)); /* even inputs; bias rounds the later >> C_SHIFT */
+    c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1)); /* same rounding bias as c0 */
+    c1 = a1 * C1 + a3 * C2; /* odd inputs */
+    c3 = a1 * C2 - a3 * C1;
+    col[8*0] = (c0 + c1) >> C_SHIFT; /* outputs replace the inputs, 8 elements apart */
+    col[8*1] = (c2 + c3) >> C_SHIFT;
+    col[8*2] = (c2 - c3) >> C_SHIFT;
+    col[8*3] = (c0 - c1) >> C_SHIFT;
+}
+
 #define RN_SHIFT 15
 #define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5))
 #define R1 R_FIX(0.6532814824)
@@ -585,3 +604,47 @@
     }
 }
 
+void simple_idct84(DCTELEM *block)
+{
+    int i;
+
+    /* In-place 8x4 IDCT, pass 1: IDCT8 on each of the 4 lines (line stride 8) */
+    for(i=0; i<4; i++) {
+        idctRowCondDC(block + i*8);
+    }
+
+    /* Pass 2: IDCT4 down each of the 8 columns; results stay in block */
+    for(i=0;i<8;i++) {
+        idct4col(block + i);
+    }
+}
+
+void simple_idct48(DCTELEM *block)
+{
+    int i;
+
+    /* In-place 4x8 IDCT, pass 1: IDCT4 on each of the 8 lines (line stride 8) */
+    for(i=0; i<8; i++) {
+        idct4row(block + i*8);
+    }
+
+    /* Pass 2: IDCT8 down each of the 4 columns; results stay in block */
+    for(i=0; i<4; i++){
+        idctSparseCol(block + i);
+    }
+}
+
+void simple_idct44(DCTELEM *block)
+{
+    int i;
+
+    /* In-place 4x4 IDCT, pass 1: IDCT4 on each of the 4 lines (line stride 8) */
+    for(i=0; i<4; i++) {
+        idct4row(block + i*8);
+    }
+
+    /* Pass 2: IDCT4 (not IDCT8) down each of the 4 columns; results stay in block */
+    for(i=0; i<4; i++){
+        idct4col(block + i);
+    }
+}
Index: libavcodec/vc1.c
===================================================================
--- libavcodec/vc1.c	(revision 11125)
+++ libavcodec/vc1.c	(working copy)
@@ -764,6 +764,21 @@
 
 static int decode_sequence_header_adv(VC1Context *v, GetBitContext *gb);
 
+static void vc1_inv_trans_8x4_c_fasttx(DCTELEM block[64], int n)
+{
+    simple_idct84(block + n * 32); /* sub-block n starts n*32 coefficients in, i.e. 4 lines of 8 further down */
+}
+
+static void vc1_inv_trans_4x8_c_fasttx(DCTELEM block[64], int n)
+{
+    simple_idct48(block + n * 4); /* sub-block n starts n*4 coefficients in, i.e. 4 columns to the right per step */
+}
+
+static void vc1_inv_trans_4x4_c_fasttx(DCTELEM block[64], int n)
+{
+    simple_idct44(block + (n&1) * 4 + (n&2) * 16); /* bit 0 of n: +4 columns; bit 1: (n&2)*16 is 0 or 32, i.e. +4 lines of 8 */
+}
+
 /**
  * Decode Simple/Main Profiles sequence header
  * @see Figure 7-8, p16-17
@@ -814,6 +829,9 @@
     if (!v->res_fasttx)
     {
         v->s.dsp.vc1_inv_trans_8x8 = simple_idct;
+        v->s.dsp.vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c_fasttx;
+        v->s.dsp.vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c_fasttx;
+        v->s.dsp.vc1_inv_trans_4x4 = vc1_inv_trans_4x4_c_fasttx;
     }
 
     v->fastuvmc =  get_bits1(gb); //common



More information about the ffmpeg-devel mailing list