[FFmpeg-cvslog] commit: H.264: switch to x264-style tracking of luma/chroma DC NNZ ( Jason Garrett-Glaser )

git at videolan.org git
Fri Jan 14 23:11:16 CET 2011


ffmpeg | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Fri Jan 14 21:36:16 2011 +0000| [5657d14094e0b9d3f277322e49442592973bbdac] | committer: Jason Garrett-Glaser 

H.264: switch to x264-style tracking of luma/chroma DC NNZ
Tracking the non-zero coefficient counts (NNZ) of the luma/chroma DC blocks lets
us skip the hierarchical DC iDCT when there aren't any coefficients.  It opens up
some future opportunities for optimization as well.

Originally committed as revision 26337 to svn://svn.ffmpeg.org/ffmpeg/trunk

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5657d14094e0b9d3f277322e49442592973bbdac
---

 libavcodec/h264.c       |    8 ++++++--
 libavcodec/h264.h       |   19 ++++++++++++++++---
 libavcodec/h264_cabac.c |   11 ++++++-----
 libavcodec/h264_cavlc.c |   10 +++++-----
 4 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index f347047..a819ee8 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -1203,6 +1203,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                 }
             }else{
                 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
+                if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX] ]){
                 if(is_h264){
                     if(!transform_bypass)
                         h->h264dsp.h264_luma_dc_dequant_idct(h->mb, h->mb_luma_dc, h->dequant4_coeff[0][s->qscale][0]);
@@ -1214,6 +1215,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                     }
                 }else
                     ff_svq3_luma_dc_dequant_idct_c(h->mb, h->mb_luma_dc, s->qscale);
+                }
             }
             if(h->deblocking_filter)
                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
@@ -1281,8 +1283,10 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                     }
                 }
             }else{
-                chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
-                chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
+                if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
+                    chroma_dc_dequant_idct_c(h->mb + 16*16     , h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
+                if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
+                    chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                 if(is_h264){
                     h->h264dsp.h264_idct_add8(dest, block_offset,
                                               h->mb, uvlinesize,
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index 189864b..17fd680 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -39,8 +39,8 @@
 #define interlaced_dct interlaced_dct_is_a_bad_name
 #define mb_intra mb_intra_is_not_initialized_see_mb_type
 
-#define LUMA_DC_BLOCK_INDEX   25
-#define CHROMA_DC_BLOCK_INDEX 26
+#define LUMA_DC_BLOCK_INDEX   24
+#define CHROMA_DC_BLOCK_INDEX 25
 
 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
 #define COEFF_TOKEN_VLC_BITS           8
@@ -722,8 +722,20 @@ o-o o-o
  / / /
 o-o o-o
 */
+
+/* Scan8 organization:
+ *   0 1 2 3 4 5 6 7
+ * 0   u u y y y y y
+ * 1 u U U y Y Y Y Y
+ * 2 u U U y Y Y Y Y
+ * 3   v v y Y Y Y Y
+ * 4 v V V y Y Y Y Y
+ * 5 v V V   DYDUDV
+ * DY/DU/DV are for luma/chroma DC.
+ */
+
 //This table must be here because scan8[constant] must be known at compiletime
-static const uint8_t scan8[16 + 2*4]={
+static const uint8_t scan8[16 + 2*4 + 3]={
  4+1*8, 5+1*8, 4+2*8, 5+2*8,
  6+1*8, 7+1*8, 6+2*8, 7+2*8,
  4+3*8, 5+3*8, 4+4*8, 5+4*8,
@@ -732,6 +744,7 @@ static const uint8_t scan8[16 + 2*4]={
  1+2*8, 2+2*8,
  1+4*8, 2+4*8,
  1+5*8, 2+5*8,
+ 4+5*8, 5+5*8, 6+5*8
 };
 
 static av_always_inline uint32_t pack16to32(int a, int b){
diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index 971af37..3744095 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -965,6 +965,7 @@ static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx,
             nza = h->left_cbp&0x100;
             nzb = h-> top_cbp&0x100;
         } else {
+            idx -= CHROMA_DC_BLOCK_INDEX;
             nza = (h->left_cbp>>(6+idx))&0x01;
             nzb = (h-> top_cbp>>(6+idx))&0x01;
         }
@@ -1060,8 +1061,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
     /* read coded block flag */
     if( is_dc || cat != 5 ) {
         if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
-            if( !is_dc )
-                h->non_zero_count_cache[scan8[n]] = 0;
+            h->non_zero_count_cache[scan8[n]] = 0;
 
 #ifdef CABAC_ON_STACK
             h->cabac.range     = cc.range     ;
@@ -1112,7 +1112,8 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
         if( cat == 0 )
             h->cbp_table[h->mb_xy] |= 0x100;
         else
-            h->cbp_table[h->mb_xy] |= 0x40 << n;
+            h->cbp_table[h->mb_xy] |= 0x40 << (n - CHROMA_DC_BLOCK_INDEX);
+        h->non_zero_count_cache[scan8[n]] = coeff_count;
     } else {
         if( cat == 5 )
             fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
@@ -1642,7 +1643,7 @@ decode_intra_mb:
             //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
             AV_ZERO128(h->mb_luma_dc+0);
             AV_ZERO128(h->mb_luma_dc+8);
-            decode_cabac_residual_dc( h, h->mb_luma_dc, 0, 0, scan, 16);
+            decode_cabac_residual_dc( h, h->mb_luma_dc, 0, LUMA_DC_BLOCK_INDEX, scan, 16);
 
             if( cbp&15 ) {
                 qmul = h->dequant4_coeff[0][s->qscale];
@@ -1681,7 +1682,7 @@ decode_intra_mb:
             int c;
             for( c = 0; c < 2; c++ ) {
                 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
-                decode_cabac_residual_dc(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, 4);
+                decode_cabac_residual_dc(h, h->mb + 256 + 16*4*c, 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
             }
         }
 
diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c
index 426a285..b8bc450 100644
--- a/libavcodec/h264_cavlc.c
+++ b/libavcodec/h264_cavlc.c
@@ -371,7 +371,7 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
 
     //FIXME put trailing_onex into the context
 
-    if(n == CHROMA_DC_BLOCK_INDEX){
+    if(n >= CHROMA_DC_BLOCK_INDEX){
         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
         total_coeff= coeff_token>>2;
     }else{
@@ -383,9 +383,9 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
             total_coeff= pred_non_zero_count(h, n);
             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
             total_coeff= coeff_token>>2;
-            h->non_zero_count_cache[ scan8[n] ]= total_coeff;
         }
     }
+    h->non_zero_count_cache[ scan8[n] ]= total_coeff;
 
     //FIXME set last_non_zero?
 
@@ -482,14 +482,14 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
     if(total_coeff == max_coeff)
         zeros_left=0;
     else{
-        if(n == CHROMA_DC_BLOCK_INDEX)
+        if(n >= CHROMA_DC_BLOCK_INDEX)
             zeros_left= get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[ total_coeff ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
         else
             zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
     }
 
     scantable += zeros_left + total_coeff - 1;
-    if(n > 24){
+    if(n >= LUMA_DC_BLOCK_INDEX){
         block[*scantable] = level[0];
         for(i=1;i<total_coeff && zeros_left > 0;i++) {
             if(zeros_left < 7)
@@ -988,7 +988,7 @@ decode_intra_mb:
 
         if(cbp&0x30){
             for(chroma_idx=0; chroma_idx<2; chroma_idx++)
-                if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
+                if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
                     return -1;
                 }
         }




More information about the ffmpeg-cvslog mailing list