[FFmpeg-devel] MPEG-2 Acceleration Refactor

Greg Hulands ghulands
Mon Jun 18 01:47:58 CEST 2007


> [...]
> Wow!  You've reduced the patch to nothing, and it still gives a
> speedup.  That's impressive work.  I can't wait to test it.
>

Ah crap. Sorry.

Index: mpeg12.c
===================================================================
--- mpeg12.c	(revision 9339)
+++ mpeg12.c	(working copy)
@@ -53,19 +53,19 @@
#endif //CONFIG_ENCODERS
static inline int mpeg1_decode_block_inter(MpegEncContext *s,
                                DCTELEM *block,
-                              int n);
+                              int n,
+                              int fast);
static inline int mpeg1_decode_block_intra(MpegEncContext *s,
                                DCTELEM *block,
                                int n);
-static inline int mpeg1_fast_decode_block_inter(MpegEncContext *s, DCTELEM *block, int n);
static inline int mpeg2_decode_block_non_intra(MpegEncContext *s,
                                          DCTELEM *block,
-                                        int n);
+                                        int n,
+                                        int fast);
static inline int mpeg2_decode_block_intra(MpegEncContext *s,
                                      DCTELEM *block,
-                                    int n);
-static inline int mpeg2_fast_decode_block_non_intra(MpegEncContext *s, DCTELEM *block, int n);
-static inline int mpeg2_fast_decode_block_intra(MpegEncContext *s, DCTELEM *block, int n);
+                                    int n,
+                                    int fast);
static int mpeg_decode_motion(MpegEncContext *s, int fcode, int pred);
static void exchange_uv(MpegEncContext *s);
@@ -1233,11 +1233,11 @@
          if (s->codec_id == CODEC_ID_MPEG2VIDEO) {
              if(s->flags2 & CODEC_FLAG2_FAST){
                  for(i=0;i<6;i++) {
-                    mpeg2_fast_decode_block_intra(s, s->pblocks[i], i);
+                    mpeg2_decode_block_intra(s, s->pblocks[i], i, 1);
                  }
              }else{
                  for(i=0;i<mb_block_count;i++) {
-                    if (mpeg2_decode_block_intra(s, s->pblocks[i], i) < 0)
+                    if (mpeg2_decode_block_intra(s, s->pblocks[i], i, 0) < 0)
                          return -1;
                  }
              }
@@ -1445,7 +1445,7 @@
                  if(s->flags2 & CODEC_FLAG2_FAST){
                      for(i=0;i<6;i++) {
                          if(cbp & 32) {
-                            mpeg2_fast_decode_block_non_intra(s, s->pblocks[i], i);
+                            mpeg2_decode_block_non_intra(s, s->pblocks[i], i, 1);
                          } else {
                              s->block_last_index[i] = -1;
                          }
@@ -1456,7 +1456,7 @@
                      for(i=0;i<mb_block_count;i++) {
                          if ( cbp & (1<<11) ) {
-                            if (mpeg2_decode_block_non_intra(s, s->pblocks[i], i) < 0)
+                            if (mpeg2_decode_block_non_intra(s, s->pblocks[i], i, 0) < 0)
                                  return -1;
                          } else {
                              s->block_last_index[i] = -1;
@@ -1468,7 +1468,7 @@
                  if(s->flags2 & CODEC_FLAG2_FAST){
                      for(i=0;i<6;i++) {
                          if (cbp & 32) {
-                            mpeg1_fast_decode_block_inter(s, s->pblocks[i], i);
+                            mpeg1_decode_block_inter(s, s->pblocks[i], i, 1);
                          } else {
                              s->block_last_index[i] = -1;
                          }
@@ -1477,7 +1477,7 @@
                  }else{
                      for(i=0;i<6;i++) {
                          if (cbp & 32) {
-                            if (mpeg1_decode_block_inter(s, s->pblocks[i], i) < 0)
+                            if (mpeg1_decode_block_inter(s, s->pblocks[i], i, 0) < 0)
                                  return -1;
                          } else {
                              s->block_last_index[i] = -1;
@@ -1622,15 +1622,15 @@
     return 0;
}
-static inline int mpeg1_decode_block_inter(MpegEncContext *s,
-                               DCTELEM *block,
-                               int n)
+static inline int mpeg1_decode_block_inter(MpegEncContext *s, DCTELEM *block, int n, int fast)
{
      int level, i, j, run;
      RLTable *rl = &rl_mpeg1;
      uint8_t * const scantable= s->intra_scantable.permutated;
-    const uint16_t *quant_matrix= s->inter_matrix;
      const int qscale= s->qscale;
+	const uint16_t *quant_matrix;
+	
+	if (!fast) quant_matrix= s->inter_matrix;
      {
          OPEN_READER(re, &s->gb);
@@ -1638,7 +1638,10 @@
          /* special case for the first coef. no need to add a second vlc table */
          UPDATE_CACHE(re, &s->gb);
          if (((int32_t)GET_CACHE(re, &s->gb)) < 0) {
-            level= (3*qscale*quant_matrix[0])>>5;
+            if (fast)
+               level= (3*qscale)>>1;
+            else
+               level= (3*qscale*quant_matrix[0])>>5;
              level= (level-1)|1;
              if(GET_CACHE(re, &s->gb)&0x40000000)
                  level= -level;
@@ -1656,7 +1659,10 @@
              if(level != 0) {
                  i += run;
                  j = scantable[i];
-                level= ((level*2+1)*qscale*quant_matrix[j])>>5;
+                if (fast)
+                   level= ((level*2+1)*qscale)>>1;
+                else
+                   level= ((level*2+1)*qscale*quant_matrix[j])>>5;
                  level= (level-1)|1;
                  level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
                  SKIP_BITS(re, &s->gb, 1);
@@ -1674,17 +1680,25 @@
                  j = scantable[i];
                  if(level<0){
                      level= -level;
-                    level= ((level*2+1)*qscale*quant_matrix[j])>>5;
+                    if (fast)
+                       level= ((level*2+1)*qscale)>>1;
+                    else
+                       level= ((level*2+1)*qscale*quant_matrix[j])>>5;
                      level= (level-1)|1;
                      level= -level;
                  }else{
-                    level= ((level*2+1)*qscale*quant_matrix[j])>>5;
+                    if (fast)
+                       level= ((level*2+1)*qscale)>>1;
+                    else
+                       level= ((level*2+1)*qscale*quant_matrix[j])>>5;
                      level= (level-1)|1;
                  }
              }
-            if (i > 63){
-                av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
-                return -1;
+            if (!fast) {
+               if (i > 63){
+                  av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
+                  return -1;
+               }
              }
              block[j] = level;
@@ -1700,177 +1714,43 @@
      return 0;
}
-static inline int mpeg1_fast_decode_block_inter(MpegEncContext *s, DCTELEM *block, int n)
-{
-    int level, i, j, run;
-    RLTable *rl = &rl_mpeg1;
-    uint8_t * const scantable= s->intra_scantable.permutated;
-    const int qscale= s->qscale;
-    {
-        OPEN_READER(re, &s->gb);
-        i = -1;
-        /* special case for the first coef. no need to add a second vlc table */
-        UPDATE_CACHE(re, &s->gb);
-        if (((int32_t)GET_CACHE(re, &s->gb)) < 0) {
-            level= (3*qscale)>>1;
-            level= (level-1)|1;
-            if(GET_CACHE(re, &s->gb)&0x40000000)
-                level= -level;
-            block[0] = level;
-            i++;
-            SKIP_BITS(re, &s->gb, 2);
-            if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
-                goto end;
-        }
-
-        /* now quantify & encode AC coefs */
-        for(;;) {
-            GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
-
-            if(level != 0) {
-                i += run;
-                j = scantable[i];
-                level= ((level*2+1)*qscale)>>1;
-                level= (level-1)|1;
-                level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
-                SKIP_BITS(re, &s->gb, 1);
-            } else {
-                /* escape */
-                run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6);
-                UPDATE_CACHE(re, &s->gb);
-                level = SHOW_SBITS(re, &s->gb, 8); SKIP_BITS(re, &s->gb, 8);
-                if (level == -128) {
-                    level = SHOW_UBITS(re, &s->gb, 8) - 256; SKIP_BITS(re, &s->gb, 8);
-                } else if (level == 0) {
-                    level = SHOW_UBITS(re, &s->gb, 8)      ; SKIP_BITS(re, &s->gb, 8);
-                }
-                }
-                i += run;
-                j = scantable[i];
-                if(level<0){
-                    level= -level;
-                    level= ((level*2+1)*qscale)>>1;
-                    level= (level-1)|1;
-                    level= -level;
-                }else{
-                    level= ((level*2+1)*qscale)>>1;
-                    level= (level-1)|1;
-                }
-            }
-
-            block[j] = level;
-            if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
-                break;
-            UPDATE_CACHE(re, &s->gb);
-        }
-end:
-        LAST_SKIP_BITS(re, &s->gb, 2);
-        CLOSE_READER(re, &s->gb);
-    }
-    s->block_last_index[n] = i;
-    return 0;
-}
-
-
static inline int mpeg2_decode_block_non_intra(MpegEncContext *s,
-                               DCTELEM *block,
-                               int n)
+                                               DCTELEM *block,
+                                               int n,
+                                               int fast)
{
      int level, i, j, run;
      RLTable *rl = &rl_mpeg1;
      uint8_t * const scantable= s->intra_scantable.permutated;
-    const uint16_t *quant_matrix;
      const int qscale= s->qscale;
-    int mismatch;
+	const uint16_t *quant_matrix; // !fast
+	int mismatch;
-    mismatch = 1;
-
-    {
-        OPEN_READER(re, &s->gb);
-        i = -1;
-        if (n < 4)
-            quant_matrix = s->inter_matrix;
-        else
-            quant_matrix = s->chroma_inter_matrix;
-
-        /* special case for the first coef. no need to add a second vlc table */
-        UPDATE_CACHE(re, &s->gb);
-        if (((int32_t)GET_CACHE(re, &s->gb)) < 0) {
-            level= (3*qscale*quant_matrix[0])>>5;
-            if(GET_CACHE(re, &s->gb)&0x40000000)
-                level= -level;
-            block[0] = level;
-            mismatch ^= level;
-            i++;
-            SKIP_BITS(re, &s->gb, 2);
-            if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
-                goto end;
-        }
-
-        /* now quantify & encode AC coefs */
-        for(;;) {
-            GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
-
-            if(level != 0) {
-                i += run;
-                j = scantable[i];
-                level= ((level*2+1)*qscale*quant_matrix[j])>>5;
-                level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
-                SKIP_BITS(re, &s->gb, 1);
-            } else {
-                /* escape */
-                run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6);
-                UPDATE_CACHE(re, &s->gb);
-                level = SHOW_SBITS(re, &s->gb, 12); SKIP_BITS(re, &s->gb, 12);
-
-                i += run;
-                j = scantable[i];
-                if(level<0){
-                    level= ((-level*2+1)*qscale*quant_matrix[j])>>5;
-                    level= -level;
-                }else{
-                    level= ((level*2+1)*qscale*quant_matrix[j])>>5;
-                }
-            }
-            if (i > 63){
-                av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
-                return -1;
-            }
-
-            mismatch ^= level;
-            block[j] = level;
-            if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
-                break;
-            UPDATE_CACHE(re, &s->gb);
-        }
-end:
-        LAST_SKIP_BITS(re, &s->gb, 2);
-        CLOSE_READER(re, &s->gb);
-    }
-    block[63] ^= (mismatch & 1);
-
-    s->block_last_index[n] = i;
-    return 0;
-}
-
-static inline int mpeg2_fast_decode_block_non_intra(MpegEncContext *s,
-                               DCTELEM *block,
-                               int n)
-{
-    int level, i, j, run;
-    RLTable *rl = &rl_mpeg1;
-    uint8_t * const scantable= s->intra_scantable.permutated;
-    const int qscale= s->qscale;
      OPEN_READER(re, &s->gb);
      i = -1;
+	if (!fast)
+	{
+       mismatch = 1;
+
+       if (n < 4)
+          quant_matrix = s->inter_matrix;
+       else
+          quant_matrix = s->chroma_inter_matrix;
+	}
+
      /* special case for the first coef. no need to add a second vlc table */
      UPDATE_CACHE(re, &s->gb);
      if (((int32_t)GET_CACHE(re, &s->gb)) < 0) {
-        level= (3*qscale)>>1;
+        if (fast)
+           level= (3*qscale)>>1;
+        else
+           level= (3*qscale*quant_matrix[0])>>5;
          if(GET_CACHE(re, &s->gb)&0x40000000)
              level= -level;
          block[0] = level;
+		if (!fast) mismatch ^= level;
          i++;
          SKIP_BITS(re, &s->gb, 2);
          if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
@@ -1884,7 +1764,10 @@
          if(level != 0) {
              i += run;
              j = scantable[i];
-            level= ((level*2+1)*qscale)>>1;
+            if (fast)
+               level= ((level*2+1)*qscale)>>1;
+            else
+               level= ((level*2+1)*qscale*quant_matrix[j])>>5;
              level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
              SKIP_BITS(re, &s->gb, 1);
          } else {
@@ -1896,13 +1779,26 @@
              i += run;
              j = scantable[i];
              if(level<0){
-                level= ((-level*2+1)*qscale)>>1;
+                if (fast)
+                   level= ((-level*2+1)*qscale)>>1;
+                else
+                   level= ((-level*2+1)*qscale*quant_matrix[j])>>5;
                  level= -level;
              }else{
-                level= ((level*2+1)*qscale)>>1;
+                if (fast)
+                   level= ((level*2+1)*qscale)>>1;
+                else
+                   level= ((level*2+1)*qscale*quant_matrix[j])>>5;
              }
          }
+        if (!fast) {
+           if (i > 63){
+              av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
+              return -1;
+           }
+           mismatch ^= level;
+        }
          block[j] = level;
          if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
              break;
@@ -1911,19 +1807,21 @@
end:
      LAST_SKIP_BITS(re, &s->gb, 2);
      CLOSE_READER(re, &s->gb);
+    if (!fast) block[63] ^= (mismatch & 1);
      s->block_last_index[n] = i;
      return 0;
}
static inline int mpeg2_decode_block_intra(MpegEncContext *s,
-                               DCTELEM *block,
-                               int n)
+                                           DCTELEM *block,
+                                           int n,
+                                           int fast)
{
      int level, dc, diff, i, j, run;
      int component;
      RLTable *rl;
-    uint8_t * const scantable= s->intra_scantable.permutated;
+    uint8_t * scantable= s->intra_scantable.permutated; // could be const if (!fast)
      const uint16_t *quant_matrix;
      const int qscale= s->qscale;
      int mismatch;
@@ -1943,9 +1841,11 @@
      dc += diff;
      s->last_dc[component] = dc;
      block[0] = dc << (3 - s->intra_dc_precision);
-    dprintf(s->avctx, "dc=%d\n", block[0]);
-    mismatch = block[0] ^ 1;
-    i = 0;
+    if (!fast) {
+       dprintf("dc=%d\n", block[0]);
+       mismatch = block[0] ^ 1;
+       i = 0;
+    }
      if (s->intra_vlc_format)
          rl = &rl_mpeg2;
      else
@@ -1961,8 +1861,13 @@
              if(level == 127){
                  break;
              } else if(level != 0) {
-                i += run;
-                j = scantable[i];
+                if (fast) {
+                   scantable += run;
+                   j = *scantable;
+                } else {
+                   i += run;
+                   j = scantable[i];
+                }
                  level= (level*qscale*quant_matrix[j])>>4;
                  level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
                  LAST_SKIP_BITS(re, &s->gb, 1);
@@ -1971,8 +1876,13 @@
                  run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6);
                  UPDATE_CACHE(re, &s->gb);
                  level = SHOW_SBITS(re, &s->gb, 12); SKIP_BITS(re, &s->gb, 12);
-                i += run;
-                j = scantable[i];
+                if (fast) {
+                   scantable += run;
+                   j = *scantable;
+                } else {
+                   i += run;
+                   j = scantable[i];
+                }
                  if(level<0){
                      level= (-level*qscale*quant_matrix[j])>>4;
                      level= -level;
@@ -1980,92 +1890,23 @@
                      level= (level*qscale*quant_matrix[j])>>4;
                  }
              }
-            if (i > 63){
-                av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
-                return -1;
+            if (!fast) {
+               if (i > 63){
+                  av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
+                  return -1;
+               }
+               mismatch^= level;
              }
-
-            mismatch^= level;
              block[j] = level;
          }
          CLOSE_READER(re, &s->gb);
      }
-    block[63]^= mismatch&1;
+    if (!fast) block[63]^= mismatch&1;
-    s->block_last_index[n] = i;
+    s->block_last_index[n] = (fast ? scantable - s->intra_scantable.permutated : i);
      return 0;
}
-static inline int mpeg2_fast_decode_block_intra(MpegEncContext *s,
-                               DCTELEM *block,
-                               int n)
-{
-    int level, dc, diff, j, run;
-    int component;
-    RLTable *rl;
-    uint8_t * scantable= s->intra_scantable.permutated;
-    const uint16_t *quant_matrix;
-    const int qscale= s->qscale;
-
-    /* DC coef */
-    if (n < 4){
-        quant_matrix = s->intra_matrix;
-        component = 0;
-    }else{
-        quant_matrix = s->chroma_intra_matrix;
-        component = (n&1) + 1;
-    }
-    diff = decode_dc(&s->gb, component);
-    if (diff >= 0xffff)
-        return -1;
-    dc = s->last_dc[component];
-    dc += diff;
-    s->last_dc[component] = dc;
-    block[0] = dc << (3 - s->intra_dc_precision);
-    if (s->intra_vlc_format)
-        rl = &rl_mpeg2;
-    else
-        rl = &rl_mpeg1;
-
-    {
-        OPEN_READER(re, &s->gb);
-        /* now quantify & encode AC coefs */
-        for(;;) {
-            UPDATE_CACHE(re, &s->gb);
-            GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
-
-            if(level == 127){
-                break;
-            } else if(level != 0) {
-                scantable += run;
-                j = *scantable;
-                level= (level*qscale*quant_matrix[j])>>4;
-                level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
-                LAST_SKIP_BITS(re, &s->gb, 1);
-            } else {
-                /* escape */
-                run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6);
-                UPDATE_CACHE(re, &s->gb);
-                level = SHOW_SBITS(re, &s->gb, 12); SKIP_BITS(re, &s->gb, 12);
-                scantable += run;
-                j = *scantable;
-                if(level<0){
-                    level= (-level*qscale*quant_matrix[j])>>4;
-                    level= -level;
-                }else{
-                    level= (level*qscale*quant_matrix[j])>>4;
-                }
-            }
-
-            block[j] = level;
-        }
-        CLOSE_READER(re, &s->gb);
-    }
-
-    s->block_last_index[n] = scantable - s->intra_scantable.permutated;
-    return 0;
-}
-
typedef struct Mpeg1Context {
      MpegEncContext mpeg_enc_ctx;
      int mpeg_enc_ctx_allocated; /* true if decoding context allocated */






More information about the ffmpeg-devel mailing list