[FFmpeg-devel] [PATCH 2/3] h264: use one table instead of several for cabac functions

Roland Scheidegger rscheidegger_lists at hispeed.ch
Fri Apr 27 03:45:25 CEST 2012


The reason is that a single table is easier to handle in PIC code (in
particular on Darwin). Keep the old names as pointers (declared static in
cabac_functions.h so gcc knows they are just immediate offsets into the
table) so that the C code can stay the same (alternatively, the offsets
could be used directly in the functions that need the tables). This should
produce the same code as before for non-PIC builds and better code
(confirmed) for PIC builds.

The inline assembly uses the new table but still won't work in the PIC case.
---
 libavcodec/cabac.c           |   54 ++++++++++++++++++++++++------------------
 libavcodec/cabac.h           |    5 ++++
 libavcodec/cabac_functions.h |    8 ++++--
 libavcodec/h264_cabac.c      |    9 +------
 libavcodec/x86/cabac.h       |   22 ++++++++++-------
 libavcodec/x86/h264_i386.h   |   24 +++++++++++++-----
 6 files changed, 72 insertions(+), 50 deletions(-)

diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c
index 196e548..14ef30a 100644
--- a/libavcodec/cabac.c
+++ b/libavcodec/cabac.c
@@ -31,6 +31,29 @@
 #include "cabac.h"
 #include "cabac_functions.h"
 
+uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63] = {
+ 9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
+ 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+};
+
 static const uint8_t lps_range[64][4]= {
 {128,176,208,240}, {128,167,197,227}, {128,158,187,216}, {123,150,178,205},
 {116,142,169,195}, {111,135,160,185}, {105,128,152,175}, {100,122,144,166},
@@ -50,8 +73,6 @@ static const uint8_t lps_range[64][4]= {
 {  6,  8,  9, 11}, {  6,  7,  9, 10}, {  6,  7,  8,  9}, {  2,  2,  2,  2},
 };
 
-uint8_t ff_h264_mlps_state[4*64];
-uint8_t ff_h264_lps_range[4*2*64];
 static uint8_t h264_lps_state[2*64];
 static uint8_t h264_mps_state[2*64];
 
@@ -77,27 +98,11 @@ static const uint8_t lps_state[64]= {
  36,36,37,37,37,38,38,63,
 };
 
-const uint8_t ff_h264_norm_shift[512]= {
- 9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
- 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+static const uint8_t last_coeff_flag_offset_8x8[63] = {
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
 };
 
 /**
@@ -158,6 +163,9 @@ void ff_init_cabac_states(CABACContext *c){
             ff_h264_mlps_state[128-2*i-2]= 0;
         }
     }
+    for(i=0; i< 63; i++){
+      ff_h264_last_coeff_flag_offset_8x8[i] = last_coeff_flag_offset_8x8[i];
+    }
 }
 
 #ifdef TEST
diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h
index 667489e..29535d0 100644
--- a/libavcodec/cabac.h
+++ b/libavcodec/cabac.h
@@ -31,6 +31,11 @@
 
 #include "put_bits.h"
 
+#define H264_NORM_SHIFT_OFFSET 0
+#define H264_LPS_RANGE_OFFSET 512
+#define H264_MLPS_STATE_OFFSET 1024
+#define H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET 1280
+
 #define CABAC_BITS 16
 #define CABAC_MASK ((1<<CABAC_BITS)-1)
 
diff --git a/libavcodec/cabac_functions.h b/libavcodec/cabac_functions.h
index 90af15c..ee70fcf 100644
--- a/libavcodec/cabac_functions.h
+++ b/libavcodec/cabac_functions.h
@@ -36,9 +36,11 @@
 #   include "x86/cabac.h"
 #endif
 
-extern const uint8_t ff_h264_norm_shift[512];
-extern uint8_t ff_h264_mlps_state[4*64];
-extern uint8_t ff_h264_lps_range[4*2*64];  ///< rangeTabLPS
+extern uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63];
+static uint8_t * const ff_h264_norm_shift = ff_h264_cabac_tables + H264_NORM_SHIFT_OFFSET;
+static uint8_t * const ff_h264_lps_range = ff_h264_cabac_tables + H264_LPS_RANGE_OFFSET;
+static uint8_t * const ff_h264_mlps_state = ff_h264_cabac_tables + H264_MLPS_STATE_OFFSET;
+static uint8_t * const ff_h264_last_coeff_flag_offset_8x8 = ff_h264_cabac_tables + H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET;
 
 static void refill(CABACContext *c){
 #if CABAC_BITS == 16
diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index 65f2cb4..29dbd7a 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -1561,13 +1561,6 @@ static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx,
     return base_ctx[cat] + ctx;
 }
 
-DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = {
-    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
-    5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
-};
-
 static av_always_inline void
 decode_cabac_residual_internal(H264Context *h, DCTELEM *block,
                                int cat, int n, const uint8_t *scantable,
@@ -1670,7 +1663,7 @@ decode_cabac_residual_internal(H264Context *h, DCTELEM *block,
                                                  last_coeff_ctx_base-significant_coeff_ctx_base);
         }
 #else
-        DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
+        DECODE_SIGNIFICANCE( 63, sig_off[last], ff_h264_last_coeff_flag_offset_8x8[last] );
     } else {
         if (is_dc && chroma422) { // dc 422
             DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]);
diff --git a/libavcodec/x86/cabac.h b/libavcodec/x86/cabac.h
index 0c4419b..e112f67 100644
--- a/libavcodec/x86/cabac.h
+++ b/libavcodec/x86/cabac.h
@@ -24,10 +24,11 @@
 #include "libavcodec/cabac.h"
 #include "libavutil/attributes.h"
 #include "libavutil/x86_cpu.h"
+#include "libavutil/internal.h"
 #include "config.h"
 
 #if HAVE_FAST_CMOV
-#define BRANCHLESS_GET_CABAC_UPDATE(ret, statep, low, range, tmp)\
+#define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\
         "mov    "tmp"       , %%ecx     \n\t"\
         "shl    $17         , "tmp"     \n\t"\
         "cmp    "low"       , "tmp"     \n\t"\
@@ -37,7 +38,7 @@
         "xor    %%ecx       , "ret"     \n\t"\
         "sub    "tmp"       , "low"     \n\t"
 #else /* HAVE_FAST_CMOV */
-#define BRANCHLESS_GET_CABAC_UPDATE(ret, statep, low, range, tmp)\
+#define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\
         "mov    "tmp"       , %%ecx     \n\t"\
         "shl    $17         , "tmp"     \n\t"\
         "sub    "low"       , "tmp"     \n\t"\
@@ -51,16 +52,16 @@
         "xor    "tmp"       , "ret"     \n\t"
 #endif /* HAVE_FAST_CMOV */
 
-#define BRANCHLESS_GET_CABAC(ret, statep, low, lowword, range, tmp, tmpbyte, byte, end) \
+#define BRANCHLESS_GET_CABAC(ret, statep, low, lowword, range, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off) \
         "movzbl "statep"    , "ret"                                     \n\t"\
         "mov    "range"     , "tmp"                                     \n\t"\
         "and    $0xC0       , "range"                                   \n\t"\
-        "movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\
+        "movzbl "MANGLE(ff_h264_cabac_tables)"+"lps_off"("ret", "range", 2), "range" \n\t"\
         "sub    "range"     , "tmp"                                     \n\t"\
         BRANCHLESS_GET_CABAC_UPDATE(ret, statep, low, range, tmp)   \
-        "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx          \n\t"\
+        "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"("range"), %%ecx    \n\t"\
         "shl    %%cl        , "range"                                   \n\t"\
-        "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp"          \n\t"\
+        "movzbl "MANGLE(ff_h264_cabac_tables)"+"mlps_off"+128("ret"), "tmp"  \n\t"\
         "shl    %%cl        , "low"                                     \n\t"\
         "mov    "tmpbyte"   , "statep"                                  \n\t"\
         "test   "lowword"   , "lowword"                                 \n\t"\
@@ -73,7 +74,7 @@
         "shr    $15         , %%ecx                                     \n\t"\
         "bswap  "tmp"                                                   \n\t"\
         "shr    $15         , "tmp"                                     \n\t"\
-        "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx            \n\t"\
+        "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"(%%ecx), %%ecx \n\t"\
         "sub    $0xFFFF     , "tmp"                                     \n\t"\
         "neg    %%ecx                                                   \n\t"\
         "add    $7          , %%ecx                                     \n\t"\
@@ -93,11 +94,14 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c,
     __asm__ volatile(
         BRANCHLESS_GET_CABAC("%0", "(%4)", "%1", "%w1",
                              "%2", "%3", "%b3",
-                             "%a6(%5)", "%a7(%5)")
+                             "%a6(%5)", "%a7(%5)", "%a8", "%a9", "%a10")
         : "=&r"(bit), "+&r"(c->low), "+&r"(c->range), "=&q"(tmp)
         : "r"(state), "r"(c),
           "i"(offsetof(CABACContext, bytestream)),
-          "i"(offsetof(CABACContext, bytestream_end))
+          "i"(offsetof(CABACContext, bytestream_end)),
+          "i"(H264_NORM_SHIFT_OFFSET),
+          "i"(H264_LPS_RANGE_OFFSET),
+          "i"(H264_MLPS_STATE_OFFSET)
         : "%"REG_c, "memory"
     );
     return bit & 1;
diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h
index 6aa2d07..d278708 100644
--- a/libavcodec/x86/h264_i386.h
+++ b/libavcodec/x86/h264_i386.h
@@ -45,12 +45,13 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
     int minusindex= 4-(intptr_t)index;
     int bit;
     x86_reg coeff_count;
+
     __asm__ volatile(
         "3:                                     \n\t"
 
         BRANCHLESS_GET_CABAC("%4", "(%1)", "%3", "%w3",
                              "%5", "%k0", "%b0",
-                             "%a11(%6)", "%a12(%6)")
+                             "%a11(%6)", "%a12(%6)", "%a13", "%a14", "%a15")
 
         "test $1, %4                            \n\t"
         " jz 4f                                 \n\t"
@@ -58,7 +59,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
 
         BRANCHLESS_GET_CABAC("%4", "(%1)", "%3", "%w3",
                              "%5", "%k0", "%b0",
-                             "%a11(%6)", "%a12(%6)")
+                             "%a11(%6)", "%a12(%6)", "%a13", "%a14", "%a15")
 
         "sub  %10, %1                           \n\t"
         "mov  %2, %0                            \n\t"
@@ -86,7 +87,10 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
           "+&r"(c->low), "=&r"(bit), "+&r"(c->range)
         : "r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off),
           "i"(offsetof(CABACContext, bytestream)),
-          "i"(offsetof(CABACContext, bytestream_end))
+          "i"(offsetof(CABACContext, bytestream_end)),
+          "i"(H264_NORM_SHIFT_OFFSET),
+          "i"(H264_LPS_RANGE_OFFSET),
+          "i"(H264_MLPS_STATE_OFFSET)
         : "%"REG_c, "memory"
     );
     return coeff_count;
@@ -100,6 +104,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
     x86_reg coeff_count;
     x86_reg last=0;
     x86_reg state;
+
     __asm__ volatile(
         "mov %1, %6                             \n\t"
         "3:                                     \n\t"
@@ -110,18 +115,19 @@ static int decode_significance_8x8_x86(CABACContext *c,
 
         BRANCHLESS_GET_CABAC("%4", "(%6)", "%3", "%w3",
                              "%5", "%k0", "%b0",
-                             "%a12(%7)", "%a13(%7)")
+                             "%a12(%7)", "%a13(%7)", "%a14", "%a15", "%a16")
 
         "mov %1, %k6                            \n\t"
         "test $1, %4                            \n\t"
         " jz 4f                                 \n\t"
 
-        "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%k6), %k6\n\t"
+        "movzbl "MANGLE(ff_h264_cabac_tables)"+%a17(%k6), %k6\n\t"
+
         "add %11, %6                            \n\t"
 
         BRANCHLESS_GET_CABAC("%4", "(%6)", "%3", "%w3",
                              "%5", "%k0", "%b0",
-                             "%a12(%7)", "%a13(%7)")
+                             "%a12(%7)", "%a13(%7)", "%a14", "%a15", "%a16")
 
         "mov %2, %0                             \n\t"
         "mov %1, %k6                            \n\t"
@@ -147,7 +153,11 @@ static int decode_significance_8x8_x86(CABACContext *c,
         : "r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base),
           "m"(sig_off), "m"(last_coeff_ctx_base),
           "i"(offsetof(CABACContext, bytestream)),
-          "i"(offsetof(CABACContext, bytestream_end))
+          "i"(offsetof(CABACContext, bytestream_end)),
+          "i"(H264_NORM_SHIFT_OFFSET),
+          "i"(H264_LPS_RANGE_OFFSET),
+          "i"(H264_MLPS_STATE_OFFSET),
+          "i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET)
         : "%"REG_c, "memory"
     );
     return coeff_count;
-- 
1.7.1



More information about the ffmpeg-devel mailing list