[FFmpeg-devel] [PATCH] Further optimization of base64 decode using AV_WB32.

Reimar Döffinger Reimar.Doeffinger at gmx.de
Sat Jan 21 17:52:27 CET 2012


This patch is somewhat questionable.
The biggest issue is that, on gcc 4.5 or newer, av_bswap32 is not
replaced with our asm version.
As a result, gcc generates horrible code that is slower
than the unoptimized variant.
Old:                                  248852 decicycles
New with gcc's attempt at av_bswap32: 256576 decicycles
New with our bswap32:                 200260 decicycles

Signed-off-by: Reimar Döffinger <Reimar.Doeffinger at gmx.de>
---
 libavutil/base64.c    |   57 ++++++++++++++++++++++++++++--------------------
 libavutil/x86/bswap.h |    2 +-
 2 files changed, 34 insertions(+), 25 deletions(-)

diff --git a/libavutil/base64.c b/libavutil/base64.c
index df0f6a3..bbf2732 100644
--- a/libavutil/base64.c
+++ b/libavutil/base64.c
@@ -68,52 +68,61 @@ static const uint8_t map2[256] =
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 };
 
-#define BASE64_DEC_STEP(i) \
+#define BASE64_DEC_STEP(i) do {\
     bits = map2[in[i]]; \
     if (bits & 0x80) \
-        goto out; \
-    v = (v << 6) + bits; \
-    if (i & 3) \
-        *dst++ = v >> (6 - 2 * (i & 3)); \
+        goto out ## i; \
+    v = i ? (v << 6) + bits : bits; \
+} while(0)
 
 int av_base64_decode(uint8_t *out, const char *in_str, int out_size)
 {
-    int v;
     uint8_t *dst = out;
     uint8_t *end = out + out_size;
     // no sign extension
     const uint8_t *in = in_str;
     unsigned bits = 0xff;
+    unsigned v;
 
-    v = 0;
-    while (end - dst > 2) {
-        BASE64_DEC_STEP(0)
-        BASE64_DEC_STEP(1)
-        BASE64_DEC_STEP(2)
-        BASE64_DEC_STEP(3)
+    while (end - dst > 3) {
+        BASE64_DEC_STEP(0);
+        BASE64_DEC_STEP(1);
+        BASE64_DEC_STEP(2);
+        BASE64_DEC_STEP(3);
+        AV_WB32(dst, v << 8);
+        dst += 3;
         in += 4;
     }
     if (end - dst) {
-        BASE64_DEC_STEP(0)
-        BASE64_DEC_STEP(1)
-        if (end - dst) {
-            BASE64_DEC_STEP(2)
-            in++;
-        }
-        in += 2;
+        BASE64_DEC_STEP(0);
+        BASE64_DEC_STEP(1);
+        BASE64_DEC_STEP(2);
+        BASE64_DEC_STEP(3);
+        *dst++ = v >> 16;
+        if (end - dst)
+            *dst++ = v >> 8;
+        if (end - dst)
+            *dst++ = v;
+        in += 4;
     }
     while (1) {
-        BASE64_DEC_STEP(0)
+        BASE64_DEC_STEP(0);
         in++;
-        BASE64_DEC_STEP(0)
+        BASE64_DEC_STEP(0);
         in++;
-        BASE64_DEC_STEP(0)
+        BASE64_DEC_STEP(0);
         in++;
-        BASE64_DEC_STEP(0)
+        BASE64_DEC_STEP(0);
         in++;
     }
 
-out:
+out3:
+    *dst++ = v >> 10;
+    v <<= 2;
+out2:
+    *dst++ = v >> 4;
+out1:
+out0:
     return bits & 1 ? -1 : dst - out;
 }
 
diff --git a/libavutil/x86/bswap.h b/libavutil/x86/bswap.h
index 52ffb4d..aa39d97 100644
--- a/libavutil/x86/bswap.h
+++ b/libavutil/x86/bswap.h
@@ -37,7 +37,7 @@ static av_always_inline av_const unsigned av_bswap16(unsigned x)
 }
 #endif /* !AV_GCC_VERSION_AT_LEAST(4,1) */
 
-#if !AV_GCC_VERSION_AT_LEAST(4,5)
+#if 1 || !AV_GCC_VERSION_AT_LEAST(4,5)
 #define av_bswap32 av_bswap32
 static av_always_inline av_const uint32_t av_bswap32(uint32_t x)
 {
-- 
1.7.8.3



More information about the ffmpeg-devel mailing list