[FFmpeg-devel] [PATCH 2/3] ARM optimised vp56_rac_get_prob()

Wed Feb 2 17:36:51 CET 2011

Approximately 3% faster on Cortex-A8.

Signed-off-by: Mans Rullgard <mans at mansr.com>
---
 libavcodec/arm/vp56_arith.h |   92 +++++++++++++++++++++++++++++++++++++++++++
 libavcodec/vp56.h           |    6 ++-
 2 files changed, 97 insertions(+), 1 deletions(-)
 create mode 100644 libavcodec/arm/vp56_arith.h

diff --git a/libavcodec/arm/vp56_arith.h b/libavcodec/arm/vp56_arith.h
new file mode 100644
index 0000000..9bcb466
--- /dev/null
+++ b/libavcodec/arm/vp56_arith.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2010 Mans Rullgard <mans at mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ARM_VP56_ARITH_H
+#define AVCODEC_ARM_VP56_ARITH_H
+
+#if HAVE_ARMV6 && HAVE_INLINE_ASM
+/* ARMv6 inline-asm replacement for vp56_rac_get_prob(): updates the coder state in c using probability pr, returns the bit (0 or 1). */
+#define vp56_rac_get_prob vp56_rac_get_prob_armv6
+static inline int vp56_rac_get_prob_armv6(VP56RangeCoder *c, int pr)
+{
+    unsigned shift     = ff_vp56_norm_shift[c->high];
+    unsigned code_word = c->code_word << shift;
+    unsigned high      = c->high << shift;
+    unsigned bit;
+    /* Operands: %0 c->high (enters as shift), %1 c->code_word, %2 bit/scratch, %3 c->bits, %4 c->buffer, %5 high, %6 pr, %7 c->end - 1 */
+    __asm__ volatile ("adds    %3,  %3,  %0           \n" /* bits += shift; C set on carry-out       */
+                      "cmpcs   %7,  %4                \n" /* if C: C = (end - 1 >= buffer), unsigned */
+                      "ldrcsh  %2,  [%4], #2          \n" /* if C: load halfword, buffer += 2        */
+                      "rsb     %0,  %6,  #256         \n" /* %0 = 256 - pr                           */
+                      "smlabb  %0,  %5,  %6,  %0      \n" /* %0 += high * pr (bottom halfwords)      */
+                      "rev16cs %2,  %2                \n" /* if C: swap the two loaded bytes         */
+                      "orrcs   %1,  %1,  %2,  lsl %3  \n" /* if C: code_word |= halfword << bits     */
+                      "subcs   %3,  %3,  #16          \n" /* if C: bits -= 16                        */
+                      "lsr     %0,  %0,  #8           \n" /* low = %0 >> 8                           */
+                      "cmp     %1,  %0,  lsl #16      \n" /* compare code_word with low << 16        */
+                      "subge   %1,  %1,  %0,  lsl #16 \n" /* if >= (signed): code_word -= low << 16  */
+                      "subge   %0,  %5,  %0           \n" /* if >=: c->high = high - low, else low   */
+                      "movge   %2,  #1                \n" /* if >=: bit = 1                          */
+                      "movlt   %2,  #0                \n" /* else:  bit = 0                          */
+                      : "=&r"(c->high), "=&r"(c->code_word), "=&r"(bit),
+                        "+&r"(c->bits), "+&r"(c->buffer)
+                      : "r"(high), "r"(pr), "r"(c->end - 1),
+                        "0"(shift), "1"(code_word)
+                      : "cc"); /* adds/cmp overwrite the condition flags, so declare them clobbered */
+    return bit;
+}
+/* ARMv6 inline-asm replacement for vp56_rac_get_prob_branchy(): asm computes the split point, the final compare/branch is left to C. */
+#define vp56_rac_get_prob_branchy vp56_rac_get_prob_branchy_armv6
+static inline int vp56_rac_get_prob_branchy_armv6(VP56RangeCoder *c, int pr)
+{
+    unsigned shift     = ff_vp56_norm_shift[c->high];
+    unsigned code_word = c->code_word << shift;
+    unsigned high      = c->high << shift;
+    unsigned low;
+    unsigned tmp;
+    /* Operands: %0 low (enters as shift), %1 code_word, %2 tmp/scratch, %3 c->bits, %4 c->buffer, %5 high, %6 pr, %7 c->end - 1 */
+    __asm__ volatile ("adds    %3,  %3,  %0           \n" /* bits += shift; C set on carry-out       */
+                      "cmpcs   %7,  %4                \n" /* if C: C = (end - 1 >= buffer), unsigned */
+                      "ldrcsh  %2,  [%4], #2          \n" /* if C: load halfword, buffer += 2        */
+                      "rsb     %0,  %6,  #256         \n" /* %0 = 256 - pr                           */
+                      "smlabb  %0,  %5,  %6,  %0      \n" /* %0 += high * pr (bottom halfwords)      */
+                      "rev16cs %2,  %2                \n" /* if C: swap the two loaded bytes         */
+                      "orrcs   %1,  %1,  %2,  lsl %3  \n" /* if C: code_word |= halfword << bits     */
+                      "subcs   %3,  %3,  #16          \n" /* if C: bits -= 16                        */
+                      "lsr     %0,  %0,  #8           \n" /* low = %0 >> 8                           */
+                      "lsl     %2,  %0,  #16          \n" /* tmp = low << 16                         */
+                      : "=&r"(low), "+&r"(code_word), "=&r"(tmp),
+                        "+&r"(c->bits), "+&r"(c->buffer)
+                      : "r"(high), "r"(pr), "r"(c->end - 1), "0"(shift)
+                      : "cc"); /* adds/cmpcs overwrite the condition flags, so declare them clobbered */
+    if (code_word >= tmp) { /* unsigned compare against low << 16 */
+        c->high      = high - low;
+        c->code_word = code_word - tmp;
+        return 1;
+    }
+
+    c->high      = low;
+    c->code_word = code_word;
+    return 0;
+}
+
+#endif
+
+#endif
diff --git a/libavcodec/vp56.h b/libavcodec/vp56.h
index da6b1b6..d1735e5 100644
--- a/libavcodec/vp56.h
+++ b/libavcodec/vp56.h
@@ -201,7 +201,9 @@ static av_always_inline unsigned int vp56_rac_renorm(VP56RangeCoder *c)
     return code_word;
 }
 
-#if ARCH_X86
+#if   ARCH_ARM
+#include "arm/vp56_arith.h"
+#elif ARCH_X86
 #include "x86/vp56_arith.h"
 #endif
 
@@ -221,6 +223,7 @@ static av_always_inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob)
 }
 #endif
 
+#ifndef vp56_rac_get_prob_branchy
 // branchy variant, to be used where there's a branch based on the bit decoded
 static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int prob)
 {
@@ -238,6 +241,7 @@ static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int pro
     c->code_word = code_word;
     return 0;
 }
+#endif
 
 static av_always_inline int vp56_rac_get(VP56RangeCoder *c)
 {
-- 
1.7.4