[FFmpeg-devel] [PATCH] ARM: NEON optimised vorbis_inverse_coupling

Mans Rullgard mans
Mon Aug 25 05:06:41 CEST 2008


---
 libavcodec/armv4l/dsputil_neon.c   |    6 ++++++
 libavcodec/armv4l/dsputil_neon_s.S |   23 +++++++++++++++++++++++
 2 files changed, 29 insertions(+), 0 deletions(-)

diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c
index b584e5b..f9d32c0 100644
--- a/libavcodec/armv4l/dsputil_neon.c
+++ b/libavcodec/armv4l/dsputil_neon.c
@@ -94,6 +94,8 @@ void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride);
 void ff_float_to_int16_neon(int16_t *, const float *, long);
 void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int);
 
+void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize);
+
 void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
 {
     c->put_pixels_tab[0][0] = ff_put_pixels16_neon;
@@ -164,4 +166,8 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
 
     c->float_to_int16 = ff_float_to_int16_neon;
     c->float_to_int16_interleave = ff_float_to_int16_interleave_neon;
+
+#ifdef CONFIG_VORBIS_DECODER
+    c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_neon;
+#endif
 }
diff --git a/libavcodec/armv4l/dsputil_neon_s.S b/libavcodec/armv4l/dsputil_neon_s.S
index 8e1ee6d..5ccb034 100644
--- a/libavcodec/armv4l/dsputil_neon_s.S
+++ b/libavcodec/armv4l/dsputil_neon_s.S
@@ -19,6 +19,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "config.h"
+
         .fpu neon
         .text
 
@@ -321,3 +323,24 @@ extern ff_float_to_int16_interleave_neon
         bne           3b
         pop           {r4,r5,pc}
         .endfunc
+
+#ifdef CONFIG_VORBIS_DECODER
+extern ff_vorbis_inverse_coupling_neon /* in-place update of two float arrays, 4 lanes per pass; presumably r0=mag, r1=ang, r2=blocksize per the C prototype (assumes AAPCS) - TODO confirm */
+        vmov.i32      q10, #(1<<31)          /* q10 = 0x80000000 in every 32-bit lane: float32 sign-bit mask */
+        dmb                                  /* NOTE(review): purpose of this memory barrier is not evident from this routine - confirm it is needed */
+1:      vld1.64       {d2,d3}, [r1,:128]     /* q1 = 16 bytes at r1 (128-bit alignment hint, no writeback) */
+        vld1.64       {d0,d1}, [r0,:128]     /* q0 = 16 bytes at r0 (128-bit alignment hint, no writeback) */
+        vcle.f32      q8, q1, #0             /* q8 = all-ones in lanes where the loaded q1 value is <= 0, else zero */
+        vand          q9, q0, q10            /* q9 = sign bits of q0 only */
+        veor          q1, q1, q9             /* flip the sign of q1 in lanes where q0 has its sign bit set */
+        vand          q2, q1, q8             /* q2 = sign-adjusted q1 in lanes selected by q8, zero elsewhere */
+        vbic          q3, q1, q8             /* q3 = sign-adjusted q1 in lanes NOT selected by q8, zero elsewhere */
+        vadd.f32      q1, q0, q2             /* q1 = q0 + q2 (value written back through r0 below) */
+        vsub.f32      q0, q0, q3             /* q0 = q0 - q3 (value written back through r1 below) */
+        subs          r2, r2, #4             /* four elements consumed; sets the flags tested by bgt */
+        vst1.64       {d0,d1}, [r1,:128]!    /* store q0 at r1, then advance r1 by 16 bytes */
+        vst1.64       {d2,d3}, [r0,:128]!    /* store q1 at r0, then advance r0 by 16 bytes */
+        bgt           1b                     /* repeat while r2 > 0; the body always runs at least once */
+        bx            lr                     /* return to caller */
+        .endfunc
+#endif
-- 
1.6.0





More information about the ffmpeg-devel mailing list