[FFmpeg-cvslog] x86: float dsp: butterflies_float SSE

Christophe Gisquet git at videolan.org
Fri May 3 12:03:08 CEST 2013


ffmpeg | branch: master | Christophe Gisquet <christophe.gisquet at gmail.com> | Fri Apr 12 21:07:01 2013 +0200| [566b7a20fd0cab44d344329538d314454a0bcc2f] | committer: Anton Khirnov

x86: float dsp: butterflies_float SSE

97c -> 49c
Some codecs could benefit from more unrolling, but AAC doesn't.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=566b7a20fd0cab44d344329538d314454a0bcc2f
---

 libavutil/x86/float_dsp.asm    |   26 ++++++++++++++++++++++++++
 libavutil/x86/float_dsp_init.c |    3 +++
 2 files changed, 29 insertions(+)

diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index 779339c..10330ff 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -252,3 +252,29 @@ cglobal scalarproduct_float, 3,3,2, v1, v2, offset
     fld dword r0m
 %endif
     RET
+
+;-----------------------------------------------------------------------------
+; void ff_butterflies_float(float *src0, float *src1, int len);
+;-----------------------------------------------------------------------------
+INIT_XMM sse
+cglobal butterflies_float, 3,3,3, src0, src1, len ; in place: src0 <- src0 + src1, src1 <- src0 - src1
+%if ARCH_X86_64
+    movsxd    lenq, lend                ; sign-extend the 32-bit int len to 64 bits
+%endif
+    test      lenq, lenq
+    jz .end                             ; len == 0: nothing to do
+    shl       lenq, 2                   ; element count -> byte count (4 bytes per float)
+    lea      src0q, [src0q +   lenq]    ; src0 now points just past its last element
+    lea      src1q, [src1q +   lenq]    ; likewise for src1
+    neg       lenq                      ; negative byte offset that counts up towards 0
+.loop:
+    mova        m0, [src0q + lenq]      ; m0 = one vector from src0 (NOTE(review): mova presumably needs aligned buffers -- confirm)
+    mova        m1, [src1q + lenq]      ; m1 = matching vector from src1
+    subps       m2, m0, m1              ; m2 = src0 - src1 (old src0 is still intact in m0)
+    addps       m0, m0, m1              ; m0 = src0 + src1
+    mova        [src1q + lenq], m2      ; src1 receives the differences
+    mova        [src0q + lenq], m0      ; src0 receives the sums
+    add       lenq, mmsize              ; advance by mmsize bytes: whole vectors only, there is no scalar tail
+    jl .loop                            ; continue while the offset is still negative
+.end:
+    REP_RET
diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c
index b5e9af9..3486301 100644
--- a/libavutil/x86/float_dsp_init.c
+++ b/libavutil/x86/float_dsp_init.c
@@ -53,6 +53,8 @@ void ff_vector_fmul_reverse_avx(float *dst, const float *src0,
 
 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
 
+void ff_butterflies_float_sse(float *src0, float *src1, int len);
+
 #if HAVE_6REGS && HAVE_INLINE_ASM
 static void vector_fmul_window_3dnowext(float *dst, const float *src0,
                                         const float *src1, const float *win,
@@ -138,6 +140,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
         fdsp->vector_fmul_add    = ff_vector_fmul_add_sse;
         fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
         fdsp->scalarproduct_float = ff_scalarproduct_float_sse;
+        fdsp->butterflies_float   = ff_butterflies_float_sse;
     }
     if (EXTERNAL_SSE2(mm_flags)) {
         fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;



More information about the ffmpeg-cvslog mailing list