[FFmpeg-cvslog] x86: float dsp: butterflies_float SSE
Christophe Gisquet
git at videolan.org
Wed Apr 17 00:11:55 CEST 2013
ffmpeg | branch: master | Christophe Gisquet <christophe.gisquet at gmail.com> | Fri Apr 12 21:07:01 2013 +0200| [1a4007964c106d01f46a5a7f03c1c41fd869b35c] | committer: Michael Niedermayer
x86: float dsp: butterflies_float SSE
97c -> 49c
Some codecs could benefit from more unrolling, but AAC doesn't.
Signed-off-by: Michael Niedermayer <michaelni at gmx.at>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1a4007964c106d01f46a5a7f03c1c41fd869b35c
---
libavutil/x86/float_dsp.asm | 23 +++++++++++++++++++++++
libavutil/x86/float_dsp_init.c | 3 +++
2 files changed, 26 insertions(+)
diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index 004e6cf..f0310ef 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -263,3 +263,26 @@ cglobal scalarproduct_float, 3,3,2, v1, v2, offset
%endif
RET
+;-----------------------------------------------------------------------------
+; void ff_butterflies_float(float *src0, float *src1, int len);
+;-----------------------------------------------------------------------------
+INIT_XMM sse ; x86inc: build for SSE using XMM registers, so mmsize is 16 bytes (4 floats)
+cglobal butterflies_float, 3,3,3, src0, src1, len ; in-place butterfly: src0[i] becomes src0[i]+src1[i], src1[i] becomes src0[i]-src1[i]
+ movsxdifnidn lenq, lend ; x86inc macro: sign-extend the int len to register width where that is needed
+ test lenq, lenq
+ jz .end ; len == 0: nothing to process
+ shl lenq, 2 ; element count -> byte count (4 bytes per float)
+ lea src0q, [src0q + lenq] ; point src0 one past its last element
+ lea src1q, [src1q + lenq] ; point src1 one past its last element
+ neg lenq ; lenq is now a negative byte offset that counts up towards zero
+.loop:
+ mova m0, [src0q + lenq] ; load mmsize bytes of src0; mova is x86inc's aligned move - NOTE(review): presumably requires 16-byte aligned buffers, confirm with callers
+ mova m1, [src1q + lenq] ; load the matching mmsize bytes of src1
+ subps m2, m0, m1 ; m2 = src0 - src1 (3-operand form is provided by x86inc for SSE)
+ addps m0, m0, m1 ; m0 = src0 + src1
+ mova [src1q + lenq], m2 ; the difference overwrites src1
+ mova [src0q + lenq], m0 ; the sum overwrites src0
+ add lenq, mmsize ; advance by one vector
+ jl .loop ; continue while the offset is still negative - NOTE(review): assumes len is a multiple of 4 floats, otherwise the last pass runs past the end
+.end:
+ REP_RET ; return via x86inc's REP_RET macro
diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c
index 16c6f36..ee74837 100644
--- a/libavutil/x86/float_dsp_init.c
+++ b/libavutil/x86/float_dsp_init.c
@@ -53,6 +53,8 @@ void ff_vector_fmul_reverse_avx(float *dst, const float *src0,
float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
+void ff_butterflies_float_sse(float *src0, float *src1, int len);
+
#if HAVE_6REGS && HAVE_INLINE_ASM
static void vector_fmul_window_3dnowext(float *dst, const float *src0,
const float *src1, const float *win,
@@ -138,6 +140,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
fdsp->vector_fmul_add = ff_vector_fmul_add_sse;
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
fdsp->scalarproduct_float = ff_scalarproduct_float_sse;
+ fdsp->butterflies_float = ff_butterflies_float_sse;
}
if (EXTERNAL_SSE2(mm_flags)) {
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
More information about the ffmpeg-cvslog
mailing list