[FFmpeg-cvslog] x86/synth_filter: Revert the switch to float ops with SSE2

James Almer git at videolan.org
Sun Mar 2 12:28:05 CET 2014


ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Sat Mar  1 23:46:27 2014 -0300| [884e085d1ea34f2f773b9589ae8e8aa9ca91b358] | committer: Michael Niedermayer

x86/synth_filter: Revert the switch to float ops with SSE2

This reverts the changes that commits 64672098361361cd15d37e36f747ab44de5b80ca
and 68c3ed936a76c3ff7738f602fa90237ac7e3ce08 made to the SSE2 version,
which caused a slowdown of about 5 cycles.

Signed-off-by: James Almer <jamrial at gmail.com>
Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=884e085d1ea34f2f773b9589ae8e8aa9ca91b358
---

 libavcodec/x86/dcadsp.asm |   21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
index 972ce1e..a6a4582 100644
--- a/libavcodec/x86/dcadsp.asm
+++ b/libavcodec/x86/dcadsp.asm
@@ -199,6 +199,14 @@ INIT_XMM sse
 DCA_LFE_FIR 0
 DCA_LFE_FIR 1
 
+%macro SETZERO 1
+%if cpuflag(sse2) && notcpuflag(avx)
+    pxor          %1, %1
+%else
+    xorps         %1, %1, %1
+%endif
+%endmacro
+
 %macro SHUF 3
 %if cpuflag(avx)
     mova          %3, [%2 - 16]
@@ -265,7 +273,12 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
                               synth_buf, synth_buf2, window, out, off, scale
 %define scale m0
 %if ARCH_X86_32 || WIN64
+%if cpuflag(sse2) && notcpuflag(avx)
+    movd          m0, scalem
+    SPLATD        m0
+%else
     VBROADCASTSS  m0, scalem
+%endif
 ; Make sure offset is in a register and not on the stack
 %define OFFQ  r4q
 %else
@@ -290,8 +303,8 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
 %endif
 .mainloop
     ; m1 = a  m2 = b  m3 = c  m4 = d
-    xorps         m3, m3, m3
-    xorps         m4, m4, m4
+    SETZERO       m3
+    SETZERO       m4
     mova          m1, [buf2 + i]
     mova          m2, [buf2 + i + 16 * 4]
 %if ARCH_X86_32
@@ -308,8 +321,8 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
 %define ptr2     r7q ; must be loaded
 %define win      r8q
 %define j        r9q
-    xorps         m9, m9, m9
-    xorps        m10, m10, m10
+    SETZERO       m9
+    SETZERO      m10
     mova          m7, [buf2 + i + mmsize]
     mova          m8, [buf2 + i + mmsize + 16 * 4]
     lea          win, [windowq + i]



More information about the ffmpeg-cvslog mailing list