[FFmpeg-devel] [PATCH 1/5] x264asm: extend SBUTTERFLY to support SSE1
Christophe Gisquet
christophe.gisquet at gmail.com
Sun Apr 7 22:20:30 CEST 2013
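This was discussed as an alternative to manipulating instructions directly.
This version also fixes the case where %2 == %3: the high unpack (punpckh) is
now issued before the low unpack (punpckl), so m%3 is read before m%2 (the
same register in that case) is overwritten.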
---
libavutil/x86/x86util.asm | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index 8908444..07ca768 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -30,10 +30,18 @@
%include "libavutil/x86/x86inc.asm"
%macro SBUTTERFLY 4
-%if avx_enabled == 0
+%if notcpuflag(sse2) && mmsize == 16
+ %ifidn %1, dq
+ mova m%4, m%2
+ unpckhps m%4, m%3
+ unpcklps m%2, m%3
+ %else
+ %error Only dq unpack is supported by SBUTTERFLY on SSE1
+ %endif
+%elif avx_enabled == 0
mova m%4, m%2
- punpckl%1 m%2, m%3
punpckh%1 m%4, m%3
+ punpckl%1 m%2, m%3
%else
punpckh%1 m%4, m%2, m%3
punpckl%1 m%2, m%3
--
1.8.0.msysgit.0
More information about the ffmpeg-devel mailing list