[FFmpeg-devel] [PATCH 1/5] x264asm: extend SBUTTERFLY to support SSE1

Christophe Gisquet christophe.gisquet at gmail.com
Sun Apr 7 22:20:30 CEST 2013


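This was discussed as an alternative to manipulating instructions directly.
This version also fixes the case where %2 == %3 (the same register passed as
both inputs): the high unpack into m%4 is now issued before the low unpack
overwrites m%2.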
---
 libavutil/x86/x86util.asm | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index 8908444..07ca768 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -30,10 +30,18 @@
 %include "libavutil/x86/x86inc.asm"
 
 %macro SBUTTERFLY 4
-%if avx_enabled == 0
+%if notcpuflag(sse2) && mmsize == 16
+  %ifidn %1, dq
+    mova      m%4, m%2
+    unpckhps  m%4, m%3
+    unpcklps  m%2, m%3
+  %else
+    %error Only dq unpack is supported by SBUTTERFLY on SSE1
+  %endif
+%elif avx_enabled == 0
     mova      m%4, m%2
-    punpckl%1 m%2, m%3
     punpckh%1 m%4, m%3
+    punpckl%1 m%2, m%3
 %else
     punpckh%1 m%4, m%2, m%3
     punpckl%1 m%2, m%3
-- 
1.8.0.msysgit.0



More information about the ffmpeg-devel mailing list