[FFmpeg-devel] [PATCH 2/2] x86/vf_v360: use a faster horizontal add in remap4_8bit_line_avx2

James Almer jamrial at gmail.com
Fri Sep 6 18:30:03 EEST 2019


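Replace the open-coded horizontal sum (vextracti128 + paddd followed by
two phaddd) with the HADDD macro. The reduced value now stays in m2, so
the following shift, pack and byte store operate on m2/xm2 instead of
m1/xm1.

Signed-off-by: James Almer <jamrial at gmail.com>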
---
 libavfilter/x86/vf_v360.asm | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/libavfilter/x86/vf_v360.asm b/libavfilter/x86/vf_v360.asm
index f49702b603..a0936eb6dc 100644
--- a/libavfilter/x86/vf_v360.asm
+++ b/libavfilter/x86/vf_v360.asm
@@ -130,14 +130,11 @@ cglobal remap4_8bit_line, 7, 9, 11, dst, width, src, in_linesize, u, v, ker, x,
         pmulld          m4, m5
 
         paddd           m2, m4
-        vextracti128   xm1, m2, 1
-        paddd           m1, m2
-        phaddd          m1, m1
-        phaddd          m1, m1
-        psrld           m1, m1, 0xe
-        packuswb        m1, m1
+        HADDD           m2, m1
+        psrld           m2, m2, 0xe
+        packuswb        m2, m2
 
-        pextrb   [dstq+xq], xm1, 0
+        pextrb   [dstq+xq], xm2, 0
 
         add   xq, 1
         add   yq, 32
-- 
2.22.0



More information about the ffmpeg-devel mailing list