[FFmpeg-cvslog] x86/float_dsp: use three operand form for some instructions

James Almer git at videolan.org
Tue Sep 13 19:50:47 EEST 2022


ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Tue Sep 13 13:50:09 2022 -0300| [bda3a9faf4a2f201b24fb38a04da86410c9205ae] | committer: James Almer

x86/float_dsp: use three operand form for some instructions

Fixes compilation with old versions of yasm.

Signed-off-by: James Almer <jamrial at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=bda3a9faf4a2f201b24fb38a04da86410c9205ae
---

 libavutil/x86/float_dsp.asm | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index 8f8e6dddf5..ff608f5f5a 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -443,19 +443,19 @@ cglobal scalarproduct_float, 3,3,2, v1, v2, offset
 INIT_YMM fma3
 cglobal scalarproduct_float, 3,5,8, v1, v2, size, len, offset
     xor   offsetq, offsetq
-    xorps      m0, m0
+    xorps      m0, m0, m0
     shl     sized, 2
     mov      lenq, sizeq
     cmp      lenq, 32
     jl   .l16
     cmp      lenq, 64
     jl   .l32
-    xorps    m1, m1
+    xorps    m1, m1, m1
     cmp      lenq, 128
     jl   .l64
     and    lenq, ~127
-    xorps    m2, m2
-    xorps    m3, m3
+    xorps    m2, m2, m2
+    xorps    m3, m3, m3
 .loop128:
     movups   m4, [v1q+offsetq]
     movups   m5, [v1q+offsetq + 32]
@@ -468,13 +468,13 @@ cglobal scalarproduct_float, 3,5,8, v1, v2, size, len, offset
     add   offsetq, 128
     cmp   offsetq, lenq
     jl .loop128
-    addps    m0, m2
-    addps    m1, m3
+    addps    m0, m0, m2
+    addps    m1, m1, m3
     mov      lenq, sizeq
     and      lenq, 127
     cmp      lenq, 64
     jge .l64
-    addps    m0, m1
+    addps    m0, m0, m1
     cmp      lenq, 32
     jge .l32
     vextractf128 xmm2, m0, 1
@@ -502,7 +502,7 @@ cglobal scalarproduct_float, 3,5,8, v1, v2, size, len, offset
     add   offsetq, 64
     cmp   offsetq, lenq
     jl .loop64
-    addps    m0, m1
+    addps    m0, m0, m1
     mov      lenq, sizeq
     and      lenq, 63
     cmp      lenq, 32



More information about the ffmpeg-cvslog mailing list