[FFmpeg-cvslog] libswscale/x86/yuv2yuvX: Removes unrolling for mmx and mmxext
Alan Kelly
git at videolan.org
Sat Apr 3 20:52:25 EEST 2021
ffmpeg | branch: release/4.4 | Alan Kelly <alankelly at google.com> | Thu Apr 1 12:00:15 2021 +0200| [4aeedf4c2a8f35be667d5dd40c84bd27730ef1db] | committer: Michael Niedermayer
libswscale/x86/yuv2yuvX: Removes unrolling for mmx and mmxext
Signed-off-by: Michael Niedermayer <michael at niedermayer.cc>
(cherry picked from commit 3ce8d092448827842c451807f03010ad5129fd8f)
Signed-off-by: Michael Niedermayer <michael at niedermayer.cc>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4aeedf4c2a8f35be667d5dd40c84bd27730ef1db
---
libswscale/x86/yuv2yuvX.asm | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/libswscale/x86/yuv2yuvX.asm b/libswscale/x86/yuv2yuvX.asm
index 521880dabe..b6294cb919 100644
--- a/libswscale/x86/yuv2yuvX.asm
+++ b/libswscale/x86/yuv2yuvX.asm
@@ -37,8 +37,10 @@ SECTION .text
cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
%if notcpuflag(sse3)
%define movr mova
+%define unroll 1
%else
%define movr movdqu
+%define unroll 2
%endif
movsxdifnidn dstWq, dstWd
movsxdifnidn offsetq, offsetd
@@ -70,8 +72,10 @@ cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
.outerloop:
mova m4, m7
mova m3, m7
+%if cpuflag(sse3)
mova m6, m7
mova m1, m7
+%endif
.loop:
%if cpuflag(avx2)
vpbroadcastq m0, [filterSizeq + 8]
@@ -84,28 +88,36 @@ cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
pmulhw m5, m0, [srcq + offsetq * 2 + mmsize]
paddw m3, m3, m2
paddw m4, m4, m5
+%if cpuflag(sse3)
pmulhw m2, m0, [srcq + offsetq * 2 + 2 * mmsize]
pmulhw m5, m0, [srcq + offsetq * 2 + 3 * mmsize]
paddw m6, m6, m2
paddw m1, m1, m5
+%endif
add filterSizeq, $10
mov srcq, [filterSizeq]
test srcq, srcq
jnz .loop
psraw m3, m3, 3
psraw m4, m4, 3
+%if cpuflag(sse3)
psraw m6, m6, 3
psraw m1, m1, 3
+%endif
packuswb m3, m3, m4
+%if cpuflag(sse3)
packuswb m6, m6, m1
+%endif
mov srcq, [filterq]
%if cpuflag(avx2)
vpermq m3, m3, 216
vpermq m6, m6, 216
%endif
movr [destq + offsetq], m3
+%if cpuflag(sse3)
movr [destq + offsetq + mmsize], m6
- add offsetq, mmsize * 2
+%endif
+ add offsetq, mmsize * unroll
mov filterSizeq, filterq
cmp offsetq, dstWq
jb .outerloop
More information about the ffmpeg-cvslog mailing list