[FFmpeg-cvslog] aarch64: vp8: Skip saturating in shrn in ff_vp8_idct_add_neon
Martin Storsjö
git at videolan.org
Thu Mar 14 21:23:59 EET 2019
ffmpeg | branch: master | Martin Storsjö <martin at martin.st> | Fri Feb 1 00:00:58 2019 +0200| [49f9c4272c4029b57ff300d908ba03c6332fc9c4] | committer: Martin Storsjö
aarch64: vp8: Skip saturating in shrn in ff_vp8_idct_add_neon
The original arm version didn't do saturation here. This probably
doesn't make any difference for performance, but reduces the
differences between the arm and aarch64 versions.
Signed-off-by: Martin Storsjö <martin at martin.st>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=49f9c4272c4029b57ff300d908ba03c6332fc9c4
---
libavcodec/aarch64/vp8dsp_neon.S | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/libavcodec/aarch64/vp8dsp_neon.S b/libavcodec/aarch64/vp8dsp_neon.S
index 139b380fa4..cac4558ab2 100644
--- a/libavcodec/aarch64/vp8dsp_neon.S
+++ b/libavcodec/aarch64/vp8dsp_neon.S
@@ -92,8 +92,8 @@ function ff_vp8_idct_add_neon, export=1
smull v27.4s, v3.4h, v4.h[0]
sqdmulh v20.4h, v1.4h, v4.h[1]
sqdmulh v23.4h, v3.4h, v4.h[1]
- sqshrn v21.4h, v26.4s, #16
- sqshrn v22.4h, v27.4s, #16
+ shrn v21.4h, v26.4s, #16
+ shrn v22.4h, v27.4s, #16
add v21.4h, v21.4h, v1.4h
add v22.4h, v22.4h, v3.4h
@@ -117,8 +117,8 @@ function ff_vp8_idct_add_neon, export=1
st1 {v29.16b}, [x1]
sqdmulh v21.4h, v1.4h, v4.h[1]
sqdmulh v23.4h, v3.4h, v4.h[1]
- sqshrn v20.4h, v26.4s, #16
- sqshrn v22.4h, v27.4s, #16
+ shrn v20.4h, v26.4s, #16
+ shrn v22.4h, v27.4s, #16
add v20.4h, v20.4h, v1.4h
add v22.4h, v22.4h, v3.4h
add v16.4h, v0.4h, v2.4h
More information about the ffmpeg-cvslog mailing list