[FFmpeg-devel] [PATCH 1/5] libavcodec: aarch64: Don't clobber v8 in the h%4 case in ff_pix_abs16_xy2_neon

Martin Storsjö martin at martin.st
Wed Jul 13 23:48:50 EEST 2022


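Under AAPCS64 the low 64 bits of v8-v15 are callee-saved, so writing
v8 without saving and restoring it violates the calling convention.
Use the caller-saved register v7 as the temporary instead.

Checkasm doesn't currently test this codepath.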
---
 libavcodec/aarch64/me_cmp_neon.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/aarch64/me_cmp_neon.S b/libavcodec/aarch64/me_cmp_neon.S
index e49d049fc2..31db3793d9 100644
--- a/libavcodec/aarch64/me_cmp_neon.S
+++ b/libavcodec/aarch64/me_cmp_neon.S
@@ -189,11 +189,11 @@ function ff_pix_abs16_xy2_neon, export=1
         urshr           v16.8h, v16.8h, #2          // shift right by 2 0..7 (rounding shift right)
         urshr           v17.8h, v17.8h, #2          // shift right by 2 8..15
 
-        uxtl2           v8.8h, v1.16b               // 8->16 bits pix1 8..15
+        uxtl2           v7.8h, v1.16b               // 8->16 bits pix1 8..15
         uxtl            v1.8h, v1.8b                // 8->16 bits pix1 0..7
 
         uabd            v6.8h, v1.8h, v16.8h        // absolute difference 0..7
-        uaba            v6.8h, v8.8h, v17.8h        // absolute difference accumulate 8..15
+        uaba            v6.8h, v7.8h, v17.8h        // absolute difference accumulate 8..15
         mov             v2.16b, v18.16b             // pix3 -> pix2
         mov             v3.16b, v19.16b             // pix3+1 -> pix2+1
         uaddlv          s6, v6.8h                   // add up accumulator in v6
-- 
2.25.1



More information about the ffmpeg-devel mailing list