[FFmpeg-cvslog] arm: hevc_qpel: Fix the assembly to work with non-multiple of 8 widths

Wed Aug 25 23:24:58 EEST 2021

ffmpeg | branch: master | Martin Storsjö <martin at martin.st> | Wed Aug 25 11:31:33 2021 +0300| [2589060b92eeeb944c6e2b50e38412c0c5fabcf4] | committer: Martin Storsjö

arm: hevc_qpel: Fix the assembly to work with non-multiple of 8 widths

This unbreaks the fate-checkasm-hevc_pel test on arm targets.

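The assembly assumed that the width passed to the DSP functions is
a multiple of 8, while the checkasm test used other widths too.
The loops subtract 8 from the remaining width and only exited when
the result was exactly zero (beq); with other widths the counter
skips past zero and the loop did not terminate as intended. Exiting
when the result is zero or negative (ble) handles both cases.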

This wasn't noticed before, because the hevc_pel checkasm tests
(which were added in 9c513edb7999a35ddcc6e3a8d984a96c8fb492a3 in
January) weren't run as part of fate until
b492cacffd36ad4cb251ba1f13ac398318ee639a in August.

As this hasn't been an issue in practice with actual full decoding
tests, it seems like the actual decoder doesn't call these functions
with such widths. Therefore, we could alternatively fix the test
to only test things that the real decoder does, in which case this
modification could be reverted.

Signed-off-by: Martin Storsjö <martin at martin.st>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2589060b92eeeb944c6e2b50e38412c0c5fabcf4
---

 libavcodec/arm/hevcdsp_qpel_neon.S | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/libavcodec/arm/hevcdsp_qpel_neon.S b/libavcodec/arm/hevcdsp_qpel_neon.S
index caa6efa766..f71bec05ed 100644
--- a/libavcodec/arm/hevcdsp_qpel_neon.S
+++ b/libavcodec/arm/hevcdsp_qpel_neon.S
@@ -237,7 +237,7 @@
         vld1.8    {d23}, [r2], r3
         bne 8b
         subs  r5, #8
-        beq       99f
+        ble       99f
         mov r4, r12
         add r6, #16
         mov r0, r6
@@ -280,7 +280,7 @@
         vld1.8    {d23}, [r2], r3
         bne 8b
         subs  r5, #8
-        beq       99f
+        ble       99f
         mov r4, r12
         add r6, #8
         mov r0, r6
@@ -310,7 +310,7 @@
         vld1.8    {d23}, [r2], r3
         bne 8b
         subs  r5, #8
-        beq       99f
+        ble       99f
         mov r4, r12
         add r6, #8
         mov r0, r6
@@ -377,7 +377,7 @@ endfunc
         vst1.16   {q7}, [r0], r1
         bne       8b
         subs      r5, #8
-        beq      99f
+        ble       99f
         mov       r4, r12
         add       r6, #16
         mov       r0, r6
@@ -417,7 +417,7 @@ endfunc
         vst1.8    d0, [r0], r1
         bne       8b
         subs      r5, #8
-        beq      99f
+        ble       99f
         mov       r4, r12
         add       r6, #8
         mov       r0, r6
@@ -446,7 +446,7 @@ endfunc
         vst1.8         d0, [r0], r1
         bne       8b
         subs      r5, #8
-        beq      99f
+        ble       99f
         mov       r4, r12
         add       r6, #8
         add       r10, #16
@@ -533,7 +533,7 @@ endfunc
         \filterh q7
         bne 8b
         subs  r5, #8
-        beq 99f
+        ble 99f
         mov r4, r12
         add r6, #16
         mov r0, r6
@@ -594,7 +594,7 @@ endfunc
         \filterh q7
         bne 8b
         subs  r5, #8
-        beq 99f
+        ble 99f
         mov r4, r12
         add r6, #8
         mov r0, r6
@@ -641,7 +641,7 @@ endfunc
         \filterh q7
         bne 8b
         subs  r5, #8
-        beq 99f
+        ble 99f
         mov r4, r12
         add r6, #8
         mov r0, r6