[FFmpeg-cvslog] arm: vp9itxfm: Reorder iadst16 coeffs

Martin Storsjö git at videolan.org
Mon Oct 2 22:14:54 EEST 2017


ffmpeg | branch: master | Martin Storsjö <martin at martin.st> | Sat Dec 31 22:27:13 2016 +0200| [08074c092d8c97d71c5986e5325e97ffc956119d] | committer: Martin Storsjö

arm: vp9itxfm: Reorder iadst16 coeffs

This matches the order they are in the 16 bpp version.

There they are in this order, to make sure we access them in the
same order they are declared, easing loading only half of the
coefficients at a time.

This makes the 8 bpp version match the 16 bpp version better.

Signed-off-by: Martin Storsjö <martin at martin.st>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=08074c092d8c97d71c5986e5325e97ffc956119d
---

 libavcodec/arm/vp9itxfm_neon.S | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S
index 1d4d6a7910..a612b25f4f 100644
--- a/libavcodec/arm/vp9itxfm_neon.S
+++ b/libavcodec/arm/vp9itxfm_neon.S
@@ -37,8 +37,8 @@ idct_coeffs:
 endconst
 
 const iadst16_coeffs, align=4
-        .short  16364, 804, 15893, 3981, 14811, 7005, 13160, 9760
-        .short  11003, 12140, 8423, 14053, 5520, 15426, 2404, 16207
+        .short  16364, 804, 15893, 3981, 11003, 12140, 8423, 14053
+        .short  14811, 7005, 13160, 9760, 5520, 15426, 2404, 16207
 endconst
 
 @ Do four 4x4 transposes, using q registers for the subtransposes that don't
@@ -678,19 +678,19 @@ function iadst16
         vld1.16         {q0-q1}, [r12,:128]
 
         mbutterfly_l    q3,  q2,  d31, d16, d0[1], d0[0] @ q3  = t1,   q2  = t0
-        mbutterfly_l    q5,  q4,  d23, d24, d2[1], d2[0] @ q5  = t9,   q4  = t8
+        mbutterfly_l    q5,  q4,  d23, d24, d1[1], d1[0] @ q5  = t9,   q4  = t8
         butterfly_n     d31, d24, q3,  q5,  q6,  q5      @ d31 = t1a,  d24 = t9a
         mbutterfly_l    q7,  q6,  d29, d18, d0[3], d0[2] @ q7  = t3,   q6  = t2
         butterfly_n     d16, d23, q2,  q4,  q3,  q4      @ d16 = t0a,  d23 = t8a
 
-        mbutterfly_l    q3,  q2,  d21, d26, d2[3], d2[2] @ q3  = t11,  q2  = t10
+        mbutterfly_l    q3,  q2,  d21, d26, d1[3], d1[2] @ q3  = t11,  q2  = t10
         butterfly_n     d29, d26, q7,  q3,  q4,  q3      @ d29 = t3a,  d26 = t11a
-        mbutterfly_l    q5,  q4,  d27, d20, d1[1], d1[0] @ q5  = t5,   q4  = t4
+        mbutterfly_l    q5,  q4,  d27, d20, d2[1], d2[0] @ q5  = t5,   q4  = t4
         butterfly_n     d18, d21, q6,  q2,  q3,  q2      @ d18 = t2a,  d21 = t10a
 
         mbutterfly_l    q7,  q6,  d19, d28, d3[1], d3[0] @ q7  = t13,  q6  = t12
         butterfly_n     d20, d28, q5,  q7,  q2,  q7      @ d20 = t5a,  d28 = t13a
-        mbutterfly_l    q3,  q2,  d25, d22, d1[3], d1[2] @ q3  = t7,   q2  = t6
+        mbutterfly_l    q3,  q2,  d25, d22, d2[3], d2[2] @ q3  = t7,   q2  = t6
         butterfly_n     d27, d19, q4,  q6,  q5,  q6      @ d27 = t4a,  d19 = t12a
 
         mbutterfly_l    q5,  q4,  d17, d30, d3[3], d3[2] @ q5  = t15,  q4  = t14



More information about the ffmpeg-cvslog mailing list