[FFmpeg-devel] [PATCH 08/14] aarch64: vp9itxfm16: Avoid .irp when it doesn't save any lines

Martin Storsjö martin at martin.st
Fri Mar 17 00:10:13 EET 2017


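Each of these .irp loops takes as many lines as the three stores written
out explicitly, so write the stores out directly. This makes the code a
bit more readable.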
---
 libavcodec/aarch64/vp9itxfm_16bpp_neon.S | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/libavcodec/aarch64/vp9itxfm_16bpp_neon.S b/libavcodec/aarch64/vp9itxfm_16bpp_neon.S
index f80604f..86ea29e 100644
--- a/libavcodec/aarch64/vp9itxfm_16bpp_neon.S
+++ b/libavcodec/aarch64/vp9itxfm_16bpp_neon.S
@@ -886,21 +886,21 @@ function \txfm\()16_1d_4x16_pass1_neon
         // for the first slice of the second pass (where it is the
         // last 4x4 block).
         add             x0,  x0,  #16
-.irp i, 20, 24, 28
-        store           \i,  x0,  #16
-.endr
+        st1             {v20.4s},  [x0], #16
+        st1             {v24.4s},  [x0], #16
+        st1             {v28.4s},  [x0], #16
         add             x0,  x0,  #16
-.irp i, 21, 25, 29
-        store           \i,  x0,  #16
-.endr
+        st1             {v21.4s},  [x0], #16
+        st1             {v25.4s},  [x0], #16
+        st1             {v29.4s},  [x0], #16
         add             x0,  x0,  #16
-.irp i, 22, 26, 30
-        store           \i,  x0,  #16
-.endr
+        st1             {v22.4s},  [x0], #16
+        st1             {v26.4s},  [x0], #16
+        st1             {v30.4s},  [x0], #16
         add             x0,  x0,  #16
-.irp i, 23, 27, 31
-        store           \i,  x0,  #16
-.endr
+        st1             {v23.4s},  [x0], #16
+        st1             {v27.4s},  [x0], #16
+        st1             {v31.4s},  [x0], #16
 
         mov             v28.16b, v16.16b
         mov             v29.16b, v17.16b
-- 
2.7.4



More information about the ffmpeg-devel mailing list