[FFmpeg-cvslog] lavu/sha512: Fully unroll the transform function loops

James Almer git at videolan.org
Wed Sep 11 22:26:23 CEST 2013


ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Mon Sep  9 20:16:40 2013 -0300| [bbcaf25d4d2130fa9c34c314628f9fd2f706b61b] | committer: Michael Niedermayer

lavu/sha512: Fully unroll the transform function loops

crypto_bench SHA-512 results (lower time is better) on an AMD Athlon X2 7750+, compiled with mingw32-w64 GCC 4.7.3 for x86_64:

Before:
lavu       SHA-512      size: 1048576  runs:   1024  time:   12.737 +- 0.147

After:
lavu       SHA-512      size: 1048576  runs:   1024  time:   11.670 +- 0.173

Signed-off-by: James Almer <jamrial at gmail.com>
Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=bbcaf25d4d2130fa9c34c314628f9fd2f706b61b
---

 libavutil/sha512.c |   45 +++++++++++++++++++++++++--------------------
 1 file changed, 25 insertions(+), 20 deletions(-)

diff --git a/libavutil/sha512.c b/libavutil/sha512.c
index 8413603..66a864f 100644
--- a/libavutil/sha512.c
+++ b/libavutil/sha512.c
@@ -150,27 +150,32 @@ static void sha512_transform(uint64_t *state, const uint8_t buffer[128])
         a = T1 + T2;
     }
 #else
-    for (i = 0; i < 16 - 7;) {
-        ROUND512_0_TO_15(a, b, c, d, e, f, g, h);
-        ROUND512_0_TO_15(h, a, b, c, d, e, f, g);
-        ROUND512_0_TO_15(g, h, a, b, c, d, e, f);
-        ROUND512_0_TO_15(f, g, h, a, b, c, d, e);
-        ROUND512_0_TO_15(e, f, g, h, a, b, c, d);
-        ROUND512_0_TO_15(d, e, f, g, h, a, b, c);
-        ROUND512_0_TO_15(c, d, e, f, g, h, a, b);
-        ROUND512_0_TO_15(b, c, d, e, f, g, h, a);
-    }
 
-    for (; i < 80 - 7;) {
-        ROUND512_16_TO_80(a, b, c, d, e, f, g, h);
-        ROUND512_16_TO_80(h, a, b, c, d, e, f, g);
-        ROUND512_16_TO_80(g, h, a, b, c, d, e, f);
-        ROUND512_16_TO_80(f, g, h, a, b, c, d, e);
-        ROUND512_16_TO_80(e, f, g, h, a, b, c, d);
-        ROUND512_16_TO_80(d, e, f, g, h, a, b, c);
-        ROUND512_16_TO_80(c, d, e, f, g, h, a, b);
-        ROUND512_16_TO_80(b, c, d, e, f, g, h, a);
-    }
+#define R512_0 \
+    ROUND512_0_TO_15(a, b, c, d, e, f, g, h); \
+    ROUND512_0_TO_15(h, a, b, c, d, e, f, g); \
+    ROUND512_0_TO_15(g, h, a, b, c, d, e, f); \
+    ROUND512_0_TO_15(f, g, h, a, b, c, d, e); \
+    ROUND512_0_TO_15(e, f, g, h, a, b, c, d); \
+    ROUND512_0_TO_15(d, e, f, g, h, a, b, c); \
+    ROUND512_0_TO_15(c, d, e, f, g, h, a, b); \
+    ROUND512_0_TO_15(b, c, d, e, f, g, h, a)
+
+    i = 0;
+    R512_0; R512_0;
+
+#define R512_16 \
+    ROUND512_16_TO_80(a, b, c, d, e, f, g, h); \
+    ROUND512_16_TO_80(h, a, b, c, d, e, f, g); \
+    ROUND512_16_TO_80(g, h, a, b, c, d, e, f); \
+    ROUND512_16_TO_80(f, g, h, a, b, c, d, e); \
+    ROUND512_16_TO_80(e, f, g, h, a, b, c, d); \
+    ROUND512_16_TO_80(d, e, f, g, h, a, b, c); \
+    ROUND512_16_TO_80(c, d, e, f, g, h, a, b); \
+    ROUND512_16_TO_80(b, c, d, e, f, g, h, a)
+
+    R512_16; R512_16; R512_16; R512_16;
+    R512_16; R512_16; R512_16; R512_16;
 #endif
     state[0] += a;
     state[1] += b;



More information about the ffmpeg-cvslog mailing list