[FFmpeg-devel] [PATCH] aacenc_utils: unroll loops to allow compiler to use SIMD.

Reimar Döffinger Reimar.Doeffinger at gmx.de
Sun Mar 6 19:35:58 CET 2016


Transcoding from MP3 to AAC with default settings
is approximately 10% faster with this change.

Signed-off-by: Reimar Döffinger <Reimar.Doeffinger at gmx.de>
---
 libavcodec/aacenc_utils.h | 47 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 38 insertions(+), 9 deletions(-)

diff --git a/libavcodec/aacenc_utils.h b/libavcodec/aacenc_utils.h
index b9bd6bf..1639021 100644
--- a/libavcodec/aacenc_utils.h
+++ b/libavcodec/aacenc_utils.h
@@ -36,15 +36,29 @@
 #define ROUND_TO_ZERO 0.1054f
 #define C_QUANT 0.4054f
 
+#define ABSPOW(inv, outv) \
+do { \
+    float a = (inv); \
+    a = fabsf(a); \
+    (outv) = sqrtf(a * sqrtf(a)); \
+} while(0)
+
 static inline void abs_pow34_v(float *out, const float *in, const int size)
 {
     int i;
-    for (i = 0; i < size; i++) {
-        float a = fabsf(in[i]);
-        out[i] = sqrtf(a * sqrtf(a));
+    for (i = 0; i < size - 3; i += 4) {
+        ABSPOW(in[i], out[i]);
+        ABSPOW(in[i+1], out[i+1]);
+        ABSPOW(in[i+2], out[i+2]);
+        ABSPOW(in[i+3], out[i+3]);
+    }
+    for (; i < size; i++) {
+        ABSPOW(in[i], out[i]);
     }
 }
 
+#undef ABSPOW
+
 static inline float pos_pow34(float a)
 {
     return sqrtf(a * sqrtf(a));
@@ -61,20 +75,35 @@ static inline int quant(float coef, const float Q, const float rounding)
     return sqrtf(a * sqrtf(a)) + rounding;
 }
 
+
+#define Q(scv, inv, outv) \
+do { \
+    float qc = (scv) * Q34; \
+    int tmp = (int)FFMIN(qc + rounding, (float)maxval); \
+    if (is_signed && (inv) < 0.0f) { \
+        tmp = -tmp; \
+    } \
+    (outv) = tmp; \
+} while(0)
+
 static inline void quantize_bands(int *out, const float *in, const float *scaled,
                                   int size, float Q34, int is_signed, int maxval,
                                   const float rounding)
 {
     int i;
-    for (i = 0; i < size; i++) {
-        float qc = scaled[i] * Q34;
-        out[i] = (int)FFMIN(qc + rounding, (float)maxval);
-        if (is_signed && in[i] < 0.0f) {
-            out[i] = -out[i];
-        }
+    for (i = 0; i < size - 3; i += 4) {
+        Q(scaled[i], in[i], out[i]);
+        Q(scaled[i+1], in[i+1], out[i+1]);
+        Q(scaled[i+2], in[i+2], out[i+2]);
+        Q(scaled[i+3], in[i+3], out[i+3]);
+    }
+    for (; i < size; i++) {
+        Q(scaled[i], in[i], out[i]);
     }
 }
 
+#undef Q
+
 static inline float find_max_val(int group_len, int swb_size, const float *scaled)
 {
     float maxval = 0.0f;
-- 
2.7.0



More information about the ffmpeg-devel mailing list