[FFmpeg-cvslog] dcadsp: split lfe_dir cases

Sat Feb 8 02:54:13 CET 2014

ffmpeg | branch: master | Christophe Gisquet <christophe.gisquet at gmail.com> | Thu Feb  6 00:41:54 2014 +0000| [45854df9a5220bdde400a447f63f61618b89dde2] | committer: Michael Niedermayer

dcadsp: split lfe_dir cases

The x86 code runs short on registers because numerous elements are not static.
In addition, splitting the two decimation cases (32 and 64) allows more
optimized code, at least for x86.

Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=45854df9a5220bdde400a447f63f61618b89dde2
---

 libavcodec/arm/dcadsp_init_arm.c |   33 ++++++++++++++++++++++++++++++---
 libavcodec/dcadec.c              |   10 +++++-----
 libavcodec/dcadsp.c              |   20 +++++++++++++++++---
 libavcodec/dcadsp.h              |    4 ++--
 4 files changed, 54 insertions(+), 13 deletions(-)

diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/dcadsp_init_arm.c
index 8893f48..0a8c2fe 100644
--- a/libavcodec/arm/dcadsp_init_arm.c
+++ b/libavcodec/arm/dcadsp_init_arm.c
@@ -47,16 +47,43 @@ void ff_synth_filter_float_neon(FFTContext *imdct,
                                 float out[32], const float in[32],
                                 float scale);
 
+static void lfe_fir0_vfp(float *out, const float *in, const float *coefs,
+                         float scale)
+{
+    ff_dca_lfe_fir_vfp(out, in, coefs, 32, scale);
+}
+
+static void lfe_fir1_vfp(float *out, const float *in, const float *coefs,
+                         float scale)
+{
+    ff_dca_lfe_fir_vfp(out, in, coefs, 64, scale);
+}
+
+static void lfe_fir0_neon(float *out, const float *in, const float *coefs,
+                          float scale)
+{
+    ff_dca_lfe_fir_neon(out, in, coefs, 32, scale);
+}
+
+static void lfe_fir1_neon(float *out, const float *in, const float *coefs,
+                          float scale)
+{
+    ff_dca_lfe_fir_neon(out, in, coefs, 64, scale);
+}
+
 av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
 {
     int cpu_flags = av_get_cpu_flags();
 
     if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) {
-        s->lfe_fir = ff_dca_lfe_fir_vfp;
+        s->lfe_fir[0]      = lfe_fir0_vfp;
+        s->lfe_fir[1]      = lfe_fir1_vfp;
         s->qmf_32_subbands = ff_dca_qmf_32_subbands_vfp;
     }
-    if (have_neon(cpu_flags))
-        s->lfe_fir = ff_dca_lfe_fir_neon;
+    if (have_neon(cpu_flags)) {
+        s->lfe_fir[0] = lfe_fir0_neon;
+        s->lfe_fir[1] = lfe_fir1_neon;
+    }
 }
 
 av_cold void ff_synth_filter_init_arm(SynthFilterContext *s)
diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c
index 4b7f5d0..1deddf5 100644
--- a/libavcodec/dcadec.c
+++ b/libavcodec/dcadec.c
@@ -1118,23 +1118,23 @@ static void lfe_interpolation_fir(DCAContext *s, int decimation_select,
      * samples_out: An array holding interpolated samples
      */
 
-    int decifactor;
+    int idx;
     const float *prCoeff;
     int deciindex;
 
     /* Select decimation filter */
     if (decimation_select == 1) {
-        decifactor = 64;
+        idx = 1;
         prCoeff = lfe_fir_128;
     } else {
-        decifactor = 32;
+        idx = 0;
         prCoeff = lfe_fir_64;
     }
     /* Interpolation */
     for (deciindex = 0; deciindex < num_deci_sample; deciindex++) {
-        s->dcadsp.lfe_fir(samples_out, samples_in, prCoeff, decifactor, scale);
+        s->dcadsp.lfe_fir[idx](samples_out, samples_in, prCoeff, scale);
         samples_in++;
-        samples_out += 2 * decifactor;
+        samples_out += 2 * 32 * (1 + idx);
     }
 }
 
diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c
index 87c9f90..b14c435 100644
--- a/libavcodec/dcadsp.c
+++ b/libavcodec/dcadsp.c
@@ -32,8 +32,9 @@ static void int8x8_fmul_int32_c(float *dst, const int8_t *src, int scale)
         dst[i] = src[i] * fscale;
 }
 
-static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
-                          int decifactor, float scale)
+static inline void
+dca_lfe_fir(float *out, const float *in, const float *coefs,
+            int decifactor, float scale)
 {
     float *out2 = out + decifactor;
     const float *cf0 = coefs;
@@ -82,9 +83,22 @@ static void dca_qmf_32_subbands(float samples_in[32][8], int sb_act,
     }
 }
 
+static void dca_lfe_fir0_c(float *out, const float *in, const float *coefs,
+                           float scale)
+{
+    dca_lfe_fir(out, in, coefs, 32, scale);
+}
+
+static void dca_lfe_fir1_c(float *out, const float *in, const float *coefs,
+                           float scale)
+{
+    dca_lfe_fir(out, in, coefs, 64, scale);
+}
+
 av_cold void ff_dcadsp_init(DCADSPContext *s)
 {
-    s->lfe_fir = dca_lfe_fir_c;
+    s->lfe_fir[0] = dca_lfe_fir0_c;
+    s->lfe_fir[1] = dca_lfe_fir1_c;
     s->qmf_32_subbands = dca_qmf_32_subbands;
     s->int8x8_fmul_int32 = int8x8_fmul_int32_c;
     if (ARCH_ARM) ff_dcadsp_init_arm(s);
diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h
index 215dce1..20d1894 100644
--- a/libavcodec/dcadsp.h
+++ b/libavcodec/dcadsp.h
@@ -23,8 +23,8 @@
 #include "synth_filter.h"
 
 typedef struct DCADSPContext {
-    void (*lfe_fir)(float *out, const float *in, const float *coefs,
-                    int decifactor, float scale);
+    void (*lfe_fir[2])(float *out, const float *in, const float *coefs,
+                       float scale);
     void (*qmf_32_subbands)(float samples_in[32][8], int sb_act,
                             SynthFilterContext *synth, FFTContext *imdct,
                             float synth_buf_ptr[512],