[FFmpeg-devel] [PATCH] x86/dcadsp: add ff_dca_lfe_fir0_fma3

James Almer jamrial at gmail.com
Thu Apr 3 06:46:18 CEST 2014


The FMA3 version of ff_dca_lfe_fir0 is about 10% faster than the SSE version.

Signed-off-by: James Almer <jamrial at gmail.com>
---
 libavcodec/x86/dcadsp.asm    | 9 +++++++++
 libavcodec/x86/dcadsp_init.c | 5 +++++
 2 files changed, 14 insertions(+)

diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
index c5a1e77..24cf9fa 100644
--- a/libavcodec/x86/dcadsp.asm
+++ b/libavcodec/x86/dcadsp.asm
@@ -132,11 +132,16 @@ DECODE_HF
     mulps       va, %2
     mulps       vb, %2
 %if %0 == 3
+%if cpuflag(fma3)
+    fmaddps     va, m4, %3, va
+    fmaddps     vb, m0, %3, vb
+%else
     mulps       m4, %3
     mulps       m0, %3
     addps       va, m4
     addps       vb, m0
 %endif
+%endif
     ; va = va1 va2 va3 va4
     ; vb = vb1 vb2 vb3 vb4
 %if %1
@@ -198,6 +203,10 @@ cglobal dca_lfe_fir%1, 3,3,6-%1, out, in, cf0
 INIT_XMM sse
 DCA_LFE_FIR 0
 DCA_LFE_FIR 1
+%if HAVE_FMA3_EXTERNAL
+INIT_XMM fma3
+DCA_LFE_FIR 0
+%endif
 
 %macro SETZERO 1
 %if cpuflag(sse2) && notcpuflag(avx)
diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c
index 48880d6..decd37e 100644
--- a/libavcodec/x86/dcadsp_init.c
+++ b/libavcodec/x86/dcadsp_init.c
@@ -34,6 +34,7 @@ void ff_decode_hf_sse4(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS
                        int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end);
 void ff_dca_lfe_fir0_sse(float *out, const float *in, const float *coefs);
 void ff_dca_lfe_fir1_sse(float *out, const float *in, const float *coefs);
+void ff_dca_lfe_fir0_fma3(float *out, const float *in, const float *coefs);
 
 av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
 {
@@ -54,6 +55,10 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
     if (EXTERNAL_SSE4(cpu_flags)) {
         s->decode_hf = ff_decode_hf_sse4;
     }
+
+    if (EXTERNAL_FMA3(cpu_flags)) {
+        s->lfe_fir[0]        = ff_dca_lfe_fir0_fma3;
+    }
 }
 
 
-- 
1.8.3.2



More information about the ffmpeg-devel mailing list