[FFmpeg-cvslog] avfilter/x86/af_afir: add FMA3 SIMD

Paul B Mahol git at videolan.org
Sun Sep 17 12:13:14 EEST 2023


ffmpeg | branch: master | Paul B Mahol <onemda at gmail.com> | Sun Sep 10 19:25:20 2023 +0200| [c5effe7d3db6610e9ec5a1efbe11d2b87bb34d61] | committer: Paul B Mahol

avfilter/x86/af_afir: add FMA3 SIMD

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c5effe7d3db6610e9ec5a1efbe11d2b87bb34d61
---

 libavfilter/x86/af_afir.asm    | 27 +++++++++++++++++++++++++++
 libavfilter/x86/af_afir_init.c |  5 +++++
 2 files changed, 32 insertions(+)

diff --git a/libavfilter/x86/af_afir.asm b/libavfilter/x86/af_afir.asm
index 2cc09709a2..ed0276c7b9 100644
--- a/libavfilter/x86/af_afir.asm
+++ b/libavfilter/x86/af_afir.asm
@@ -67,3 +67,30 @@ INIT_XMM sse3
 FCMUL_ADD
 INIT_YMM avx
 FCMUL_ADD
+
+%if HAVE_FMA3_EXTERNAL
+INIT_YMM fma3
+cglobal fcmul_add, 4,4,4, sum, t, c, len ; sum += t * c over interleaved (re, im) float pairs, then one scalar tail term
+    shl       lend, 3                    ; len *= 8: scale to a byte offset (8 bytes per re/im float pair)
+    add         tq, lenq                 ; move t, c and sum forward by that many bytes ...
+    add         cq, lenq
+    add       sumq, lenq
+    neg       lenq                       ; ... and index them with a negative offset that counts up toward 0
+.loop:
+    movaps    m0, [tq + lenq]            ; m0 = t as (re, im) pairs; NOTE(review): movaps presumes aligned buffers - confirm with callers
+    movaps    m1, [cq + lenq]            ; m1 = c as (re, im) pairs
+    vpermilps m3, m0, 177                ; 177 = 0b10110001: swap within each pair -> (t.im, t.re)
+    vpermilps m2, m1, 160                ; 160 = 0b10100000: duplicate even elements -> (c.re, c.re)
+    vpermilps m1, m1, 245                ; 245 = 0b11110101: duplicate odd elements  -> (c.im, c.im)
+    mulps     m1, m1, m3                 ; m1 = (c.im*t.im, c.im*t.re)
+    vfmaddsub132ps m0, m1, m2            ; m0 = m0*m2 -/+ m1: even = t.re*c.re - t.im*c.im, odd = t.im*c.re + t.re*c.im
+    addps     m0, m0, [sumq + lenq]      ; add the product onto the existing sum values
+    movaps    [sumq + lenq], m0          ; write the accumulated result back to sum
+    add       lenq, mmsize               ; step to the next vector (mmsize bytes)
+    jl .loop                             ; repeat while the offset is negative; NOTE(review): presumably len*8 is a multiple of mmsize - confirm
+    movss xm0, [tq + lenq]               ; scalar tail: one extra float at the final offset (just past the last pair if lenq == 0 here)
+    mulss xm0, [cq + lenq]               ; plain real multiply t * c for that single float
+    addss xm0, [sumq + lenq]             ; accumulate onto the matching sum element
+    movss [sumq + lenq], xm0             ; store the tail result
+    RET
+%endif
diff --git a/libavfilter/x86/af_afir_init.c b/libavfilter/x86/af_afir_init.c
index e53817b9c0..d573acf10b 100644
--- a/libavfilter/x86/af_afir_init.c
+++ b/libavfilter/x86/af_afir_init.c
@@ -26,6 +26,8 @@ void ff_fcmul_add_sse3(float *sum, const float *t, const float *c,
                        ptrdiff_t len);
 void ff_fcmul_add_avx(float *sum, const float *t, const float *c,
                       ptrdiff_t len);
+void ff_fcmul_add_fma3(float *sum, const float *t, const float *c,
+                       ptrdiff_t len);
 
 av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
 {
@@ -37,4 +39,7 @@ av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
     if (EXTERNAL_AVX_FAST(cpu_flags)) {
         s->fcmul_add = ff_fcmul_add_avx;
     }
+    if (EXTERNAL_FMA3_FAST(cpu_flags)) {
+        s->fcmul_add = ff_fcmul_add_fma3;
+    }
 }



More information about the ffmpeg-cvslog mailing list