[FFmpeg-cvslog] avfilter/x86/af_afir: add FMA3 SIMD
Paul B Mahol
git at videolan.org
Sun Sep 17 12:13:14 EEST 2023
ffmpeg | branch: master | Paul B Mahol <onemda at gmail.com> | Sun Sep 10 19:25:20 2023 +0200| [c5effe7d3db6610e9ec5a1efbe11d2b87bb34d61] | committer: Paul B Mahol
avfilter/x86/af_afir: add FMA3 SIMD
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c5effe7d3db6610e9ec5a1efbe11d2b87bb34d61
---
libavfilter/x86/af_afir.asm | 27 +++++++++++++++++++++++++++
libavfilter/x86/af_afir_init.c | 5 +++++
2 files changed, 32 insertions(+)
diff --git a/libavfilter/x86/af_afir.asm b/libavfilter/x86/af_afir.asm
index 2cc09709a2..ed0276c7b9 100644
--- a/libavfilter/x86/af_afir.asm
+++ b/libavfilter/x86/af_afir.asm
@@ -67,3 +67,30 @@ INIT_XMM sse3
FCMUL_ADD
INIT_YMM avx
FCMUL_ADD
+
+%if HAVE_FMA3_EXTERNAL ; assemble the FMA3 variant only when this build flag is set
+INIT_YMM fma3 ; x86inc setup: ymm-wide m# registers, "fma3" function suffix
+cglobal fcmul_add, 4,4,4, sum, t, c, len ; sum += t * c on interleaved (re, im) float pairs, then one extra scalar float
+ shl lend, 3 ; len *= 8: one (re, im) pair is two 4-byte floats
+ add tq, lenq ; advance t, c and sum to the end of the pair data ...
+ add cq, lenq
+ add sumq, lenq
+ neg lenq ; ... and index them with a negative byte offset that counts up
+.loop: ; one iteration handles mmsize bytes of pairs
+ movaps m0, [tq + lenq] ; m0 = t: re, im, re, im, ... (aligned load)
+ movaps m1, [cq + lenq] ; m1 = c: re, im, re, im, ... (aligned load)
+ vpermilps m3, m0, 177 ; 177 = 0xB1 selects 1,0,3,2: m3 = t.im, t.re (pair swapped)
+ vpermilps m2, m1, 160 ; 160 = 0xA0 selects 0,0,2,2: m2 = c.re, c.re
+ vpermilps m1, m1, 245 ; 245 = 0xF5 selects 1,1,3,3: m1 = c.im, c.im
+ mulps m1, m1, m3 ; m1 = t.im*c.im, t.re*c.im
+ vfmaddsub132ps m0, m1, m2 ; m0 = m0*m2 -/+ m1: re = t.re*c.re - t.im*c.im, im = t.im*c.re + t.re*c.im
+ addps m0, m0, [sumq + lenq] ; add the running sum ...
+ movaps [sumq + lenq], m0 ; ... and store it back (aligned store)
+ add lenq, mmsize ; next vector; sets the flags tested by jl
+ jl .loop ; repeat while the byte offset is still negative
+ movss xm0, [tq + lenq] ; scalar tail: sum[x] += t[x] * c[x] for one float at the final offset
+ mulss xm0, [cq + lenq] ; NOTE(review): offset is 0 here only if len*8 is a multiple of mmsize - confirm callers guarantee that
+ addss xm0, [sumq + lenq]
+ movss [sumq + lenq], xm0
+ RET ; x86inc return macro
+%endif ; HAVE_FMA3_EXTERNAL
diff --git a/libavfilter/x86/af_afir_init.c b/libavfilter/x86/af_afir_init.c
index e53817b9c0..d573acf10b 100644
--- a/libavfilter/x86/af_afir_init.c
+++ b/libavfilter/x86/af_afir_init.c
@@ -26,6 +26,8 @@ void ff_fcmul_add_sse3(float *sum, const float *t, const float *c,
ptrdiff_t len);
void ff_fcmul_add_avx(float *sum, const float *t, const float *c,
ptrdiff_t len);
+void ff_fcmul_add_fma3(float *sum, const float *t, const float *c,
+ ptrdiff_t len);
av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
{
@@ -37,4 +39,7 @@ av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
if (EXTERNAL_AVX_FAST(cpu_flags)) {
s->fcmul_add = ff_fcmul_add_avx;
}
+ if (EXTERNAL_FMA3_FAST(cpu_flags)) {
+ s->fcmul_add = ff_fcmul_add_fma3;
+ }
}
More information about the ffmpeg-cvslog
mailing list