[FFmpeg-devel] [PATCH 4/4] x86/af_afir: add ff_fcmul_add_avx()
Paul B Mahol
onemda at gmail.com
Thu Jan 3 10:47:10 EET 2019
On 1/3/19, James Almer <jamrial at gmail.com> wrote:
> fcmul_add_c: 1228.8
> fcmul_add_sse3: 334.3
> fcmul_add_avx: 186.3
>
> Signed-off-by: James Almer <jamrial at gmail.com>
> ---
> libavfilter/x86/af_afir.asm | 8 +++++++-
> libavfilter/x86/af_afir_init.c | 5 +++++
> 2 files changed, 12 insertions(+), 1 deletion(-)
>
> diff --git a/libavfilter/x86/af_afir.asm b/libavfilter/x86/af_afir.asm
> index fcc1f426db..8054ac5f10 100644
> --- a/libavfilter/x86/af_afir.asm
> +++ b/libavfilter/x86/af_afir.asm
> @@ -27,7 +27,7 @@ SECTION .text
> ; void ff_fcmul_add(float *sum, const float *t, const float *c, int len)
> ;------------------------------------------------------------------------------
>
> -INIT_XMM sse3
> +%macro FCMUL_ADD 0
> cglobal fcmul_add, 4,4,6, sum, t, c, len
> shl lend, 3
> add tq, lenq
> @@ -61,3 +61,9 @@ ALIGN 16
> addss xm0, [sumq + lenq]
> movss [sumq + lenq], xm0
> RET
> +%endmacro
> +
> +INIT_XMM sse3
> +FCMUL_ADD
> +INIT_YMM avx
> +FCMUL_ADD
> diff --git a/libavfilter/x86/af_afir_init.c b/libavfilter/x86/af_afir_init.c
> index 29e6f976b2..c37212c381 100644
> --- a/libavfilter/x86/af_afir_init.c
> +++ b/libavfilter/x86/af_afir_init.c
> @@ -24,6 +24,8 @@
>
> void ff_fcmul_add_sse3(float *sum, const float *t, const float *c,
> ptrdiff_t len);
> +void ff_fcmul_add_avx(float *sum, const float *t, const float *c,
> + ptrdiff_t len);
>
> av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
> {
> @@ -32,4 +34,7 @@ av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
> if (EXTERNAL_SSE3(cpu_flags)) {
> s->fcmul_add = ff_fcmul_add_sse3;
> }
> + if (EXTERNAL_AVX_FAST(cpu_flags)) {
> + s->fcmul_add = ff_fcmul_add_avx;
> + }
> }
> --
> 2.20.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
OK
Also, please state which CPU you tested it on.
More information about the ffmpeg-devel mailing list