[FFmpeg-devel] [PATCH] swresample/x86/resample: extend resample_double to support avx and fma3
Muhammad Faiz
mfcc64 at gmail.com
Sun Mar 19 07:48:54 EET 2017
On Thu, Mar 16, 2017 at 11:37 AM, Muhammad Faiz <mfcc64 at gmail.com> wrote:
> benchmark:
> sse2 10.670s
> avx 8.763s
> fma3 8.380s
>
> Signed-off-by: Muhammad Faiz <mfcc64 at gmail.com>
> ---
> libswresample/x86/resample.asm | 15 ++++++++++++---
> libswresample/x86/resample_init.c | 10 ++++++++++
> 2 files changed, 22 insertions(+), 3 deletions(-)
>
> diff --git a/libswresample/x86/resample.asm b/libswresample/x86/resample.asm
> index 4163df1..7107cf9 100644
> --- a/libswresample/x86/resample.asm
> +++ b/libswresample/x86/resample.asm
> @@ -203,7 +203,7 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_count, dst, frac, \
> ; horizontal sum & store
> %if mmsize == 32
> vextractf128 xm1, m0, 0x1
> - addps xm0, xm1
> + addp%4 xm0, xm1
> %endif
> movhlps xm1, xm0
> %ifidn %1, float
> @@ -489,8 +489,8 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
> %if mmsize == 32
> vextractf128 xm1, m0, 0x1
> vextractf128 xm3, m2, 0x1
> - addps xm0, xm1
> - addps xm2, xm3
> + addp%4 xm0, xm1
> + addp%4 xm2, xm3
> %endif
> cvtsi2s%4 xm1, fracd
> subp%4 xm2, xm0
> @@ -608,3 +608,12 @@ RESAMPLE_FNS int16, 2, 1
>
> INIT_XMM sse2
> RESAMPLE_FNS double, 8, 3, d, pdbl_1
> +
> +%if HAVE_AVX_EXTERNAL
> +INIT_YMM avx
> +RESAMPLE_FNS double, 8, 3, d, pdbl_1
> +%endif
> +%if HAVE_FMA3_EXTERNAL
> +INIT_YMM fma3
> +RESAMPLE_FNS double, 8, 3, d, pdbl_1
> +%endif
> diff --git a/libswresample/x86/resample_init.c b/libswresample/x86/resample_init.c
> index e515762..c6b2a36 100644
> --- a/libswresample/x86/resample_init.c
> +++ b/libswresample/x86/resample_init.c
> @@ -42,6 +42,8 @@ RESAMPLE_FUNCS(float, avx);
> RESAMPLE_FUNCS(float, fma3);
> RESAMPLE_FUNCS(float, fma4);
> RESAMPLE_FUNCS(double, sse2);
> +RESAMPLE_FUNCS(double, avx);
> +RESAMPLE_FUNCS(double, fma3);
>
> av_cold void swri_resample_dsp_x86_init(ResampleContext *c)
> {
> @@ -85,6 +87,14 @@ av_cold void swri_resample_dsp_x86_init(ResampleContext *c)
> c->dsp.resample_linear = ff_resample_linear_double_sse2;
> c->dsp.resample_common = ff_resample_common_double_sse2;
> }
> + if (EXTERNAL_AVX_FAST(mm_flags)) {
> + c->dsp.resample_linear = ff_resample_linear_double_avx;
> + c->dsp.resample_common = ff_resample_common_double_avx;
> + }
> + if (EXTERNAL_FMA3_FAST(mm_flags)) {
> + c->dsp.resample_linear = ff_resample_linear_double_fma3;
> + c->dsp.resample_common = ff_resample_common_double_fma3;
> + }
> break;
> }
> }
> --
> 2.9.3
>
Applied
Thanks
More information about the ffmpeg-devel mailing list