[FFmpeg-devel] [PATCH] x86/vp9: add avx2 subpel MC SIMD for 10/12bpp

Ronald S. Bultje rsbultje at gmail.com
Fri Sep 18 10:10:33 CEST 2015


Hi,
On Sep 18, 2015 3:55 AM, "James Almer" <jamrial at gmail.com> wrote:
>
> Signed-off-by: James Almer <jamrial at gmail.com>
> ---
> Unbenched, as I lack the hardware to do so.
>
>  libavcodec/x86/vp9dsp_init_16bpp_template.c | 22 ++++++++++++++++++++++
>  libavcodec/x86/vp9mc_16bpp.asm              |  6 ++++++
>  2 files changed, 28 insertions(+)
>
> diff --git a/libavcodec/x86/vp9dsp_init_16bpp_template.c b/libavcodec/x86/vp9dsp_init_16bpp_template.c
> index 3e2737b..a1ce212 100644
> --- a/libavcodec/x86/vp9dsp_init_16bpp_template.c
> +++ b/libavcodec/x86/vp9dsp_init_16bpp_template.c
> @@ -33,16 +33,31 @@ extern const int16_t ff_filters_16bpp[3][15][4][16];
>
>  decl_mc_funcs(4, sse2, int16_t, 16, BPC);
>  decl_mc_funcs(8, sse2, int16_t, 16, BPC);
> +decl_mc_funcs(16, avx2, int16_t, 16, BPC);
>
>  mc_rep_funcs(16,  8, 16, sse2, int16_t, 16, BPC);
>  mc_rep_funcs(32, 16, 32, sse2, int16_t, 16, BPC);
>  mc_rep_funcs(64, 32, 64, sse2, int16_t, 16, BPC);
> +mc_rep_funcs(32, 16, 32, avx2, int16_t, 16, BPC);
> +mc_rep_funcs(64, 32, 64, avx2, int16_t, 16, BPC);
>
>  filters_8tap_2d_fn2(put, 16, BPC, 2, sse2, sse2, 16bpp)
>  filters_8tap_2d_fn2(avg, 16, BPC, 2, sse2, sse2, 16bpp)
> +filters_8tap_2d_fn(put, 64, 32, BPC, 2, avx2, 16bpp)
> +filters_8tap_2d_fn(avg, 64, 32, BPC, 2, avx2, 16bpp)
> +filters_8tap_2d_fn(put, 32, 32, BPC, 2, avx2, 16bpp)
> +filters_8tap_2d_fn(avg, 32, 32, BPC, 2, avx2, 16bpp)
> +filters_8tap_2d_fn(put, 16, 32, BPC, 2, avx2, 16bpp)
> +filters_8tap_2d_fn(avg, 16, 32, BPC, 2, avx2, 16bpp)
>
>  filters_8tap_1d_fn3(put, BPC, sse2, sse2, 16bpp)
>  filters_8tap_1d_fn3(avg, BPC, sse2, sse2, 16bpp)
> +filters_8tap_1d_fn2(put, 64, BPC, avx2, 16bpp)
> +filters_8tap_1d_fn2(avg, 64, BPC, avx2, 16bpp)
> +filters_8tap_1d_fn2(put, 32, BPC, avx2, 16bpp)
> +filters_8tap_1d_fn2(avg, 32, BPC, avx2, 16bpp)
> +filters_8tap_1d_fn2(put, 16, BPC, avx2, 16bpp)
> +filters_8tap_1d_fn2(avg, 16, BPC, avx2, 16bpp)
>
>  #endif /* HAVE_YASM */
>
> @@ -56,6 +71,13 @@ av_cold void INIT_FUNC(VP9DSPContext *dsp)
>          init_subpel3(1, avg, BPC, sse2);
>      }
>
> +    if (EXTERNAL_AVX2(cpu_flags)) {
> +        init_subpel3_32_64(0,  put, BPC, avx2);
> +        init_subpel3_32_64(1,  avg, BPC, avx2);
> +        init_subpel2(2, 0, 16, put, BPC, avx2);
> +        init_subpel2(2, 1, 16, avg, BPC, avx2);
> +    }
> +
>  #endif /* HAVE_YASM */
>
>      ff_vp9dsp_init_16bpp_x86(dsp);
> diff --git a/libavcodec/x86/vp9mc_16bpp.asm b/libavcodec/x86/vp9mc_16bpp.asm
> index 52fc5ee..d66da55 100644
> --- a/libavcodec/x86/vp9mc_16bpp.asm
> +++ b/libavcodec/x86/vp9mc_16bpp.asm
> @@ -201,6 +201,9 @@ cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _12, 6, 6, %2, dst, dstride, src, sstride,
>  INIT_XMM sse2
>  filter_h_fn put
>  filter_h_fn avg
> +INIT_YMM avx2
> +filter_h_fn put
> +filter_h_fn avg
>
>  %macro filter_v4_fn 1-2 12
>  %if ARCH_X86_64
> @@ -419,3 +422,6 @@ cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _12, 4, 7, %2, dst, dstride, src, sstride,
>  INIT_XMM sse2
>  filter_v_fn put
>  filter_v_fn avg
> +INIT_YMM avx2
> +filter_v_fn put
> +filter_v_fn avg
> --
> 2.5.2

Cool, I was hoping that would work, but I don't have Intel's emulator;
thanks for testing, and LGTM.

Ronald


More information about the ffmpeg-devel mailing list