[FFmpeg-cvslog] x86/vp9: add avx2 subpel MC SIMD for 10/12bpp

James Almer git at videolan.org
Fri Sep 18 17:29:13 CEST 2015


ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Thu Sep 17 22:49:58 2015 -0300| [2f9ab159607fd088f8ced1e603da14d203fbfffe] | committer: James Almer

x86/vp9: add avx2 subpel MC SIMD for 10/12bpp

Reviewed-by: Ronald S. Bultje <rsbultje at gmail.com>
Signed-off-by: James Almer <jamrial at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2f9ab159607fd088f8ced1e603da14d203fbfffe
---

 libavcodec/x86/vp9dsp_init_16bpp_template.c |   22 ++++++++++++++++++++++
 libavcodec/x86/vp9mc_16bpp.asm              |    6 ++++++
 2 files changed, 28 insertions(+)

diff --git a/libavcodec/x86/vp9dsp_init_16bpp_template.c b/libavcodec/x86/vp9dsp_init_16bpp_template.c
index 3e2737b..a1ce212 100644
--- a/libavcodec/x86/vp9dsp_init_16bpp_template.c
+++ b/libavcodec/x86/vp9dsp_init_16bpp_template.c
@@ -33,16 +33,31 @@ extern const int16_t ff_filters_16bpp[3][15][4][16];
 
 decl_mc_funcs(4, sse2, int16_t, 16, BPC);
 decl_mc_funcs(8, sse2, int16_t, 16, BPC);
+decl_mc_funcs(16, avx2, int16_t, 16, BPC);
 
 mc_rep_funcs(16,  8, 16, sse2, int16_t, 16, BPC);
 mc_rep_funcs(32, 16, 32, sse2, int16_t, 16, BPC);
 mc_rep_funcs(64, 32, 64, sse2, int16_t, 16, BPC);
+mc_rep_funcs(32, 16, 32, avx2, int16_t, 16, BPC);
+mc_rep_funcs(64, 32, 64, avx2, int16_t, 16, BPC);
 
 filters_8tap_2d_fn2(put, 16, BPC, 2, sse2, sse2, 16bpp)
 filters_8tap_2d_fn2(avg, 16, BPC, 2, sse2, sse2, 16bpp)
+filters_8tap_2d_fn(put, 64, 32, BPC, 2, avx2, 16bpp)
+filters_8tap_2d_fn(avg, 64, 32, BPC, 2, avx2, 16bpp)
+filters_8tap_2d_fn(put, 32, 32, BPC, 2, avx2, 16bpp)
+filters_8tap_2d_fn(avg, 32, 32, BPC, 2, avx2, 16bpp)
+filters_8tap_2d_fn(put, 16, 32, BPC, 2, avx2, 16bpp)
+filters_8tap_2d_fn(avg, 16, 32, BPC, 2, avx2, 16bpp)
 
 filters_8tap_1d_fn3(put, BPC, sse2, sse2, 16bpp)
 filters_8tap_1d_fn3(avg, BPC, sse2, sse2, 16bpp)
+filters_8tap_1d_fn2(put, 64, BPC, avx2, 16bpp)
+filters_8tap_1d_fn2(avg, 64, BPC, avx2, 16bpp)
+filters_8tap_1d_fn2(put, 32, BPC, avx2, 16bpp)
+filters_8tap_1d_fn2(avg, 32, BPC, avx2, 16bpp)
+filters_8tap_1d_fn2(put, 16, BPC, avx2, 16bpp)
+filters_8tap_1d_fn2(avg, 16, BPC, avx2, 16bpp)
 
 #endif /* HAVE_YASM */
 
@@ -56,6 +71,13 @@ av_cold void INIT_FUNC(VP9DSPContext *dsp)
         init_subpel3(1, avg, BPC, sse2);
     }
 
+    if (EXTERNAL_AVX2(cpu_flags)) {
+        init_subpel3_32_64(0,  put, BPC, avx2);
+        init_subpel3_32_64(1,  avg, BPC, avx2);
+        init_subpel2(2, 0, 16, put, BPC, avx2);
+        init_subpel2(2, 1, 16, avg, BPC, avx2);
+    }
+
 #endif /* HAVE_YASM */
 
     ff_vp9dsp_init_16bpp_x86(dsp);
diff --git a/libavcodec/x86/vp9mc_16bpp.asm b/libavcodec/x86/vp9mc_16bpp.asm
index 52fc5ee..d66da55 100644
--- a/libavcodec/x86/vp9mc_16bpp.asm
+++ b/libavcodec/x86/vp9mc_16bpp.asm
@@ -201,6 +201,9 @@ cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _12, 6, 6, %2, dst, dstride, src, sstride,
 INIT_XMM sse2
 filter_h_fn put
 filter_h_fn avg
+INIT_YMM avx2
+filter_h_fn put
+filter_h_fn avg
 
 %macro filter_v4_fn 1-2 12
 %if ARCH_X86_64
@@ -419,3 +422,6 @@ cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _12, 4, 7, %2, dst, dstride, src, sstride,
 INIT_XMM sse2
 filter_v_fn put
 filter_v_fn avg
+INIT_YMM avx2
+filter_v_fn put
+filter_v_fn avg



More information about the ffmpeg-cvslog mailing list