[FFmpeg-cvslog] vp9lpf/x86: add ff_vp9_loop_filter_[vh]_16_16_sse2().

James Almer git at videolan.org
Thu Mar 23 12:44:15 EET 2017


ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Fri Jan 17 03:55:44 2014 -0300| [1f451eed606b680751e429660cc0945c60d0430c] | committer: Anton Khirnov

vp9lpf/x86: add ff_vp9_loop_filter_[vh]_16_16_sse2().

The performance gains are similar to those of the SSSE3 version.

Signed-off-by: James Almer <jamrial at gmail.com>
Signed-off-by: Anton Khirnov <anton at khirnov.net>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1f451eed606b680751e429660cc0945c60d0430c
---

 libavcodec/x86/vp9dsp_init.c | 19 +++++++++++++++----
 libavcodec/x86/vp9lpf.asm    | 14 ++++++++------
 2 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index 93453b8..139603c 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -217,10 +217,17 @@ filters_8tap_1d_fn2(avg, 32, avx2, ssse3)
 #undef filters_8tap_1d_fn3
 #undef filter_8tap_1d_fn
 
-void ff_vp9_loop_filter_v_16_16_ssse3(uint8_t *dst, ptrdiff_t stride, int E, int I, int H);
-void ff_vp9_loop_filter_v_16_16_avx  (uint8_t *dst, ptrdiff_t stride, int E, int I, int H);
-void ff_vp9_loop_filter_h_16_16_ssse3(uint8_t *dst, ptrdiff_t stride, int E, int I, int H);
-void ff_vp9_loop_filter_h_16_16_avx  (uint8_t *dst, ptrdiff_t stride, int E, int I, int H);
+#define lpf_funcs(size1, size2, opt) \
+void ff_vp9_loop_filter_v_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stride, \
+                                                    int E, int I, int H); \
+void ff_vp9_loop_filter_h_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stride, \
+                                                    int E, int I, int H)
+
+lpf_funcs(16, 16, sse2);
+lpf_funcs(16, 16, ssse3);
+lpf_funcs(16, 16, avx);
+
+#undef lpf_funcs
 
 #endif /* HAVE_YASM */
 
@@ -283,6 +290,10 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
         init_fpel(2, 1, 16, avg, sse2);
         init_fpel(1, 1, 32, avg, sse2);
         init_fpel(0, 1, 64, avg, sse2);
+        if (ARCH_X86_64) {
+            dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_sse2;
+            dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_sse2;
+        }
     }
 
     if (EXTERNAL_SSSE3(cpu_flags)) {
diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm
index 8568f1d..d4c70f5 100644
--- a/libavcodec/x86/vp9lpf.asm
+++ b/libavcodec/x86/vp9lpf.asm
@@ -327,11 +327,11 @@ SECTION .text
 %endif
 
     ; calc fm mask
+%if cpuflag(ssse3)
     pxor                m0, m0
-    movd                m2, Id
-    movd                m3, Ed
-    pshufb              m2, m0                          ; I I I I ...
-    pshufb              m3, m0                          ; E E E E ...
+%endif
+    SPLATB_REG          m2, I, m0                       ; I I I I ...
+    SPLATB_REG          m3, E, m0                       ; E E E E ...
     mova                m0, [pb_80]
     pxor                m2, m0
     pxor                m3, m0
@@ -383,9 +383,10 @@ SECTION .text
     ABSSUB_CMP          m1, m9, m11, m6, m4, m5, m8     ; abs(p2 - p0) <= 1
     pand                m2, m1
     ABSSUB              m4, m10, m11, m5                ; abs(p1 - p0)
+%if cpuflag(ssse3)
     pxor                m0, m0
-    movd                m7, Hd
-    pshufb              m7, m0                          ; H H H H ...
+%endif
+    SPLATB_REG          m7, H, m0                       ; H H H H ...
     pxor                m7, m8
     pxor                m4, m8
     pcmpgtb             m0, m4, m7                      ; abs(p1 - p0) > H (1/2 hev condition)
@@ -595,6 +596,7 @@ cglobal vp9_loop_filter_h_16_16, 5,10,16, 256, dst, stride, E, I, H, mstride, ds
     RET
 %endmacro
 
+LPF_16_16_VH sse2
 LPF_16_16_VH ssse3
 LPF_16_16_VH avx
 



More information about the ffmpeg-cvslog mailing list