[FFmpeg-devel] [PATCH 2/2] x86/videodsp: add emulated_edge_mc_ssse3

James Almer jamrial at gmail.com
Mon Jun 23 20:51:54 CEST 2014


Signed-off-by: James Almer <jamrial at gmail.com>
---
Not benchmarked.

 libavcodec/x86/videodsp.asm    | 16 ++++++++++++++++
 libavcodec/x86/videodsp_init.c | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

diff --git a/libavcodec/x86/videodsp.asm b/libavcodec/x86/videodsp.asm
index ad15af9..761a0b0 100644
--- a/libavcodec/x86/videodsp.asm
+++ b/libavcodec/x86/videodsp.asm
@@ -21,6 +21,10 @@
 
 %include "libavutil/x86/x86util.asm"
 
+SECTION_RODATA
+
+cextern pb_3
+
 SECTION .text
 
 ; slow vertical extension loop function. Works with variable-width, and
@@ -96,6 +100,9 @@ cglobal emu_edge_hvar, 5, 6, 2, dst, dst_stride, start_x, n_words, h, w
     lea            dstq, [dstq+n_wordsq*2]
     neg        n_wordsq
     lea        start_xq, [start_xq+n_wordsq*2]
+%if cpuflag(ssse3)
+    mova             m1, [pb_3]
+%endif
 .y_loop:                                        ; do {
     mov              wq, n_wordsq               ;   initialize w
     SPLATB_LOAD      m0, dstq+start_xq, m1      ;   read(1); splat
@@ -120,6 +127,8 @@ hvar_fn
 
 INIT_XMM sse2
 hvar_fn
+INIT_XMM ssse3
+hvar_fn
 
 ; macro to read/write a horizontal number of pixels (%2) to/from registers
 ; on sse, - fills xmm0-15 for consecutive sets of 16 pixels
@@ -412,6 +421,9 @@ cglobal emu_edge_hfix %+ %%n, 4, 5, 2, dst, dst_stride, start_x, bh, val
 %else
 cglobal emu_edge_hfix %+ %%n, 4, 4, 2, dst, dst_stride, start_x, bh
 %endif
+%if cpuflag(ssse3)
+    mova             m1, [pb_3]
+%endif
 .loop_y:                                        ; do {
     READ_V_PIXEL    %%n, dstq+start_xq          ;   $variable_regs = read($n)
     WRITE_V_PIXEL   %%n, dstq                   ;   write($variable_regs, $n)
@@ -436,6 +448,10 @@ H_EXTEND 16, 22
 %endif
 INIT_XMM sse2
 H_EXTEND 16, 22
+INIT_MMX ssse3
+H_EXTEND 4, 14
+INIT_XMM ssse3
+H_EXTEND 16, 22
 
 %macro PREFETCH_FN 1
 cglobal prefetch, 3, 3, 0, buf, stride, h
diff --git a/libavcodec/x86/videodsp_init.c b/libavcodec/x86/videodsp_init.c
index bd61ab4..85c541f 100644
--- a/libavcodec/x86/videodsp_init.c
+++ b/libavcodec/x86/videodsp_init.c
@@ -147,6 +147,23 @@ static emu_edge_hfix_func *hfixtbl_sse2[11] = {
     ff_emu_edge_hfix20_sse2,   ff_emu_edge_hfix22_sse2
 };
 extern emu_edge_hvar_func ff_emu_edge_hvar_sse2;
+extern emu_edge_hfix_func ff_emu_edge_hfix4_ssse3;
+extern emu_edge_hfix_func ff_emu_edge_hfix6_ssse3;
+extern emu_edge_hfix_func ff_emu_edge_hfix8_ssse3;
+extern emu_edge_hfix_func ff_emu_edge_hfix10_ssse3;
+extern emu_edge_hfix_func ff_emu_edge_hfix12_ssse3;
+extern emu_edge_hfix_func ff_emu_edge_hfix14_ssse3;
+extern emu_edge_hfix_func ff_emu_edge_hfix16_ssse3;
+extern emu_edge_hfix_func ff_emu_edge_hfix18_ssse3;
+extern emu_edge_hfix_func ff_emu_edge_hfix20_ssse3;
+extern emu_edge_hfix_func ff_emu_edge_hfix22_ssse3;
+static emu_edge_hfix_func *hfixtbl_ssse3[11] = { /* hfixN, N = 2, 4, ..., 22 */
+    ff_emu_edge_hfix2_mmx,    ff_emu_edge_hfix4_ssse3,  ff_emu_edge_hfix6_ssse3,
+    ff_emu_edge_hfix8_ssse3,  ff_emu_edge_hfix10_ssse3, ff_emu_edge_hfix12_ssse3,
+    ff_emu_edge_hfix14_ssse3, ff_emu_edge_hfix16_ssse3, ff_emu_edge_hfix18_ssse3,
+    ff_emu_edge_hfix20_ssse3, ff_emu_edge_hfix22_ssse3
+}; /* slot 0 reuses the MMX hfix2: no hfix2_ssse3 is declared above */
+extern emu_edge_hvar_func ff_emu_edge_hvar_ssse3;
 
 static av_always_inline void emulated_edge_mc(uint8_t *dst, const uint8_t *src,
                                               ptrdiff_t dst_stride,
@@ -268,6 +285,18 @@ static av_noinline void emulated_edge_mc_sse2(uint8_t *buf, const uint8_t *src,
                      src_x, src_y, w, h, vfixtbl_sse, &ff_emu_edge_vvar_sse,
                      hfixtbl_sse2, &ff_emu_edge_hvar_sse2);
 }
+
+static av_noinline void emulated_edge_mc_ssse3(uint8_t *buf, const uint8_t *src,
+                                               ptrdiff_t buf_stride,
+                                               ptrdiff_t src_stride,
+                                               int block_w, int block_h,
+                                               int src_x, int src_y, int w,
+                                               int h)
+{ /* Same vfixtbl_sse/vvar_sse as the SSE2 wrapper; hfix/hvar are SSSE3. */
+    emulated_edge_mc(buf, src, buf_stride, src_stride, block_w, block_h,
+                     src_x, src_y, w, h, vfixtbl_sse, &ff_emu_edge_vvar_sse,
+                     hfixtbl_ssse3, &ff_emu_edge_hvar_ssse3);
+}
 #endif /* HAVE_YASM */
 
 void ff_prefetch_mmxext(uint8_t *buf, ptrdiff_t stride, int h);
@@ -301,5 +330,8 @@ av_cold void ff_videodsp_init_x86(VideoDSPContext *ctx, int bpc)
     if (EXTERNAL_SSE2(cpu_flags) && bpc <= 8) {
         ctx->emulated_edge_mc = emulated_edge_mc_sse2;
     }
+    if (EXTERNAL_SSSE3(cpu_flags) && bpc <= 8) {
+        ctx->emulated_edge_mc = emulated_edge_mc_ssse3;
+    }
 #endif /* HAVE_YASM */
 }
-- 
1.8.5.5



More information about the ffmpeg-devel mailing list