[FFmpeg-devel] [PATCH] x86/dsputilenc: implement SSE2 version of diff_pixels

James Almer jamrial at gmail.com
Mon May 26 08:49:35 CEST 2014


Signed-off-by: James Almer <jamrial at gmail.com>
---
 libavcodec/x86/dsputilenc.asm   | 25 +++++++++++++++++++++++++
 libavcodec/x86/dsputilenc_mmx.c |  3 +++
 2 files changed, 28 insertions(+)

diff --git a/libavcodec/x86/dsputilenc.asm b/libavcodec/x86/dsputilenc.asm
index 46330fe..dc2b041 100644
--- a/libavcodec/x86/dsputilenc.asm
+++ b/libavcodec/x86/dsputilenc.asm
@@ -419,6 +419,31 @@ cglobal diff_pixels, 4,5
     jne .loop
     REP_RET
 
+INIT_XMM sse2                       ; m0..m4 below are XMM registers
+cglobal diff_pixels, 4,5            ; per the ff_diff_pixels_sse2 prototype in this patch: r0=block, r1=s1, r2=s2, r3=stride; r4 is scratch
+    movsxdifnidn r3, r3d            ; stride is a C int: sign-extend it to pointer width where needed
+    pxor         m4, m4             ; m4 = 0, the high bytes used when widening u8 -> 16 bit
+    add          r0,  128           ; r0 = end of the 128-byte output (64 int16 coefficients)
+    mov          r4, -128           ; r4 = negative byte offset from that end, counts up to 0
+.loop:                              ; each iteration handles two rows of 8 pixels
+    movh         m0, [r1]           ; s1, row n     (8 bytes into the low half)
+    movh         m2, [r2]           ; s2, row n
+    movh         m1, [r1+r3]        ; s1, row n+1
+    movh         m3, [r2+r3]        ; s2, row n+1
+    punpcklbw    m0, m4             ; interleave with zero: 8 x u8 -> 8 x 16-bit words
+    punpcklbw    m1, m4
+    punpcklbw    m2, m4
+    punpcklbw    m3, m4
+    psubw        m0, m2             ; row n:   s1 - s2
+    psubw        m1, m3             ; row n+1: s1 - s2
+    mova [r0+r4+0 ], m0             ; aligned store of row n; assumes block is 16-byte aligned
+    mova [r0+r4+16], m1             ; aligned store of row n+1
+    lea          r1, [r1+r3*2]      ; advance both sources by two rows
+    lea          r2, [r2+r3*2]
+    add          r4, 32             ; 32 output bytes written; ZF is set once r4 reaches 0
+    jne .loop                       ; 128 / 32 = 4 iterations = 8 rows
+    RET
+
 INIT_MMX mmx
 ; int ff_pix_sum16_mmx(uint8_t *pix, int line_size)
 cglobal pix_sum16, 2, 3
diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c
index e63d510..acff947 100644
--- a/libavcodec/x86/dsputilenc_mmx.c
+++ b/libavcodec/x86/dsputilenc_mmx.c
@@ -36,6 +36,8 @@ void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size);
 void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size);
 void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
                         int stride);
+void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2,
+                         int stride);
 int ff_pix_sum16_mmx(uint8_t *pix, int line_size);
 int ff_pix_norm1_mmx(uint8_t *pix, int line_size);
 int ff_sum_abs_dctelem_mmx(int16_t *block);
@@ -971,6 +973,7 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
     if (EXTERNAL_SSE2(cpu_flags)) {
         c->sse[0] = ff_sse16_sse2;
         c->sum_abs_dctelem   = ff_sum_abs_dctelem_sse2;
+        c->diff_pixels = ff_diff_pixels_sse2;
 
 #if HAVE_ALIGNED_STACK
         c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2;
-- 
1.8.5.5



More information about the ffmpeg-devel mailing list