[FFmpeg-cvslog] r15591 - trunk/libavcodec/i386/dsputilenc_mmx.c

bcoudurier subversion
Thu Oct 9 20:36:49 CEST 2008


Author: bcoudurier
Date: Thu Oct  9 20:36:49 2008
New Revision: 15591

Log:
get_pixels_sse2, ~+12% performance compared to mmx

Modified:
   trunk/libavcodec/i386/dsputilenc_mmx.c

Modified: trunk/libavcodec/i386/dsputilenc_mmx.c
==============================================================================
--- trunk/libavcodec/i386/dsputilenc_mmx.c	(original)
+++ trunk/libavcodec/i386/dsputilenc_mmx.c	Thu Oct  9 20:36:49 2008
@@ -56,6 +56,40 @@ static void get_pixels_mmx(DCTELEM *bloc
     );
 }
 
+static void get_pixels_sse2(DCTELEM *block, const uint8_t *pixels, int line_size)
+{ /* Reads 8 rows of 8 bytes from pixels (rows line_size bytes apart), zero-extends each byte to a 16-bit word, and stores 8 rows of 16 bytes at block. */
+    asm volatile(
+        "pxor %%xmm7,      %%xmm7         \n\t" /* xmm7 = 0: supplies the high byte of every widened word */
+        "movq (%0),        %%xmm0         \n\t" /* row 0: 8 source bytes */
+        "movq (%0, %2),    %%xmm1         \n\t" /* row 1: pixels + line_size */
+        "movq (%0, %2,2),  %%xmm2         \n\t" /* row 2: pixels + 2*line_size */
+        "movq (%0, %3),    %%xmm3         \n\t" /* row 3: %3 is the precomputed 3*line_size */
+        "lea (%0,%2,4), %0                \n\t" /* advance pixels by 4 rows for the second half */
+        "punpcklbw %%xmm7, %%xmm0         \n\t" /* interleave with zero: 8 bytes -> 8 unsigned 16-bit words */
+        "punpcklbw %%xmm7, %%xmm1         \n\t"
+        "punpcklbw %%xmm7, %%xmm2         \n\t"
+        "punpcklbw %%xmm7, %%xmm3         \n\t"
+        "movdqa %%xmm0,      (%1)         \n\t" /* movdqa is an aligned store: block must be 16-byte aligned */
+        "movdqa %%xmm1,    16(%1)         \n\t" /* 16 bytes per output row; assumes DCTELEM is 16-bit -- TODO confirm */
+        "movdqa %%xmm2,    32(%1)         \n\t"
+        "movdqa %%xmm3,    48(%1)         \n\t"
+        "movq (%0),        %%xmm0         \n\t" /* rows 4..7, same addressing relative to the advanced pointer */
+        "movq (%0, %2),    %%xmm1         \n\t"
+        "movq (%0, %2,2),  %%xmm2         \n\t"
+        "movq (%0, %3),    %%xmm3         \n\t"
+        "punpcklbw %%xmm7, %%xmm0         \n\t"
+        "punpcklbw %%xmm7, %%xmm1         \n\t"
+        "punpcklbw %%xmm7, %%xmm2         \n\t"
+        "punpcklbw %%xmm7, %%xmm3         \n\t"
+        "movdqa %%xmm0,    64(%1)         \n\t" /* output rows 4..7 at byte offsets 64..112 */
+        "movdqa %%xmm1,    80(%1)         \n\t"
+        "movdqa %%xmm2,    96(%1)         \n\t"
+        "movdqa %%xmm3,   112(%1)         \n\t"
+        : "+r" (pixels) /* read-write: the lea above modifies it */
+        : "r" (block), "r" ((x86_reg)line_size), "r" ((x86_reg)line_size*3)
+        : "memory" ); /* the asm writes 128 bytes through block, which is only an input operand; tell the compiler memory changed */
+}
+
 static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride)
 {
     asm volatile(
@@ -1375,6 +1409,7 @@ void dsputilenc_init_mmx(DSPContext* c, 
         }
 
         if(mm_flags & MM_SSE2){
+            c->get_pixels = get_pixels_sse2;
             c->sum_abs_dctelem= sum_abs_dctelem_sse2;
             c->hadamard8_diff[0]= hadamard8_diff16_sse2;
             c->hadamard8_diff[1]= hadamard8_diff_sse2;




More information about the ffmpeg-cvslog mailing list