[FFmpeg-cvslog] r14138 - trunk/libavcodec/i386/dsputil_mmx.c

michael subversion
Wed Jul 9 09:21:12 CEST 2008


Author: michael
Date: Wed Jul  9 09:21:12 2008
New Revision: 14138

Log:
float_to_int16_sse2()
20% faster than sse


Modified:
   trunk/libavcodec/i386/dsputil_mmx.c

Modified: trunk/libavcodec/i386/dsputil_mmx.c
==============================================================================
--- trunk/libavcodec/i386/dsputil_mmx.c	(original)
+++ trunk/libavcodec/i386/dsputil_mmx.c	Wed Jul  9 09:21:12 2008
@@ -2066,6 +2066,23 @@ static void float_to_int16_sse(int16_t *
     );
 }
 
+static void float_to_int16_sse2(int16_t *dst, const float *src, long len){ /* Convert floats at src to int16 at dst with signed saturation, 8 samples per loop pass (SSE2). */
+    asm volatile( /* Operands: %0 = len, %1 = dst, %2 = src. The counter is tested only after the first pass, so at least 8 samples are always converted; presumably len is a positive multiple of 8 -- TODO confirm against callers. */
+        "add        %0          , %0        \n\t" /* %0 = 2*len: size of the int16 output in bytes */
+        "lea         (%2,%0,2)  , %2        \n\t" /* %2 = src + 4*len bytes: one past the last input float */
+        "add        %0          , %1        \n\t" /* %1 = dst + 2*len bytes: one past the last output sample */
+        "neg        %0                      \n\t" /* %0 = -2*len: negative byte offset from the end of dst, counting up towards 0 */
+        "1:                                 \n\t"
+        "cvtps2dq    (%2,%0,2)  , %%xmm0    \n\t" /* floats 0-3 -> int32; the source offset is 2*%0 because a float is twice the size of an int16 */
+        "cvtps2dq  16(%2,%0,2)  , %%xmm1    \n\t" /* floats 4-7 -> int32 */
+        "packssdw   %%xmm1      , %%xmm0    \n\t" /* narrow the 8 int32 values to int16 with signed saturation */
+        "movdqa     %%xmm0      ,  (%1,%0)  \n\t" /* aligned 16-byte store of 8 samples; dst (and the memory operands of cvtps2dq) must be 16-byte aligned */
+        "add        $16         , %0        \n\t" /* advance by 16 output bytes = 8 samples */
+        " js 1b                             \n\t" /* repeat while the offset is still negative */
+        :"+r"(len), "+r"(dst), "+r"(src) /* all three are read-write because the asm modifies them. NOTE(review): no "memory" or xmm0/xmm1 clobbers are declared -- confirm this is intended. */
+    );
+}
+
 extern void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width);
 extern void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width);
 extern void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
@@ -2441,6 +2458,9 @@ void dsputil_init_mmx(DSPContext* c, AVC
             c->vector_fmul_reverse = vector_fmul_reverse_sse;
             c->vector_fmul_add_add = vector_fmul_add_add_sse;
         }
+        if(mm_flags & MM_SSE2){
+            c->float_to_int16 = float_to_int16_sse2;
+        }
         if(mm_flags & MM_3DNOW)
             c->vector_fmul_add_add = vector_fmul_add_add_3dnow; // faster than sse
     }




More information about the ffmpeg-cvslog mailing list