[FFmpeg-cvslog] x86/dsputil_mmx: support 4 sample edges

Michael Niedermayer git at videolan.org
Fri Jun 22 17:28:57 CEST 2012


ffmpeg | branch: master | Michael Niedermayer <michaelni at gmx.at> | Fri Jun 22 16:12:54 2012 +0200| [fba18ef8ccdbe62d1fa08bebcacbf2158c60f4c6] | committer: Michael Niedermayer

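x86/dsputil_mmx: support 4 sample edges

draw_edges_mmx() gains a left/right border path for w == 4; the former
catch-all else branch is now taken only when w == 16.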

Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fba18ef8ccdbe62d1fa08bebcacbf2158c60f4c6
---

 libavcodec/x86/dsputil_mmx.c |   21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index a4e3239..31277d5 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -807,7 +807,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
             : "+r"(ptr)
             : "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height)
             );
-    } else {
+    } else if(w==16){
         __asm__ volatile (
             "1:                                 \n\t"
             "movd            (%0), %%mm0        \n\t"
@@ -828,6 +828,25 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
             : "+r"(ptr)
             : "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height)
             );
+    } else {
+        av_assert1(w == 4);
+        __asm__ volatile (
+            "1:                             \n\t"
+            "movd            (%0), %%mm0    \n\t"
+            "punpcklbw      %%mm0, %%mm0    \n\t"
+            "punpcklwd      %%mm0, %%mm0    \n\t"
+            "movd           %%mm0, -4(%0)   \n\t"
+            "movd      -4(%0, %2), %%mm1    \n\t"
+            "punpcklbw      %%mm1, %%mm1    \n\t"
+            "punpckhwd      %%mm1, %%mm1    \n\t"
+            "punpckhdq      %%mm1, %%mm1    \n\t"
+            "movd           %%mm1, (%0, %2) \n\t"
+            "add               %1, %0       \n\t"
+            "cmp               %3, %0       \n\t"
+            "jb                1b           \n\t"
+            : "+r"(ptr)
+            : "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height)
+            );
     }
 
     /* top and bottom (and hopefully also the corners) */



More information about the ffmpeg-cvslog mailing list