[FFmpeg-devel] [PATCH] fix for roundup issue 2127

Michael Niedermayer michaelni
Sun Jan 2 23:48:56 CET 2011


On Sat, Jan 01, 2011 at 11:47:42PM -0500, Daniel Kang wrote:
> On Sat, Jan 1, 2011 at 11:33 PM, Michael Niedermayer <michaelni at gmx.at> wrote:
> >
> > something like this:
> > "movd  (%3), %%mm0              \n\t"
> > "add   %1, %3                   \n\t"
> > "movd  (%3), %%mm1              \n\t"
> > "movd  (%3,%1), %%mm2           \n\t"
> > "movd  (%3,%1,2), %%mm3         \n\t"
> >
> > This would replace lea with add, which is faster on some CPUs.
> 
> 
> I have removed the leas.

>  dsputil_mmx.h |   37 +++++++++++++++++++------------------
>  1 file changed, 19 insertions(+), 18 deletions(-)
> 9d380b677fd5b19ae4c26b9dda9a5e1ba4a3e233  fix.diff
> From 1a1e5a4c664afdf1511d42544b0856f78548500d Mon Sep 17 00:00:00 2001
> From: Daniel Kang <daniel.d.kang at gmail.com>
> Date: Wed, 29 Dec 2010 22:06:38 -0500
> Subject: [PATCH] 2127 fix
> 
> ---
>  libavcodec/x86/dsputil_mmx.h |   37 +++++++++++++++++++------------------
>  1 files changed, 19 insertions(+), 18 deletions(-)
> 
> diff --git a/libavcodec/x86/dsputil_mmx.h b/libavcodec/x86/dsputil_mmx.h
> index d9c2f44..2d53032 100644
> --- a/libavcodec/x86/dsputil_mmx.h
> +++ b/libavcodec/x86/dsputil_mmx.h
> @@ -24,6 +24,7 @@
> 
>  #include <stdint.h>
>  #include "libavcodec/dsputil.h"
> +#include "libavutil/x86_cpu.h"
> 
>  typedef struct { uint64_t a, b; } xmm_reg;
> 
> @@ -94,32 +95,32 @@ extern const double ff_pd_2[2];
>      SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\
>      SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */
> 
> -static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
> +static inline void transpose4x4(uint8_t *dst, uint8_t *src, x86_reg dst_stride, x86_reg src_stride){
>      __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ...
> -        "movd  %4, %%mm0                \n\t"
> -        "movd  %5, %%mm1                \n\t"
> -        "movd  %6, %%mm2                \n\t"
> -        "movd  %7, %%mm3                \n\t"
> +        "movd  (%3), %%mm0              \n\t"
> +        "add   %1, %3                   \n\t"
> +        "movd  (%3), %%mm1              \n\t"
> +        "movd  (%3,%1,1), %%mm2         \n\t"
> +        "movd  (%3,%1,2), %%mm3         \n\t"
>          "punpcklbw %%mm1, %%mm0         \n\t"
>          "punpcklbw %%mm3, %%mm2         \n\t"
>          "movq %%mm0, %%mm1              \n\t"
>          "punpcklwd %%mm2, %%mm0         \n\t"
>          "punpckhwd %%mm2, %%mm1         \n\t"
> -        "movd  %%mm0, %0                \n\t"
> +        "movd  %%mm0, (%2)              \n\t"
> +        "add   %0, %2                   \n\t"
>          "punpckhdq %%mm0, %%mm0         \n\t"
> -        "movd  %%mm0, %1                \n\t"
> -        "movd  %%mm1, %2                \n\t"
> +        "movd  %%mm0, (%2)              \n\t"
> +        "movd  %%mm1, (%2,%0,1)         \n\t"
>          "punpckhdq %%mm1, %%mm1         \n\t"
> -        "movd  %%mm1, %3                \n\t"
> -
> -        : "=m" (*(uint32_t*)(dst + 0*dst_stride)),
> -          "=m" (*(uint32_t*)(dst + 1*dst_stride)),
> -          "=m" (*(uint32_t*)(dst + 2*dst_stride)),
> -          "=m" (*(uint32_t*)(dst + 3*dst_stride))
> -        :  "m" (*(uint32_t*)(src + 0*src_stride)),
> -           "m" (*(uint32_t*)(src + 1*src_stride)),
> -           "m" (*(uint32_t*)(src + 2*src_stride)),
> -           "m" (*(uint32_t*)(src + 3*src_stride))
> +        "movd  %%mm1, (%2,%0,2)         \n\t"
> +
> +        :  "+&r" (dst_stride),
> +           "+&r" (src_stride),
> +           "+&r" (dst),
> +           "+&r" (src)

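Only two of these operands are written to: dst and src, which the "add"
instructions modify. Thus only those two need "+&"; the two strides are
only read and can be plain input operands.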

[...]
--
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Those who are too smart to engage in politics are punished by being
governed by those who are dumber. -- Plato 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20110102/a5f4e629/attachment.pgp>



More information about the ffmpeg-devel mailing list