[FFmpeg-devel] [FFmpeg-Devel][PATCH 1/5] postproc: Replaced inline asm for prefetching with prefetch macros

Michael Niedermayer michaelni at gmx.at
Wed Apr 1 22:59:34 CEST 2015


On Wed, Apr 01, 2015 at 02:36:01PM -0400, Tucker DiNapoli wrote:
> These patches are updates to patches previously posted to the mailing lists, 
> with some bugs fixed and the reasoning behind some changes expanded on.
> 
> This adds macros in postprocess.c that use inline asm on x86, use
> __builtin_prefetch when building with a recent enough GCC-compatible
> compiler, and do nothing otherwise. The inline asm in
> postprocess_template.c was replaced by these macros.
> ---
>  libpostproc/postprocess.c          | 10 ++++++
>  libpostproc/postprocess_template.c | 63 +++++---------------------------------
>  2 files changed, 18 insertions(+), 55 deletions(-)
> 
> diff --git a/libpostproc/postprocess.c b/libpostproc/postprocess.c
> index 9d89782..f8d28ba 100644
> --- a/libpostproc/postprocess.c
> +++ b/libpostproc/postprocess.c
> @@ -197,6 +197,16 @@ static inline void prefetcht2(const void *p)
>          : : "r" (p)
>      );
>  }
> +#elif AV_GCC_VERSION_AT_LEAST(3,2)
> +#define prefetchnta(p) __builtin_prefetch(p,0,0)
> +#define prefetcht0(p) __builtin_prefetch(p,0,1)
> +#define prefetcht1(p) __builtin_prefetch(p,0,2)
> +#define prefetcht2(p) __builtin_prefetch(p,0,3)
> +#else
> +#define prefetchnta(p)
> +#define prefetcht0(p)
> +#define prefetcht1(p)
> +#define prefetcht2(p)
>  #endif
>  
>  /* The horizontal functions exist only in C because the MMX
> diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c
> index 16e441a..6377ea7 100644
> --- a/libpostproc/postprocess_template.c
> +++ b/libpostproc/postprocess_template.c
> @@ -3368,34 +3368,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
>          // finish 1 block before the next otherwise we might have a problem
>          // with the L1 Cache of the P4 ... or only a few blocks at a time or something
>          for(x=0; x<width; x+=BLOCK_SIZE){
> -
> -#if TEMPLATE_PP_MMXEXT && HAVE_6REGS
> -/*
> -            prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
> -            prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
> -            prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32);
> -            prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32);
> -*/
> -
> -            __asm__(
> -                "mov %4, %%"REG_a"              \n\t"
> -                "shr $2, %%"REG_a"              \n\t"
> -                "and $6, %%"REG_a"              \n\t"
> -                "add %5, %%"REG_a"              \n\t"
> -                "mov %%"REG_a", %%"REG_d"       \n\t"
> -                "imul %1, %%"REG_a"             \n\t"
> -                "imul %3, %%"REG_d"             \n\t"
> -                "prefetchnta 32(%%"REG_a", %0)  \n\t"
> -                "prefetcht0 32(%%"REG_d", %2)   \n\t"
> -                "add %1, %%"REG_a"              \n\t"
> -                "add %3, %%"REG_d"              \n\t"
> -                "prefetchnta 32(%%"REG_a", %0)  \n\t"
> -                "prefetcht0 32(%%"REG_d", %2)   \n\t"
> -                :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride),
> -                "g" ((x86_reg)x), "g" ((x86_reg)copyAhead)
> -                : "%"REG_a, "%"REG_d
> -            );
> -#endif
> +            prefetchnta(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32);
> +            prefetchnta(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32);
> +            prefetcht0(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32);
> +            prefetcht0(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32);

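This will fail on older CPUs that do not support the prefetch* instructions:
the removed "#if TEMPLATE_PP_MMXEXT && HAVE_6REGS" guard restricted them to the
MMXEXT template, whereas the new calls are unconditional.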


[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Into a blind darkness they enter who follow after the Ignorance,
they as if into a greater darkness enter who devote themselves
to the Knowledge alone. -- Isha Upanishad
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 181 bytes
Desc: Digital signature
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20150401/248cc393/attachment.asc>


More information about the ffmpeg-devel mailing list