[FFmpeg-devel] [FFmpeg-Devel][PATCH 1/5] postproc: Replaced inline asm for prefetching with prefetch macros

Tucker DiNapoli t.dinapoli42 at gmail.com
Wed Apr 1 20:36:01 CEST 2015


These patches are updates to patches previously posted to the mailing lists, 
with some bugs fixed and the reasoning behind some changes expanded on.

This adds macros in postprocess.c that use inline asm on x86,
__builtin_prefetch when using a recent enough GCC-compatible compiler, and
do nothing otherwise. The inline asm in postprocess_template.c is
replaced by these macros.
---
 libpostproc/postprocess.c          | 10 ++++++
 libpostproc/postprocess_template.c | 63 +++++---------------------------------
 2 files changed, 18 insertions(+), 55 deletions(-)

diff --git a/libpostproc/postprocess.c b/libpostproc/postprocess.c
index 9d89782..f8d28ba 100644
--- a/libpostproc/postprocess.c
+++ b/libpostproc/postprocess.c
@@ -197,6 +197,16 @@ static inline void prefetcht2(const void *p)
         : : "r" (p)
     );
 }
+#elif AV_GCC_VERSION_AT_LEAST(3,2)
+#define prefetchnta(p) __builtin_prefetch(p,0,0)
+#define prefetcht0(p) __builtin_prefetch(p,0,1)
+#define prefetcht1(p) __builtin_prefetch(p,0,2)
+#define prefetcht2(p) __builtin_prefetch(p,0,3)
+#else
+#define prefetchnta(p)
+#define prefetcht0(p)
+#define prefetcht1(p)
+#define prefetcht2(p)
 #endif
 
 /* The horizontal functions exist only in C because the MMX
diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c
index 16e441a..6377ea7 100644
--- a/libpostproc/postprocess_template.c
+++ b/libpostproc/postprocess_template.c
@@ -3368,34 +3368,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
         // finish 1 block before the next otherwise we might have a problem
         // with the L1 Cache of the P4 ... or only a few blocks at a time or something
         for(x=0; x<width; x+=BLOCK_SIZE){
-
-#if TEMPLATE_PP_MMXEXT && HAVE_6REGS
-/*
-            prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
-            prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
-            prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32);
-            prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32);
-*/
-
-            __asm__(
-                "mov %4, %%"REG_a"              \n\t"
-                "shr $2, %%"REG_a"              \n\t"
-                "and $6, %%"REG_a"              \n\t"
-                "add %5, %%"REG_a"              \n\t"
-                "mov %%"REG_a", %%"REG_d"       \n\t"
-                "imul %1, %%"REG_a"             \n\t"
-                "imul %3, %%"REG_d"             \n\t"
-                "prefetchnta 32(%%"REG_a", %0)  \n\t"
-                "prefetcht0 32(%%"REG_d", %2)   \n\t"
-                "add %1, %%"REG_a"              \n\t"
-                "add %3, %%"REG_d"              \n\t"
-                "prefetchnta 32(%%"REG_a", %0)  \n\t"
-                "prefetcht0 32(%%"REG_d", %2)   \n\t"
-                :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride),
-                "g" ((x86_reg)x), "g" ((x86_reg)copyAhead)
-                : "%"REG_a, "%"REG_d
-            );
-#endif
+            prefetchnta(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32);
+            prefetchnta(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32);
+            prefetcht0(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32);
+            prefetcht0(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32);
 
             RENAME(blockCopy)(dstBlock + dstStride*8, dstStride,
                               srcBlock + srcStride*8, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
@@ -3474,33 +3450,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
             uint8_t *dstBlockStart = dstBlock;
             const uint8_t *srcBlockStart = srcBlock;
           for(; x < endx; x+=BLOCK_SIZE){
-#if TEMPLATE_PP_MMXEXT && HAVE_6REGS
-/*
-            prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
-            prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
-            prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32);
-            prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32);
-*/
-
-            __asm__(
-                "mov %4, %%"REG_a"              \n\t"
-                "shr $2, %%"REG_a"              \n\t"
-                "and $6, %%"REG_a"              \n\t"
-                "add %5, %%"REG_a"              \n\t"
-                "mov %%"REG_a", %%"REG_d"       \n\t"
-                "imul %1, %%"REG_a"             \n\t"
-                "imul %3, %%"REG_d"             \n\t"
-                "prefetchnta 32(%%"REG_a", %0)  \n\t"
-                "prefetcht0 32(%%"REG_d", %2)   \n\t"
-                "add %1, %%"REG_a"              \n\t"
-                "add %3, %%"REG_d"              \n\t"
-                "prefetchnta 32(%%"REG_a", %0)  \n\t"
-                "prefetcht0 32(%%"REG_d", %2)   \n\t"
-                :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride),
-                "g" ((x86_reg)x), "g" ((x86_reg)copyAhead)
-                : "%"REG_a, "%"REG_d
-            );
-#endif
+            prefetchnta(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32);
+            prefetchnta(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32);
+            prefetcht0(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32);
+            prefetcht0(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32);
 
             RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride,
                               srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
-- 
2.3.3



More information about the ffmpeg-devel mailing list