[FFmpeg-devel] [PATCH 4/6] pp: move the optimization templating configuration into the template itself.

Wed Nov 14 23:29:58 CET 2012

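Also stop undefining and redefining the HAVE_* flags in postprocess.c; the
template now selects its code paths through its own COMPILE_* macros instead.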
---
 libpostproc/postprocess.c                  |  73 ++--------
 libpostproc/postprocess_altivec_template.c |   2 +-
 libpostproc/postprocess_template.c         | 226 +++++++++++++++++------------
 3 files changed, 146 insertions(+), 155 deletions(-)

diff --git a/libpostproc/postprocess.c b/libpostproc/postprocess.c
index f0d97d3..7001778 100644
--- a/libpostproc/postprocess.c
+++ b/libpostproc/postprocess.c
@@ -538,84 +538,33 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride,
 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
 //Plain C versions
 //we always compile C for testing which needs bitexactness
-#define COMPILE_C
+#define COMPILE_C 1
+#include "postprocess_template.c"
 
 #if HAVE_ALTIVEC
-#define COMPILE_ALTIVEC
+#define COMPILE_ALTIVEC 1
+#include "postprocess_altivec_template.c"
+#include "postprocess_template.c"
 #endif //HAVE_ALTIVEC
 
 #if ARCH_X86 && HAVE_INLINE_ASM
 
 #if (HAVE_MMX_INLINE && !HAVE_AMD3DNOW_INLINE && !HAVE_MMXEXT_INLINE) || CONFIG_RUNTIME_CPUDETECT
-#define COMPILE_MMX
-#endif
-
-#if HAVE_MMXEXT_INLINE || CONFIG_RUNTIME_CPUDETECT
-#define COMPILE_MMX2
-#endif
-
-#if (HAVE_AMD3DNOW_INLINE && !HAVE_MMXEXT_INLINE) || CONFIG_RUNTIME_CPUDETECT
-#define COMPILE_3DNOW
-#endif
-#endif /* ARCH_X86 */
-
-#undef HAVE_MMX_INLINE
-#define HAVE_MMX_INLINE 0
-#undef HAVE_MMXEXT_INLINE
-#define HAVE_MMXEXT_INLINE 0
-#undef HAVE_AMD3DNOW_INLINE
-#define HAVE_AMD3DNOW_INLINE 0
-#undef HAVE_ALTIVEC
-#define HAVE_ALTIVEC 0
-
-#ifdef COMPILE_C
-#define RENAME(a) a ## _C
+#define COMPILE_MMX 1
 #include "postprocess_template.c"
 #endif
 
-#ifdef COMPILE_ALTIVEC
-#undef RENAME
-#undef HAVE_ALTIVEC
-#define HAVE_ALTIVEC 1
-#define RENAME(a) a ## _altivec
-#include "postprocess_altivec_template.c"
-#include "postprocess_template.c"
-#endif
-
-//MMX versions
-#ifdef COMPILE_MMX
-#undef RENAME
-#undef HAVE_MMX_INLINE
-#define HAVE_MMX_INLINE 1
-#define RENAME(a) a ## _MMX
-#include "postprocess_template.c"
-#endif
-
-//MMX2 versions
-#ifdef COMPILE_MMX2
-#undef RENAME
-#undef HAVE_MMX_INLINE
-#undef HAVE_MMXEXT_INLINE
-#define HAVE_MMX_INLINE 1
-#define HAVE_MMXEXT_INLINE 1
-#define RENAME(a) a ## _MMX2
+#if HAVE_MMXEXT_INLINE || CONFIG_RUNTIME_CPUDETECT
+#define COMPILE_MMXEXT 1
 #include "postprocess_template.c"
 #endif
 
-//3DNOW versions
-#ifdef COMPILE_3DNOW
-#undef RENAME
-#undef HAVE_MMX_INLINE
-#undef HAVE_MMXEXT_INLINE
-#undef HAVE_AMD3DNOW_INLINE
-#define HAVE_MMX_INLINE 1
-#define HAVE_MMXEXT_INLINE 0
-#define HAVE_AMD3DNOW_INLINE 1
-#define RENAME(a) a ## _3DNow
+#if (HAVE_AMD3DNOW_INLINE && !HAVE_MMXEXT_INLINE) || CONFIG_RUNTIME_CPUDETECT
+#define COMPILE_3DNOW 1
 #include "postprocess_template.c"
 #endif
 
-// minor note: the HAVE_xyz is messed up after that line so do not use it.
+#endif /* ARCH_X86 && HAVE_INLINE_ASM */
 
 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
         const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
diff --git a/libpostproc/postprocess_altivec_template.c b/libpostproc/postprocess_altivec_template.c
index 3a37562..fa6ebe2 100644
--- a/libpostproc/postprocess_altivec_template.c
+++ b/libpostproc/postprocess_altivec_template.c
@@ -825,7 +825,7 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
 #define doHorizDefFilter_altivec(a...) doHorizDefFilter_C(a)
 #define do_a_deblock_altivec(a...) do_a_deblock_C(a)
 
-static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
+static inline void tempNoiseReducer_altivec(uint8_t *src, int stride,
                                             uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise)
 {
     const vector signed char neg1 = vec_splat_s8(-1);
diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c
index f45ccf8..6611670 100644
--- a/libpostproc/postprocess_template.c
+++ b/libpostproc/postprocess_template.c
@@ -25,37 +25,72 @@
 
 #include "libavutil/x86/asm.h"
 
+#ifdef COMPILE_C // the includer defines exactly one COMPILE_* before each #include of this template
+# define RENAME(a) a ## _C
+#else
+# define COMPILE_C 0 // variant not requested: define to 0 so the macro is always defined
+#endif
+
+#ifdef COMPILE_ALTIVEC
+# define COMPILE_ALTIVEC 1 // NOTE(review): repeats the includer's own "#define COMPILE_ALTIVEC 1"; no other branch does this
+# define RENAME(a) a ## _altivec
+#else
+# define COMPILE_ALTIVEC 0
+#endif
+
+#ifdef COMPILE_MMX
+# define RENAME(a) a ## _MMX
+#else
+# define COMPILE_MMX 0 // may be switched back to 1 by the MMXEXT / 3DNOW branches below
+#endif
+
+#ifdef COMPILE_MMXEXT
+# undef  COMPILE_MMX // drop the 0 set above
+# define COMPILE_MMX 1 // the MMXEXT variant also enables every "#if COMPILE_MMX" section
+# define RENAME(a) a ## _MMX2 // function suffix stays _MMX2 even though the macro is named MMXEXT
+#else
+# define COMPILE_MMXEXT 0
+#endif
+
+#ifdef COMPILE_3DNOW
+# undef  COMPILE_MMX // drop the 0 set above
+# define COMPILE_MMX 1 // the 3DNow variant also enables every "#if COMPILE_MMX" section
+# define RENAME(a) a ## _3DNow
+#else
+# define COMPILE_3DNOW 0
+#endif
+#endif
+
 #undef REAL_PAVGB
 #undef PAVGB
 #undef PMINUB
 #undef PMAXUB
 
-#if   HAVE_MMXEXT_INLINE
+#if   COMPILE_MMXEXT
 #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
-#elif HAVE_AMD3DNOW_INLINE
+#elif COMPILE_3DNOW
 #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
 #endif
 #define PAVGB(a,b)  REAL_PAVGB(a,b)
 
-#if   HAVE_MMXEXT_INLINE
+#if   COMPILE_MMXEXT
 #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t"
-#elif HAVE_MMX_INLINE
+#elif COMPILE_MMX
 #define PMINUB(b,a,t) \
     "movq " #a ", " #t " \n\t"\
     "psubusb " #b ", " #t " \n\t"\
     "psubb " #t ", " #a " \n\t"
 #endif
 
-#if   HAVE_MMXEXT_INLINE
+#if   COMPILE_MMXEXT
 #define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t"
-#elif HAVE_MMX_INLINE
+#elif COMPILE_MMX
 #define PMAXUB(a,b) \
     "psubusb " #a ", " #b " \n\t"\
     "paddb " #a ", " #b " \n\t"
 #endif
 
 //FIXME? |255-0| = 1 (should not be a problem ...)
-#if HAVE_MMX_INLINE
+#if COMPILE_MMX
 /**
  * Check if the middle 8x8 Block in the given 8x16 block is flat
  */
@@ -135,7 +170,7 @@ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
         "psubusb %%mm3, %%mm4                   \n\t"
 
         "                                       \n\t"
-#if HAVE_MMXEXT_INLINE
+#if COMPILE_MMXEXT
         "pxor %%mm7, %%mm7                      \n\t"
         "psadbw %%mm7, %%mm0                    \n\t"
 #else
@@ -169,16 +204,16 @@ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
         return 2;
     }
 }
-#endif //HAVE_MMX_INLINE
+#endif //COMPILE_MMX
 
 /**
  * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle)
  * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16
  */
-#if !HAVE_ALTIVEC
+#if !COMPILE_ALTIVEC
 static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
 {
-#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#if COMPILE_MMXEXT || COMPILE_3DNOW
     src+= stride*3;
     __asm__ volatile(        //"movv %0 %1 %2\n\t"
         "movq %2, %%mm0                         \n\t"  // QP,..., QP
@@ -305,7 +340,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
         : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
         : "%"REG_a, "%"REG_c
     );
-#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#else //COMPILE_MMXEXT || COMPILE_3DNOW
     const int l1= stride;
     const int l2= stride + l1;
     const int l3= stride + l2;
@@ -344,9 +379,9 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
 
         src++;
     }
-#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#endif //COMPILE_MMXEXT || COMPILE_3DNOW
 }
-#endif //HAVE_ALTIVEC
+#endif //COMPILE_ALTIVEC
 
 /**
  * Experimental Filter 1
@@ -357,7 +392,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
  */
 static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
 {
-#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#if COMPILE_MMXEXT || COMPILE_3DNOW
     src+= stride*3;
 
     __asm__ volatile(
@@ -443,7 +478,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
         : "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb)
         : "%"REG_a, "%"REG_c
     );
-#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#else //COMPILE_MMXEXT || COMPILE_3DNOW
 
     const int l1= stride;
     const int l2= stride + l1;
@@ -477,13 +512,13 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
         }
         src++;
     }
-#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#endif //COMPILE_MMXEXT || COMPILE_3DNOW
 }
 
-#if !HAVE_ALTIVEC
+#if !COMPILE_ALTIVEC
 static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c)
 {
-#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#if COMPILE_MMXEXT || COMPILE_3DNOW
 /*
     uint8_t tmp[16];
     const int l1= stride;
@@ -764,7 +799,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
     }
 }
 */
-#elif HAVE_MMX_INLINE
+#elif COMPILE_MMX
     DECLARE_ALIGNED(8, uint64_t, tmp)[4]; // make space for 4 8-byte vars
     src+= stride*4;
     __asm__ volatile(
@@ -872,7 +907,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         "movq (%3), %%mm2                       \n\t" // 2L0 - 5L1 + 5L2 - 2L3
         "movq 8(%3), %%mm3                      \n\t" // 2H0 - 5H1 + 5H2 - 2H3
 
-#if HAVE_MMXEXT_INLINE
+#if COMPILE_MMXEXT
         "movq %%mm7, %%mm6                      \n\t" // 0
         "psubw %%mm0, %%mm6                     \n\t"
         "pmaxsw %%mm6, %%mm0                    \n\t" // |2L4 - 5L5 + 5L6 - 2L7|
@@ -904,7 +939,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         "psubw %%mm6, %%mm3                     \n\t" // |2H0 - 5H1 + 5H2 - 2H3|
 #endif
 
-#if HAVE_MMXEXT_INLINE
+#if COMPILE_MMXEXT
         "pminsw %%mm2, %%mm0                    \n\t"
         "pminsw %%mm3, %%mm1                    \n\t"
 #else
@@ -968,7 +1003,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         "pand %%mm2, %%mm4                      \n\t"
         "pand %%mm3, %%mm5                      \n\t"
 
-#if HAVE_MMXEXT_INLINE
+#if COMPILE_MMXEXT
         "pminsw %%mm0, %%mm4                    \n\t"
         "pminsw %%mm1, %%mm5                    \n\t"
 #else
@@ -995,7 +1030,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         : "r" ((x86_reg)stride), "m" (c->pQPb), "r"(tmp)
         : "%"REG_a
     );
-#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#else //COMPILE_MMXEXT || COMPILE_3DNOW
     const int l1= stride;
     const int l2= stride + l1;
     const int l3= stride + l2;
@@ -1033,14 +1068,14 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         }
         src++;
     }
-#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#endif //COMPILE_MMXEXT || COMPILE_3DNOW
 }
-#endif //HAVE_ALTIVEC
+#endif //COMPILE_ALTIVEC
 
-#if !HAVE_ALTIVEC
+#if !COMPILE_ALTIVEC
 static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
 {
-#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#if COMPILE_MMXEXT || COMPILE_3DNOW
     DECLARE_ALIGNED(8, uint64_t, tmp)[3];
     __asm__ volatile(
         "pxor %%mm6, %%mm6                      \n\t"
@@ -1060,7 +1095,7 @@ static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
 
 #undef REAL_FIND_MIN_MAX
 #undef FIND_MIN_MAX
-#if HAVE_MMXEXT_INLINE
+#if COMPILE_MMXEXT
 #define REAL_FIND_MIN_MAX(addr)\
         "movq " #addr ", %%mm0                  \n\t"\
         "pminub %%mm0, %%mm7                    \n\t"\
@@ -1087,7 +1122,7 @@ FIND_MIN_MAX((%0, %1, 8))
 
         "movq %%mm7, %%mm4                      \n\t"
         "psrlq $8, %%mm7                        \n\t"
-#if HAVE_MMXEXT_INLINE
+#if COMPILE_MMXEXT
         "pminub %%mm4, %%mm7                    \n\t" // min of pixels
         "pshufw $0xF9, %%mm7, %%mm4             \n\t"
         "pminub %%mm4, %%mm7                    \n\t" // min of pixels
@@ -1112,7 +1147,7 @@ FIND_MIN_MAX((%0, %1, 8))
 
         "movq %%mm6, %%mm4                      \n\t"
         "psrlq $8, %%mm6                        \n\t"
-#if HAVE_MMXEXT_INLINE
+#if COMPILE_MMXEXT
         "pmaxub %%mm4, %%mm6                    \n\t" // max of pixels
         "pshufw $0xF9, %%mm6, %%mm4             \n\t"
         "pmaxub %%mm4, %%mm6                    \n\t"
@@ -1266,7 +1301,7 @@ DERING_CORE((%0, %1, 8)    ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
         : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2), "q"(tmp)
         : "%"REG_a, "%"REG_d
     );
-#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#else //COMPILE_MMXEXT || COMPILE_3DNOW
     int y;
     int min=255;
     int max=0;
@@ -1383,9 +1418,9 @@ DERING_CORE((%0, %1, 8)    ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
 //        src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255;
     }
 #endif
-#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#endif //COMPILE_MMXEXT || COMPILE_3DNOW
 }
-#endif //HAVE_ALTIVEC
+#endif //COMPILE_ALTIVEC
 
 /**
  * Deinterlace the given block by linearly interpolating every second line.
@@ -1395,7 +1430,7 @@ DERING_CORE((%0, %1, 8)    ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
  */
 static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride)
 {
-#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#if COMPILE_MMXEXT || COMPILE_3DNOW
     src+= 4*stride;
     __asm__ volatile(
         "lea (%0, %1), %%"REG_a"                \n\t"
@@ -1448,7 +1483,7 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid
  */
 static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride)
 {
-#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#if COMPILE_MMXEXT || COMPILE_3DNOW
     src+= stride*3;
     __asm__ volatile(
         "lea (%0, %1), %%"REG_a"                \n\t"
@@ -1490,7 +1525,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc)    , (%%REGc,
         : : "r" (src), "r" ((x86_reg)stride)
         : "%"REG_a, "%"REG_d, "%"REG_c
     );
-#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#else //COMPILE_MMXEXT || COMPILE_3DNOW
     int x;
     src+= stride*3;
     for(x=0; x<8; x++){
@@ -1500,7 +1535,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc)    , (%%REGc,
         src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4);
         src++;
     }
-#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#endif //COMPILE_MMXEXT || COMPILE_3DNOW
 }
 
 /**
@@ -1512,7 +1547,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc)    , (%%REGc,
  */
 static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp)
 {
-#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#if COMPILE_MMXEXT || COMPILE_3DNOW
     src+= stride*4;
     __asm__ volatile(
         "lea (%0, %1), %%"REG_a"                \n\t"
@@ -1561,7 +1596,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
         : : "r" (src), "r" ((x86_reg)stride), "r"(tmp)
         : "%"REG_a, "%"REG_d
     );
-#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#else //COMPILE_MMXEXT || COMPILE_3DNOW
     int x;
     src+= stride*4;
     for(x=0; x<8; x++){
@@ -1579,7 +1614,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
 
         src++;
     }
-#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#endif //COMPILE_MMXEXT || COMPILE_3DNOW
 }
 
 /**
@@ -1591,7 +1626,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
  */
 static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2)
 {
-#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#if COMPILE_MMXEXT || COMPILE_3DNOW
     src+= stride*4;
     __asm__ volatile(
         "lea (%0, %1), %%"REG_a"                \n\t"
@@ -1651,7 +1686,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8)    , (%%REGd, %1, 4))
         : : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2)
         : "%"REG_a, "%"REG_d
     );
-#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#else //COMPILE_MMXEXT || COMPILE_3DNOW
     int x;
     src+= stride*4;
     for(x=0; x<8; x++){
@@ -1680,7 +1715,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8)    , (%%REGd, %1, 4))
 
         src++;
     }
-#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#endif //COMPILE_MMXEXT || COMPILE_3DNOW
 }
 
 /**
@@ -1692,7 +1727,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8)    , (%%REGd, %1, 4))
  */
 static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp)
 {
-#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#if COMPILE_MMXEXT || COMPILE_3DNOW
     src+= 4*stride;
     __asm__ volatile(
         "lea (%0, %1), %%"REG_a"                \n\t"
@@ -1739,7 +1774,7 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin
         : : "r" (src), "r" ((x86_reg)stride), "r" (tmp)
         : "%"REG_a, "%"REG_d
     );
-#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#else //COMPILE_MMXEXT || COMPILE_3DNOW
     int a, b, c, x;
     src+= 4*stride;
 
@@ -1782,7 +1817,7 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin
         src += 4;
         tmp += 4;
     }
-#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#endif //COMPILE_MMXEXT || COMPILE_3DNOW
 }
 
 /**
@@ -1793,9 +1828,9 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin
  */
 static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride)
 {
-#if HAVE_MMX_INLINE
+#if COMPILE_MMX
     src+= 4*stride;
-#if HAVE_MMXEXT_INLINE
+#if COMPILE_MMXEXT
     __asm__ volatile(
         "lea (%0, %1), %%"REG_a"                \n\t"
         "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
@@ -1885,8 +1920,8 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
         : : "r" (src), "r" ((x86_reg)stride)
         : "%"REG_a, "%"REG_d
     );
-#endif //HAVE_MMXEXT_INLINE
-#else //HAVE_MMX_INLINE
+#endif //COMPILE_MMXEXT
+#else //COMPILE_MMX
     int x, y;
     src+= 4*stride;
     // FIXME - there should be a way to do a few columns in parallel like w/mmx
@@ -1905,10 +1940,10 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
         }
         src++;
     }
-#endif //HAVE_MMX_INLINE
+#endif //COMPILE_MMX
 }
 
-#if HAVE_MMX_INLINE
+#if COMPILE_MMX
 /**
  * Transpose and shift the given 8x8 Block into dst1 and dst2.
  */
@@ -2073,10 +2108,10 @@ static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src)
         : "%"REG_a, "%"REG_d
     );
 }
-#endif //HAVE_MMX_INLINE
+#endif //COMPILE_MMX
 //static long test=0;
 
-#if !HAVE_ALTIVEC
+#if !COMPILE_ALTIVEC
 static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
                                     uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise)
 {
@@ -2087,7 +2122,7 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
 
 #define FAST_L2_DIFF
 //#define L1_DIFF //u should change the thresholds too if u try that one
-#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#if COMPILE_MMXEXT || COMPILE_3DNOW
     __asm__ volatile(
         "lea (%2, %2, 2), %%"REG_a"             \n\t" // 3*stride
         "lea (%2, %2, 4), %%"REG_d"             \n\t" // 5*stride
@@ -2375,7 +2410,7 @@ L2_DIFF_CORE((%0, %%REGc)  , (%1, %%REGc))
         :: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast)
         : "%"REG_a, "%"REG_d, "%"REG_c, "memory"
     );
-#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#else //COMPILE_MMXEXT || COMPILE_3DNOW
 {
     int y;
     int d=0;
@@ -2458,11 +2493,11 @@ Switch between
         }
     }
 }
-#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
+#endif //COMPILE_MMXEXT || COMPILE_3DNOW
 }
-#endif //HAVE_ALTIVEC
+#endif //COMPILE_ALTIVEC
 
-#if HAVE_MMX_INLINE
+#if COMPILE_MMX
 /**
  * accurate deblock filter
  */
@@ -2865,7 +2900,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
             "movq (%4), %%mm2                       \n\t" // 2L0 - 5L1 + 5L2 - 2L3
             "movq 8(%4), %%mm3                      \n\t" // 2H0 - 5H1 + 5H2 - 2H3
 
-#if HAVE_MMXEXT_INLINE
+#if COMPILE_MMXEXT
             "movq %%mm7, %%mm6                      \n\t" // 0
             "psubw %%mm0, %%mm6                     \n\t"
             "pmaxsw %%mm6, %%mm0                    \n\t" // |2L4 - 5L5 + 5L6 - 2L7|
@@ -2897,7 +2932,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
             "psubw %%mm6, %%mm3                     \n\t" // |2H0 - 5H1 + 5H2 - 2H3|
 #endif
 
-#if HAVE_MMXEXT_INLINE
+#if COMPILE_MMXEXT
             "pminsw %%mm2, %%mm0                    \n\t"
             "pminsw %%mm3, %%mm1                    \n\t"
 #else
@@ -2961,7 +2996,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
             "pand %%mm2, %%mm4                      \n\t"
             "pand %%mm3, %%mm5                      \n\t"
 
-#if HAVE_MMXEXT_INLINE
+#if COMPILE_MMXEXT
             "pminsw %%mm0, %%mm4                    \n\t"
             "pminsw %%mm1, %%mm5                    \n\t"
 #else
@@ -2998,7 +3033,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
 }
     } */
 }
-#endif //HAVE_MMX_INLINE
+#endif //COMPILE_MMX
 
 static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
                                 const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c);
@@ -3013,18 +3048,18 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
 static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t src[], int srcStride,
                                      int levelFix, int64_t *packedOffsetAndScale)
 {
-#if !HAVE_MMX_INLINE
+#if !COMPILE_MMX
     int i;
 #endif
     if(levelFix){
-#if HAVE_MMX_INLINE
+#if COMPILE_MMX
     __asm__ volatile(
         "movq (%%"REG_a"), %%mm2        \n\t" // packedYOffset
         "movq 8(%%"REG_a"), %%mm3       \n\t" // packedYScale
         "lea (%2,%4), %%"REG_a"         \n\t"
         "lea (%3,%5), %%"REG_d"         \n\t"
         "pxor %%mm4, %%mm4              \n\t"
-#if HAVE_MMXEXT_INLINE
+#if COMPILE_MMXEXT
 #define REAL_SCALED_CPY(src1, src2, dst1, dst2)                                                \
         "movq " #src1 ", %%mm0          \n\t"\
         "movq " #src1 ", %%mm5          \n\t"\
@@ -3047,7 +3082,7 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t
         "movq %%mm0, " #dst1 "          \n\t"\
         "movq %%mm1, " #dst2 "          \n\t"\
 
-#else //HAVE_MMXEXT_INLINE
+#else //COMPILE_MMXEXT
 #define REAL_SCALED_CPY(src1, src2, dst1, dst2)                                        \
         "movq " #src1 ", %%mm0          \n\t"\
         "movq " #src1 ", %%mm5          \n\t"\
@@ -3074,7 +3109,7 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t
         "movq %%mm0, " #dst1 "          \n\t"\
         "movq %%mm1, " #dst2 "          \n\t"\
 
-#endif //HAVE_MMXEXT_INLINE
+#endif //COMPILE_MMXEXT
 #define SCALED_CPY(src1, src2, dst1, dst2)\
    REAL_SCALED_CPY(src1, src2, dst1, dst2)
 
@@ -3094,13 +3129,13 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
         "r" ((x86_reg)dstStride)
         : "%"REG_d
     );
-#else //HAVE_MMX_INLINE
+#else //COMPILE_MMX
     for(i=0; i<8; i++)
         memcpy( &(dst[dstStride*i]),
                 &(src[srcStride*i]), BLOCK_SIZE);
-#endif //HAVE_MMX_INLINE
+#endif //COMPILE_MMX
     }else{
-#if HAVE_MMX_INLINE
+#if COMPILE_MMX
     __asm__ volatile(
         "lea (%0,%2), %%"REG_a"                 \n\t"
         "lea (%1,%3), %%"REG_d"                 \n\t"
@@ -3127,11 +3162,11 @@ SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
         "r" ((x86_reg)dstStride)
         : "%"REG_a, "%"REG_d
     );
-#else //HAVE_MMX_INLINE
+#else //COMPILE_MMX
     for(i=0; i<8; i++)
         memcpy( &(dst[dstStride*i]),
                 &(src[srcStride*i]), BLOCK_SIZE);
-#endif //HAVE_MMX_INLINE
+#endif //COMPILE_MMX
     }
 }
 
@@ -3140,7 +3175,7 @@ SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
  */
 static inline void RENAME(duplicate)(uint8_t src[], int stride)
 {
-#if HAVE_MMX_INLINE
+#if COMPILE_MMX
     __asm__ volatile(
         "movq (%0), %%mm0               \n\t"
         "add %1, %0                     \n\t"
@@ -3177,7 +3212,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
     int QPCorrecture= 256*256;
 
     int copyAhead;
-#if HAVE_MMX_INLINE
+#if COMPILE_MMX
     int i;
 #endif
 
@@ -3190,7 +3225,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
     uint8_t * const tempDst= dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride;
     //const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4;
 
-#if HAVE_MMX_INLINE
+#if COMPILE_MMX
     for(i=0; i<57; i++){
         int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1;
         int threshold= offset*2 + 1;
@@ -3248,7 +3283,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
 
         scale= (double)(c.ppMode.maxAllowedY - c.ppMode.minAllowedY) / (double)(white-black);
 
-#if HAVE_MMXEXT_INLINE
+#if COMPILE_MMXEXT
         c.packedYScale= (uint16_t)(scale*256.0 + 0.5);
         c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF;
 #else
@@ -3281,7 +3316,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
         // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
         for(x=0; x<width; x+=BLOCK_SIZE){
 
-#if HAVE_MMXEXT_INLINE
+#if COMPILE_MMXEXT
 /*
             prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
             prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
@@ -3308,7 +3343,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
                 : "%"REG_a, "%"REG_d
             );
 
-#elif HAVE_AMD3DNOW_INLINE
+#elif COMPILE_3DNOW
 //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ...
 /*          prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
             prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32);
@@ -3354,7 +3389,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
         //1% speedup if these are here instead of the inner loop
         const uint8_t *srcBlock= &(src[y*srcStride]);
         uint8_t *dstBlock= &(dst[y*dstStride]);
-#if HAVE_MMX_INLINE
+#if COMPILE_MMX
         uint8_t *tempBlock1= c.tempBlocks;
         uint8_t *tempBlock2= c.tempBlocks + 8;
 #endif
@@ -3390,7 +3425,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
         // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
         for(x=0; x<width; x+=BLOCK_SIZE){
             const int stride= dstStride;
-#if HAVE_MMX_INLINE
+#if COMPILE_MMX
             uint8_t *tmpXchg;
 #endif
             if(isColor){
@@ -3404,7 +3439,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
                 yHistogram[ srcBlock[srcStride*12 + 4] ]++;
             }
             c.QP= QP;
-#if HAVE_MMX_INLINE
+#if COMPILE_MMX
             __asm__ volatile(
                 "movd %1, %%mm7         \n\t"
                 "packuswb %%mm7, %%mm7  \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
@@ -3417,7 +3452,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
 #endif
 
 
-#if HAVE_MMXEXT_INLINE
+#if COMPILE_MMXEXT
 /*
             prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
             prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
@@ -3444,7 +3479,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
                 : "%"REG_a, "%"REG_d
             );
 
-#elif HAVE_AMD3DNOW_INLINE
+#elif COMPILE_3DNOW
 //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ...
 /*          prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
             prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32);
@@ -3488,12 +3523,12 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
                 }
             }
 
-#if HAVE_MMX_INLINE
+#if COMPILE_MMX
             RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
 #endif
             /* check if we have a previous block to deblock it with dstBlock */
             if(x - 8 >= 0){
-#if HAVE_MMX_INLINE
+#if COMPILE_MMX
                 if(mode & H_X1_FILTER)
                         RENAME(vertX1Filter)(tempBlock1, 16, &c);
                 else if(mode & H_DEBLOCK){
@@ -3514,7 +3549,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
                 if(mode & H_X1_FILTER)
                     horizX1Filter(dstBlock-4, stride, QP);
                 else if(mode & H_DEBLOCK){
-#if HAVE_ALTIVEC
+#if COMPILE_ALTIVEC
                     DECLARE_ALIGNED(16, unsigned char, tempBlock)[272];
                     int t;
                     transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride);
@@ -3539,7 +3574,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
                 }else if(mode & H_A_DEBLOCK){
                     RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c);
                 }
-#endif //HAVE_MMX_INLINE
+#endif //COMPILE_MMX
                 if(mode & DERING){
                 //FIXME filter first line
                     if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c);
@@ -3557,7 +3592,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
             dstBlock+=8;
             srcBlock+=8;
 
-#if HAVE_MMX_INLINE
+#if COMPILE_MMX
             tmpXchg= tempBlock1;
             tempBlock1= tempBlock2;
             tempBlock2 = tmpXchg;
@@ -3597,9 +3632,9 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
                 + dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride];
         }*/
     }
-#if   HAVE_AMD3DNOW_INLINE
+#if   COMPILE_3DNOW
     __asm__ volatile("femms");
-#elif HAVE_MMX_INLINE
+#elif COMPILE_MMX
     __asm__ volatile("emms");
 #endif
 
@@ -3629,3 +3664,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
     *c2= c; //copy local context back
 
 }
+
+#undef RENAME
+#undef COMPILE_C
+#undef COMPILE_ALTIVEC
+#undef COMPILE_MMX
+#undef COMPILE_MMXEXT
+#undef COMPILE_3DNOW
-- 
1.8.0