[FFmpeg-devel] [PATCH] Fix compilation on clang 2.8, which doesn't support 3dnow inline asm.
Ronald S. Bultje
rsbultje
Fri Feb 11 19:40:12 CET 2011
---
configure | 7 ++++---
libavcodec/x86/cavsdsp_mmx.c | 4 ++++
libavcodec/x86/dsputil_mmx.c | 39 ++++++++++++++++++++++++++++++++-------
libavcodec/x86/dsputilenc_mmx.c | 4 ++++
libavcodec/x86/fmtconvert_mmx.c | 10 ++++++++++
libavcodec/x86/h264_qpel_mmx.c | 2 ++
libswscale/rgb2rgb.c | 4 ++++
7 files changed, 60 insertions(+), 10 deletions(-)
diff --git a/configure b/configure
index a3ca15d..7703c80 100755
--- a/configure
+++ b/configure
@@ -2649,9 +2649,10 @@ int main(void) {
}
EOF
- # check whether binutils is new enough to compile SSSE3/MMX2
- enabled ssse3 && check_asm ssse3 '"pabsw %xmm0, %xmm0"'
- enabled mmx2 && check_asm mmx2 '"pmaxub %mm0, %mm1"'
+ # check whether binutils is new enough to compile SSSE3/MMX2/3dnow
+ enabled ssse3 && check_asm ssse3 '"pabsw %xmm0, %xmm0"'
+ enabled mmx2 && check_asm mmx2 '"pmaxub %mm0, %mm1"'
+ enabled amd3dnow && check_asm amd3dnow '"pavgusb %mm0, %mm1"'
check_asm bswap '"bswap %%eax" ::: "%eax"'
diff --git a/libavcodec/x86/cavsdsp_mmx.c b/libavcodec/x86/cavsdsp_mmx.c
index 0f5fdaa..e17d18c 100644
--- a/libavcodec/x86/cavsdsp_mmx.c
+++ b/libavcodec/x86/cavsdsp_mmx.c
@@ -425,8 +425,10 @@ static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, ui
"pavgb " #temp ", " #a " \n\t"\
"mov" #size " " #a ", " #b " \n\t"
+#if HAVE_AMD3DNOW
QPEL_CAVS(put_, PUT_OP, 3dnow)
QPEL_CAVS(avg_, AVG_3DNOW_OP, 3dnow)
+#endif
QPEL_CAVS(put_, PUT_OP, mmx2)
QPEL_CAVS(avg_, AVG_MMX2_OP, mmx2)
@@ -476,5 +478,7 @@ void ff_cavsdsp_init_mmx(CAVSDSPContext *c, AVCodecContext *avctx)
int mm_flags = av_get_cpu_flags();
if (mm_flags & AV_CPU_FLAG_MMX2) ff_cavsdsp_init_mmx2 (c, avctx);
+#if HAVE_AMD3DNOW
if (mm_flags & AV_CPU_FLAG_3DNOW) ff_cavsdsp_init_3dnow(c, avctx);
+#endif
}
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 39bf3f2..ceb9f5d 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -190,6 +190,7 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
/***********************************/
/* 3Dnow specific */
+#if HAVE_AMD3DNOW
#define DEF(x) x ## _3dnow
#define PAVGB "pavgusb"
#define OP_AVG PAVGB
@@ -199,6 +200,7 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
#undef DEF
#undef PAVGB
#undef OP_AVG
+#endif
/***********************************/
/* MMX2 specific */
@@ -967,7 +969,7 @@ PAETH(ssse3, ABS3_SSSE3)
"packuswb %%mm5, %%mm5 \n\t"\
OP(%%mm5, out, %%mm7, d)
-#define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW)\
+#define QPEL_BASE_MMX2_16(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW)\
static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
uint64_t temp;\
\
@@ -1087,7 +1089,8 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, in
: "memory"\
);\
}\
-\
+
+#define QPEL_BASE_3DNOW_16(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW)\
static void OPNAME ## mpeg4_qpel16_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
int i;\
int16_t temp[16];\
@@ -1134,7 +1137,8 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, i
src+=srcStride;\
}\
}\
-\
+
+#define QPEL_BASE_MMX2_8(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW)\
static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
__asm__ volatile(\
"pxor %%mm7, %%mm7 \n\t"\
@@ -1198,7 +1202,8 @@ static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int
: "memory"\
);\
}\
-\
+
+#define QPEL_BASE_3DNOW_8(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW)\
static void OPNAME ## mpeg4_qpel8_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
int i;\
int16_t temp[8];\
@@ -1603,12 +1608,26 @@ static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride
"pavgb " #temp ", " #a " \n\t"\
"mov" #size " " #a ", " #b " \n\t"
+#if HAVE_AMD3DNOW
+#define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW)\
+QPEL_BASE_MMX2_16(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW) \
+QPEL_BASE_MMX2_8(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW) \
+QPEL_BASE_3DNOW_16(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW) \
+QPEL_BASE_3DNOW_8(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW)
+#else
+#define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW)\
+QPEL_BASE_MMX2_16(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW) \
+QPEL_BASE_MMX2_8(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW)
+#endif
+
QPEL_BASE(put_ , ff_pw_16, _ , PUT_OP, PUT_OP)
QPEL_BASE(avg_ , ff_pw_16, _ , AVG_MMX2_OP, AVG_3DNOW_OP)
QPEL_BASE(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, PUT_OP)
+#if HAVE_AMD3DNOW
QPEL_OP(put_ , ff_pw_16, _ , PUT_OP, 3dnow)
QPEL_OP(avg_ , ff_pw_16, _ , AVG_3DNOW_OP, 3dnow)
QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, 3dnow)
+#endif
QPEL_OP(put_ , ff_pw_16, _ , PUT_OP, mmx2)
QPEL_OP(avg_ , ff_pw_16, _ , AVG_MMX2_OP, mmx2)
QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmx2)
@@ -1654,10 +1673,12 @@ QPEL_2TAP(put_, 16, mmx2)
QPEL_2TAP(avg_, 16, mmx2)
QPEL_2TAP(put_, 8, mmx2)
QPEL_2TAP(avg_, 8, mmx2)
+#if HAVE_AMD3DNOW
QPEL_2TAP(put_, 16, 3dnow)
QPEL_2TAP(avg_, 16, 3dnow)
QPEL_2TAP(put_, 8, 3dnow)
QPEL_2TAP(avg_, 8, 3dnow)
+#endif
#if 0
@@ -1887,7 +1908,9 @@ static void name(void *mem, int stride, int h){\
}while(--h);\
}
PREFETCH(prefetch_mmx2, prefetcht0)
+#if HAVE_AMD3DNOW
PREFETCH(prefetch_3dnow, prefetch)
+#endif
#undef PREFETCH
#include "h264_qpel_mmx.c"
@@ -2640,6 +2663,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
ff_vc1dsp_init_mmx(c, avctx);
c->add_png_paeth_prediction= add_png_paeth_prediction_mmx2;
+#if HAVE_AMD3DNOW
} else if (mm_flags & AV_CPU_FLAG_3DNOW) {
c->prefetch = prefetch_3dnow;
@@ -2700,6 +2724,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_3dnow;
c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_3dnow;
#endif
+#endif /* HAVE_AMD3DNOW */
}
@@ -2758,11 +2783,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
}
#endif
- if(mm_flags & AV_CPU_FLAG_3DNOW){
+ if(HAVE_AMD3DNOW && mm_flags & AV_CPU_FLAG_3DNOW){
c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
c->vector_fmul = vector_fmul_3dnow;
}
- if(mm_flags & AV_CPU_FLAG_3DNOWEXT){
+ if(HAVE_AMD3DNOWEXT && mm_flags & AV_CPU_FLAG_3DNOWEXT){
c->vector_fmul_reverse = vector_fmul_reverse_3dnow2;
#if HAVE_6REGS
c->vector_fmul_window = vector_fmul_window_3dnow2;
@@ -2788,7 +2813,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->scalarproduct_float = ff_scalarproduct_float_sse;
#endif
}
- if(mm_flags & AV_CPU_FLAG_3DNOW)
+ if(HAVE_AMD3DNOW && mm_flags & AV_CPU_FLAG_3DNOW)
c->vector_fmul_add = vector_fmul_add_3dnow; // faster than sse
if(mm_flags & AV_CPU_FLAG_SSE2){
#if HAVE_YASM
diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c
index bd31205..6966c91 100644
--- a/libavcodec/x86/dsputilenc_mmx.c
+++ b/libavcodec/x86/dsputilenc_mmx.c
@@ -1059,6 +1059,7 @@ static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, int si
#undef SCALE_OFFSET
#undef PMULHRW
+#if HAVE_AMD3DNOW
#define DEF(x) x ## _3dnow
#define SET_RND(x)
#define SCALE_OFFSET 0
@@ -1072,6 +1073,7 @@ static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, int si
#undef SET_RND
#undef SCALE_OFFSET
#undef PMULHRW
+#endif
#if HAVE_SSSE3
#undef PHADDD
@@ -1180,12 +1182,14 @@ void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
}
#endif
+#if HAVE_AMD3DNOW
if(mm_flags & AV_CPU_FLAG_3DNOW){
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->try_8x8basis= try_8x8basis_3dnow;
}
c->add_8x8basis= add_8x8basis_3dnow;
}
+#endif
}
dsputil_init_pix_mmx(c, avctx);
diff --git a/libavcodec/x86/fmtconvert_mmx.c b/libavcodec/x86/fmtconvert_mmx.c
index ea41f73..ac1367c 100644
--- a/libavcodec/x86/fmtconvert_mmx.c
+++ b/libavcodec/x86/fmtconvert_mmx.c
@@ -70,6 +70,7 @@ static void int32_to_float_fmul_scalar_sse2(float *dst, const int *src, float mu
);
}
+#if HAVE_AMD3DNOW
static void float_to_int16_3dnow(int16_t *dst, const float *src, long len){
x86_reg reglen = len;
// not bit-exact: pf2id uses different rounding than C and SSE
@@ -93,6 +94,7 @@ static void float_to_int16_3dnow(int16_t *dst, const float *src, long len){
:"+r"(reglen), "+r"(dst), "+r"(src)
);
}
+#endif
static void float_to_int16_sse(int16_t *dst, const float *src, long len){
x86_reg reglen = len;
@@ -180,6 +182,7 @@ static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, lon
float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\
}
+#if HAVE_AMD3DNOW
FLOAT_TO_INT16_INTERLEAVE(3dnow,
"1: \n"
"pf2id (%2,%0), %%mm0 \n"
@@ -197,6 +200,7 @@ FLOAT_TO_INT16_INTERLEAVE(3dnow,
"js 1b \n"
"femms \n"
)
+#endif
FLOAT_TO_INT16_INTERLEAVE(sse,
"1: \n"
@@ -228,12 +232,14 @@ FLOAT_TO_INT16_INTERLEAVE(sse2,
"js 1b \n"
)
+#if HAVE_AMD3DNOW
static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){
if(channels==6)
ff_float_to_int16_interleave6_3dn2(dst, src, len);
else
float_to_int16_interleave_3dnow(dst, src, len, channels);
}
+#endif
void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
{
@@ -241,17 +247,21 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
if (mm_flags & AV_CPU_FLAG_MMX) {
+#if HAVE_AMD3DNOW
if(mm_flags & AV_CPU_FLAG_3DNOW){
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->float_to_int16 = float_to_int16_3dnow;
c->float_to_int16_interleave = float_to_int16_interleave_3dnow;
}
}
+#endif
+#if HAVE_AMD3DNOWEXT
if(mm_flags & AV_CPU_FLAG_3DNOWEXT){
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->float_to_int16_interleave = float_to_int16_interleave_3dn2;
}
}
+#endif
if(mm_flags & AV_CPU_FLAG_SSE){
c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse;
c->float_to_int16 = float_to_int16_sse;
diff --git a/libavcodec/x86/h264_qpel_mmx.c b/libavcodec/x86/h264_qpel_mmx.c
index d8ceca1..909b2b8 100644
--- a/libavcodec/x86/h264_qpel_mmx.c
+++ b/libavcodec/x86/h264_qpel_mmx.c
@@ -1191,7 +1191,9 @@ QPEL_H264_HV_XMM(avg_, AVG_MMX2_OP, ssse3)
#endif
#undef PAVGB
+#if HAVE_AMD3DNOW
H264_MC_4816(3dnow)
+#endif
H264_MC_4816(mmx2)
H264_MC_816(H264_MC_V, sse2)
H264_MC_816(H264_MC_HV, sse2)
diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
index 7226853..eae28ec 100644
--- a/libswscale/rgb2rgb.c
+++ b/libswscale/rgb2rgb.c
@@ -184,11 +184,13 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL;
#undef COMPILE_TEMPLATE_MMX2
#undef COMPILE_TEMPLATE_SSE2
#undef COMPILE_TEMPLATE_AMD3DNOW
+#if HAVE_AMD3DNOW
#define COMPILE_TEMPLATE_MMX2 0
#define COMPILE_TEMPLATE_SSE2 1
#define COMPILE_TEMPLATE_AMD3DNOW 1
#define RENAME(a) a ## _3DNOW
#include "rgb2rgb_template.c"
+#endif
#endif //ARCH_X86 || ARCH_X86_64
@@ -206,8 +208,10 @@ void sws_rgb2rgb_init(int flags)
rgb2rgb_init_SSE2();
else if (flags & SWS_CPU_CAPS_MMX2)
rgb2rgb_init_MMX2();
+#if HAVE_AMD3DNOW
else if (flags & SWS_CPU_CAPS_3DNOW)
rgb2rgb_init_3DNOW();
+#endif
else if (flags & SWS_CPU_CAPS_MMX)
rgb2rgb_init_MMX();
else
--
1.7.2.1
More information about the ffmpeg-devel mailing list