[FFmpeg-devel] [PATCH 08/14] [inline assembly] add mmx clobbers to mpegvideoenc
frederic.recoules at univ-grenoble-alpes.fr
frederic.recoules at univ-grenoble-alpes.fr
Sun Apr 26 22:44:18 EEST 2020
From: Frédéric Recoules <frederic.recoules at orange.fr>
---
libavcodec/x86/mpegvideoenc_qns_template.c | 12 +++++---
libavcodec/x86/mpegvideoencdsp_init.c | 32 ++++++++++++++++++----
2 files changed, 35 insertions(+), 9 deletions(-)
diff --git a/libavcodec/x86/mpegvideoenc_qns_template.c b/libavcodec/x86/mpegvideoenc_qns_template.c
index 882d486205..96325fd8f8 100644
--- a/libavcodec/x86/mpegvideoenc_qns_template.c
+++ b/libavcodec/x86/mpegvideoenc_qns_template.c
@@ -39,8 +39,8 @@ static int DEF(try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[
av_assert2(FFABS(scale) < MAX_ABS);
scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT;
- SET_RND(mm6);
__asm__ volatile(
+ SET_RND_TPL(mm6)
"pxor %%mm7, %%mm7 \n\t"
"movd %4, %%mm5 \n\t"
"punpcklwd %%mm5, %%mm5 \n\t"
@@ -69,7 +69,9 @@ static int DEF(try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[
"movd %%mm7, %0 \n\t"
: "+r" (i)
- : "r"(basis), "r"(rem), "r"(weight), "g"(scale)
+ : "r"(basis), "r"(rem), "r"(weight), "g"(scale) COMMA_SET_RND_IN
+ MMX_CLOBBERS_ONLY("mm0", "mm1", "mm5", "mm7"
+ SET_RND_CLOBBER(, "mm6"))
);
return i;
}
@@ -80,8 +82,8 @@ static void DEF(add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale)
if(FFABS(scale) < MAX_ABS){
scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT;
- SET_RND(mm6);
__asm__ volatile(
+ SET_RND_TPL(mm6)
"movd %3, %%mm5 \n\t"
"punpcklwd %%mm5, %%mm5 \n\t"
"punpcklwd %%mm5, %%mm5 \n\t"
@@ -99,7 +101,9 @@ static void DEF(add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale)
" jb 1b \n\t"
: "+r" (i)
- : "r"(basis), "r"(rem), "g"(scale)
+ : "r"(basis), "r"(rem), "g"(scale) COMMA_SET_RND_IN
+ MMX_CLOBBERS_ONLY("mm0", "mm1", "mm5"
+ SET_RND_CLOBBER(, "mm6"))
);
}else{
for(i=0; i<8*8; i++){
diff --git a/libavcodec/x86/mpegvideoencdsp_init.c b/libavcodec/x86/mpegvideoencdsp_init.c
index 532836cec9..8430ec62ea 100644
--- a/libavcodec/x86/mpegvideoencdsp_init.c
+++ b/libavcodec/x86/mpegvideoencdsp_init.c
@@ -51,17 +51,26 @@ int ff_pix_norm1_sse2(uint8_t *pix, int line_size);
"psraw $1, " #y " \n\t"
#define DEF(x) x ## _mmx
#define SET_RND MOVQ_WONE
+#define SET_RND_TPL MOVQ_WONE_TPL
+#define COMMA_SET_RND_IN
+#define SET_RND_CLOBBER(...) __VA_ARGS__
#define SCALE_OFFSET 1
#include "mpegvideoenc_qns_template.c"
#undef DEF
#undef SET_RND
+#undef SET_RND_TPL
+#undef COMMA_SET_RND_IN
+#undef SET_RND_CLOBBER
#undef SCALE_OFFSET
#undef PMULHRW
#define DEF(x) x ## _3dnow
#define SET_RND(x)
+#define SET_RND_TPL(x)
+#define COMMA_SET_RND_IN
+#define SET_RND_CLOBBER(...)
#define SCALE_OFFSET 0
#define PMULHRW(x, y, s, o) \
"pmulhrw " #s ", " #x " \n\t" \
@@ -71,6 +80,9 @@ int ff_pix_norm1_sse2(uint8_t *pix, int line_size);
#undef DEF
#undef SET_RND
+#undef SET_RND_TPL
+#undef COMMA_SET_RND_IN
+#undef SET_RND_CLOBBER
#undef SCALE_OFFSET
#undef PMULHRW
@@ -78,6 +90,9 @@ int ff_pix_norm1_sse2(uint8_t *pix, int line_size);
#undef PHADDD
#define DEF(x) x ## _ssse3
#define SET_RND(x)
+#define SET_RND_TPL(x)
+#define COMMA_SET_RND_IN
+#define SET_RND_CLOBBER(...)
#define SCALE_OFFSET -1
#define PHADDD(a, t) \
@@ -93,6 +108,9 @@ int ff_pix_norm1_sse2(uint8_t *pix, int line_size);
#undef DEF
#undef SET_RND
+#undef SET_RND_TPL
+#undef COMMA_SET_RND_IN
+#undef SET_RND_CLOBBER
#undef SCALE_OFFSET
#undef PMULHRW
#undef PHADDD
@@ -127,7 +145,8 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
"jb 1b \n\t"
: "+r" (ptr)
: "r" ((x86_reg) wrap), "r" ((x86_reg) width),
- "r" (ptr + wrap * height));
+ "r" (ptr + wrap * height)
+ MMX_CLOBBERS_ONLY("mm0", "mm1") );
} else if (w == 16) {
__asm__ volatile (
"1: \n\t"
@@ -148,7 +167,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
"jb 1b \n\t"
: "+r"(ptr)
: "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height)
- );
+ MMX_CLOBBERS_ONLY("mm0", "mm1") );
} else {
av_assert1(w == 4);
__asm__ volatile (
@@ -167,7 +186,8 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
"jb 1b \n\t"
: "+r" (ptr)
: "r" ((x86_reg) wrap), "r" ((x86_reg) width),
- "r" (ptr + wrap * height));
+ "r" (ptr + wrap * height)
+ MMX_CLOBBERS_ONLY("mm0", "mm1") );
}
/* top and bottom (and hopefully also the corners) */
@@ -187,7 +207,8 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
: "+r" (ptr)
: "r" ((x86_reg) buf - (x86_reg) ptr - w),
"r" ((x86_reg) - wrap), "r" ((x86_reg) - wrap * 3),
- "r" (ptr + width + 2 * w));
+ "r" (ptr + width + 2 * w)
+ MMX_CLOBBERS_ONLY("mm0") );
}
}
@@ -207,7 +228,8 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
: "+r" (ptr)
: "r" ((x86_reg) last_line - (x86_reg) ptr - w),
"r" ((x86_reg) wrap), "r" ((x86_reg) wrap * 3),
- "r" (ptr + width + 2 * w));
+ "r" (ptr + width + 2 * w)
+ MMX_CLOBBERS_ONLY("mm0") );
}
}
}
--
2.17.1
More information about the ffmpeg-devel mailing list