[FFmpeg-devel] [PATCH] avfilter/vf_fspp.c: make the filter compile with latest LLVM

Thierry Foucu tfoucu at gmail.com
Thu Oct 31 19:53:22 CET 2013


Stop using the inline assembly constraint "o" in the vf_fspp code. The
constraint is incorrectly documented and essentially compiler-internal, and
it simply does not work the way it was being used here. Instead, pass each
element of the array of temporaries as its own explicit memory operand. The
compiler is smart enough to address them with immediate offsets from a
common base register.

In the process, remove the 2 unused 8-byte temporaries from each of two
functions in this filter (row_idct_mmx and row_fdct_mmx). Doing this saves
16 bytes of stack space per function, and in row_idct_mmx allows the use of
simpler addressing modes (no offset from %rsp), which saves 3 bytes of
encoded instruction stream.

Otherwise, this produces *exactly* the same assembly as the previous version,
but can be assembled by LLVM as well as GCC.

For a discussion of why the "o" constraint should not be used, see iant's
replies in this thread:
  http://gcc.gnu.org/ml/gcc-help/2011-04/msg00518.html
---
 libavfilter/libmpcodecs/vf_fspp.c | 80 ++++++++++++++++++++-------------------
 1 file changed, 41 insertions(+), 39 deletions(-)

diff --git a/libavfilter/libmpcodecs/vf_fspp.c b/libavfilter/libmpcodecs/vf_fspp.c
index a8a33e2..ac5f1ce 100644
--- a/libavfilter/libmpcodecs/vf_fspp.c
+++ b/libavfilter/libmpcodecs/vf_fspp.c
@@ -974,7 +974,7 @@ static void column_fidct_mmx(int16_t* thr_adr,  int16_t *data,  int16_t *output,
         "pmulhw "MANGLE(MM_FIX_1_414213562_A)", %%mm1 \n\t"
         "paddw %%mm6, %%mm2            \n\t" //'t0
 
-        "movq %%mm2, 0*8+%3            \n\t" //!
+        "movq %%mm2, %3                \n\t" //!
         "psubw %%mm6, %%mm7            \n\t" //'t3
 
         "movq "DCTSIZE_S"*2*2(%%"REG_S"), %%mm2 \n\t"
@@ -983,7 +983,7 @@ static void column_fidct_mmx(int16_t* thr_adr,  int16_t *data,  int16_t *output,
         "psubw "DCTSIZE_S"*5*2(%%"REG_S"), %%mm2 \n\t" //t5
         "movq %%mm5, %%mm6             \n\t"
 
-        "movq %%mm7, 3*8+%3            \n\t"
+        "movq %%mm7, %6                \n\t"
         "paddw %%mm2, %%mm3            \n\t" //t10
 
         "paddw %%mm4, %%mm2            \n\t" //t11
@@ -1010,13 +1010,13 @@ static void column_fidct_mmx(int16_t* thr_adr,  int16_t *data,  int16_t *output,
 
         "paddw %%mm3, %%mm7            \n\t" //z2
 
-        "movq %%mm5, 1*8+%3            \n\t"
+        "movq %%mm5, %4                \n\t"
         "paddw %%mm3, %%mm4            \n\t" //z4
 
         "movq 3*16(%%"REG_d"), %%mm3       \n\t"
         "movq %%mm0, %%mm1             \n\t"
 
-        "movq %%mm6, 2*8+%3            \n\t"
+        "movq %%mm6, %5                \n\t"
         "psubw %%mm2, %%mm1            \n\t" //z13
 
 //===
@@ -1076,7 +1076,7 @@ static void column_fidct_mmx(int16_t* thr_adr,  int16_t *data,  int16_t *output,
         // t4 t5 - - - t6 t7 -
         //--- t4 (mm0) may be <>0; mm1, mm5, mm6 == 0
 //Typical numbers: nondc - 19%%,  dc - 26%%,  zero - 55%%. zero case alone isn't worthwhile
-        "movq 0*8+%3, %%mm4            \n\t"
+        "movq %3, %%mm4                \n\t"
         "movq %%mm0, %%mm1             \n\t"
 
         "pmulhw "MANGLE(MM_FIX_0_847759065)", %%mm0 \n\t" //tmp6
@@ -1088,7 +1088,7 @@ static void column_fidct_mmx(int16_t* thr_adr,  int16_t *data,  int16_t *output,
         "pmulhw "MANGLE(MM_FIX_0_566454497)", %%mm1 \n\t" //tmp5
         "paddw %%mm4, %%mm5            \n\t"
 
-        "movq 1*8+%3, %%mm6            \n\t"
+        "movq %4, %%mm6                \n\t"
         //paddw mm3, MM_2
         "psraw $2, %%mm3              \n\t" //tmp7
 
@@ -1101,7 +1101,7 @@ static void column_fidct_mmx(int16_t* thr_adr,  int16_t *data,  int16_t *output,
         "movq %%mm4, "DCTSIZE_S"*7*2(%%"REG_D") \n\t"
         "paddw %%mm6, %%mm7            \n\t"
 
-        "movq 2*8+%3, %%mm3            \n\t"
+        "movq %5, %%mm3                \n\t"
         "psubw %%mm0, %%mm6            \n\t"
 
         "movq "DCTSIZE_S"*2*2(%%"REG_D"), %%mm4 \n\t"
@@ -1119,7 +1119,7 @@ static void column_fidct_mmx(int16_t* thr_adr,  int16_t *data,  int16_t *output,
         "movq "DCTSIZE_S"*3*2(%%"REG_D"), %%mm6 \n\t"
         "paddw %%mm3, %%mm5            \n\t"
 
-        "movq 3*8+%3, %%mm0            \n\t"
+        "movq %6, %%mm0                \n\t"
         "add $8, %%"REG_S"               \n\t"
 
         "movq %%mm7, "DCTSIZE_S"*1*2(%%"REG_D") \n\t"
@@ -1168,7 +1168,7 @@ static void column_fidct_mmx(int16_t* thr_adr,  int16_t *data,  int16_t *output,
         "movq %%mm2, %%mm7             \n\t"
 
         //---
-        "movq 0*8+%3, %%mm4            \n\t"
+        "movq %3, %%mm4                \n\t"
         "psubw %%mm3, %%mm2            \n\t"
 
         "psllw $1, %%mm2              \n\t"
@@ -1182,7 +1182,7 @@ static void column_fidct_mmx(int16_t* thr_adr,  int16_t *data,  int16_t *output,
         "paddw "DCTSIZE_S"*0*2(%%"REG_D"), %%mm4 \n\t"
         "psubw %%mm7, %%mm6            \n\t"
 
-        "movq 1*8+%3, %%mm3            \n\t"
+        "movq %4, %%mm3                \n\t"
         "paddw %%mm7, %%mm4            \n\t"
 
         "movq %%mm6, "DCTSIZE_S"*7*2(%%"REG_D") \n\t"
@@ -1191,10 +1191,10 @@ static void column_fidct_mmx(int16_t* thr_adr,  int16_t *data,  int16_t *output,
         "movq %%mm4, "DCTSIZE_S"*0*2(%%"REG_D") \n\t"
         "psubw %%mm7, %%mm1            \n\t" //'t6
 
-        "movq 2*8+%3, %%mm7            \n\t"
+        "movq %5, %%mm7                \n\t"
         "psubw %%mm5, %%mm0            \n\t" //'t10
 
-        "movq 3*8+%3, %%mm6            \n\t"
+        "movq %6, %%mm6                \n\t"
         "movq %%mm3, %%mm5             \n\t"
 
         "paddw "DCTSIZE_S"*1*2(%%"REG_D"), %%mm3 \n\t"
@@ -1332,7 +1332,7 @@ static void column_fidct_mmx(int16_t* thr_adr,  int16_t *data,  int16_t *output,
         "pmulhw "MANGLE(MM_FIX_1_414213562_A)", %%mm1 \n\t"
         "paddw %%mm6, %%mm2            \n\t" //'t0
 
-        "movq %%mm2, 0*8+%3            \n\t" //!
+        "movq %%mm2, %3                \n\t" //!
         "psubw %%mm6, %%mm7            \n\t" //'t3
 
         "movq "DCTSIZE_S"*2*2(%%"REG_S"), %%mm2 \n\t"
@@ -1341,7 +1341,7 @@ static void column_fidct_mmx(int16_t* thr_adr,  int16_t *data,  int16_t *output,
         "psubw "DCTSIZE_S"*5*2(%%"REG_S"), %%mm2 \n\t" //t5
         "movq %%mm5, %%mm6             \n\t"
 
-        "movq %%mm7, 3*8+%3            \n\t"
+        "movq %%mm7, %6                \n\t"
         "paddw %%mm2, %%mm3            \n\t" //t10
 
         "paddw %%mm4, %%mm2            \n\t" //t11
@@ -1368,13 +1368,13 @@ static void column_fidct_mmx(int16_t* thr_adr,  int16_t *data,  int16_t *output,
 
         "paddw %%mm3, %%mm7            \n\t" //z2
 
-        "movq %%mm5, 1*8+%3            \n\t"
+        "movq %%mm5, %4                \n\t"
         "paddw %%mm3, %%mm4            \n\t" //z4
 
         "movq 1*8+3*16(%%"REG_d"), %%mm3   \n\t"
         "movq %%mm0, %%mm1             \n\t"
 
-        "movq %%mm6, 2*8+%3            \n\t"
+        "movq %%mm6, %5                \n\t"
         "psubw %%mm2, %%mm1            \n\t" //z13
 
 //===
@@ -1434,7 +1434,7 @@ static void column_fidct_mmx(int16_t* thr_adr,  int16_t *data,  int16_t *output,
         // t4 t5 - - - t6 t7 -
         //--- t4 (mm0) may be <>0; mm1, mm5, mm6 == 0
 //Typical numbers: nondc - 19%%,  dc - 26%%,  zero - 55%%. zero case alone isn't worthwhile
-        "movq 0*8+%3, %%mm4            \n\t"
+        "movq %3, %%mm4                \n\t"
         "movq %%mm0, %%mm1             \n\t"
 
         "pmulhw "MANGLE(MM_FIX_0_847759065)", %%mm0 \n\t" //tmp6
@@ -1446,7 +1446,7 @@ static void column_fidct_mmx(int16_t* thr_adr,  int16_t *data,  int16_t *output,
         "pmulhw "MANGLE(MM_FIX_0_566454497)", %%mm1 \n\t" //tmp5
         "paddw %%mm4, %%mm5            \n\t"
 
-        "movq 1*8+%3, %%mm6            \n\t"
+        "movq %4, %%mm6                \n\t"
         //paddw mm3, MM_2
         "psraw $2, %%mm3              \n\t" //tmp7
 
@@ -1459,7 +1459,7 @@ static void column_fidct_mmx(int16_t* thr_adr,  int16_t *data,  int16_t *output,
         "movq %%mm4, "DCTSIZE_S"*7*2(%%"REG_D") \n\t"
         "paddw %%mm6, %%mm7            \n\t"
 
-        "movq 2*8+%3, %%mm3            \n\t"
+        "movq %5, %%mm3                \n\t"
         "psubw %%mm0, %%mm6            \n\t"
 
         "movq "DCTSIZE_S"*2*2(%%"REG_D"), %%mm4 \n\t"
@@ -1477,7 +1477,7 @@ static void column_fidct_mmx(int16_t* thr_adr,  int16_t *data,  int16_t *output,
         "movq "DCTSIZE_S"*3*2(%%"REG_D"), %%mm6 \n\t"
         "paddw %%mm3, %%mm5            \n\t"
 
-        "movq 3*8+%3, %%mm0            \n\t"
+        "movq %6, %%mm0                \n\t"
         "add $24, %%"REG_S"              \n\t"
 
         "movq %%mm7, "DCTSIZE_S"*1*2(%%"REG_D") \n\t"
@@ -1528,7 +1528,7 @@ static void column_fidct_mmx(int16_t* thr_adr,  int16_t *data,  int16_t *output,
         "movq %%mm2, %%mm7             \n\t"
 
         //---
-        "movq 0*8+%3, %%mm4            \n\t"
+        "movq %3, %%mm4                \n\t"
         "psubw %%mm3, %%mm2            \n\t"
 
         "psllw $1, %%mm2              \n\t"
@@ -1542,7 +1542,7 @@ static void column_fidct_mmx(int16_t* thr_adr,  int16_t *data,  int16_t *output,
         "paddw "DCTSIZE_S"*0*2(%%"REG_D"), %%mm4 \n\t"
         "psubw %%mm7, %%mm6            \n\t"
 
-        "movq 1*8+%3, %%mm3            \n\t"
+        "movq %4, %%mm3                \n\t"
         "paddw %%mm7, %%mm4            \n\t"
 
         "movq %%mm6, "DCTSIZE_S"*7*2(%%"REG_D") \n\t"
@@ -1551,10 +1551,10 @@ static void column_fidct_mmx(int16_t* thr_adr,  int16_t *data,  int16_t *output,
         "movq %%mm4, "DCTSIZE_S"*0*2(%%"REG_D") \n\t"
         "psubw %%mm7, %%mm1            \n\t" //'t6
 
-        "movq 2*8+%3, %%mm7            \n\t"
+        "movq %5, %%mm7                \n\t"
         "psubw %%mm5, %%mm0            \n\t" //'t10
 
-        "movq 3*8+%3, %%mm6            \n\t"
+        "movq %6, %%mm6                \n\t"
         "movq %%mm3, %%mm5             \n\t"
 
         "paddw "DCTSIZE_S"*1*2(%%"REG_D"), %%mm3 \n\t"
@@ -1596,7 +1596,8 @@ static void column_fidct_mmx(int16_t* thr_adr,  int16_t *data,  int16_t *output,
         "jnz 1b                \n\t"
         "5:                      \n\t"
 
-        : "+S"(data), "+D"(output), "+c"(cnt), "=o"(temps)
+        : "+S"(data), "+D"(output), "+c"(cnt), "=m"(temps[0]), "=m"(temps[1]),
+          "=m"(temps[2]), "=m"(temps[3])
         : "d"(thr_adr)
         : "%"REG_a
         );
@@ -1674,7 +1675,7 @@ static void row_idct_c(int16_t* workspace,
 static void row_idct_mmx (int16_t* workspace,
                           int16_t* output_adr,  int output_stride,  int cnt)
 {
-    uint64_t __attribute__((aligned(8))) temps[4];
+    uint64_t __attribute__((aligned(8))) temps[2];
     __asm__ volatile(
         "lea (%%"REG_a",%%"REG_a",2), %%"REG_d"    \n\t"
         "1:                     \n\t"
@@ -1730,10 +1731,10 @@ static void row_idct_mmx (int16_t* workspace,
         "movq "DCTSIZE_S"*2*2+"DCTSIZE_S"(%%"REG_S"), %%mm5 \n\t"
         "paddw %%mm0, %%mm1            \n\t" //t1
 
-        "movq %%mm4, 0*8+%3            \n\t" //t0
+        "movq %%mm4, %3                \n\t" //t0
         "movq %%mm3, %%mm4             \n\t"
 
-        "movq %%mm6, 1*8+%3            \n\t" //t3
+        "movq %%mm6, %4                \n\t" //t3
         "punpcklwd %%mm2, %%mm3        \n\t"
 
         //transpose 4x4
@@ -1783,7 +1784,7 @@ static void row_idct_mmx (int16_t* workspace,
         "psllw $3, %%mm0              \n\t"
         "psubw %%mm3, %%mm4            \n\t" //t10
 
-        "movq 0*8+%3, %%mm6            \n\t"
+        "movq %3, %%mm6                \n\t"
         "movq %%mm1, %%mm3             \n\t"
 
         "psllw $3, %%mm4              \n\t"
@@ -1804,7 +1805,7 @@ static void row_idct_mmx (int16_t* workspace,
         "movq "MANGLE(MM_DESCALE_RND)", %%mm2   \n\t" //4
         "psubw %%mm5, %%mm6            \n\t" //d7
 
-        "paddw 0*8+%3, %%mm5           \n\t" //d0
+        "paddw %3, %%mm5               \n\t" //d0
         "paddw %%mm2, %%mm1            \n\t"
 
         "paddw %%mm2, %%mm5            \n\t"
@@ -1831,7 +1832,7 @@ static void row_idct_mmx (int16_t* workspace,
         "movq %%mm7, (%%"REG_D",%%"REG_a",2)    \n\t"
         "add %%"REG_d", %%"REG_D"             \n\t" //3*ls
 
-        "movq 1*8+%3, %%mm5           \n\t" //t3
+        "movq %4, %%mm5               \n\t" //t3
         "psraw $3, %%mm3              \n\t"
 
         "paddw (%%"REG_D",%%"REG_a",2), %%mm0   \n\t"
@@ -1840,7 +1841,7 @@ static void row_idct_mmx (int16_t* workspace,
         "paddw (%%"REG_D",%%"REG_d",), %%mm3    \n\t"
         "psraw $3, %%mm6              \n\t"
 
-        "paddw 1*8+%3, %%mm4           \n\t" //d4
+        "paddw %4, %%mm4               \n\t" //d4
         "paddw %%mm2, %%mm5            \n\t"
 
         "paddw (%%"REG_D",%%"REG_a",4), %%mm6   \n\t"
@@ -1865,7 +1866,8 @@ static void row_idct_mmx (int16_t* workspace,
         "dec %%"REG_c"                   \n\t"
         "jnz 1b                  \n\t"
 
-        : "+S"(workspace), "+D"(output_adr), "+c"(cnt), "=o"(temps)
+        : "+S"(workspace), "+D"(output_adr), "+c"(cnt), "=m"(temps[0]),
+          "=m"(temps[1])
         : "a"(output_stride*sizeof(short))
         : "%"REG_d
         );
@@ -1940,7 +1942,7 @@ static void row_fdct_c(int16_t *data, const uint8_t *pixels, int line_size, int
 
 static void row_fdct_mmx(int16_t *data,  const uint8_t *pixels,  int line_size,  int cnt)
 {
-    uint64_t __attribute__((aligned(8))) temps[4];
+    uint64_t __attribute__((aligned(8))) temps[2];
     __asm__ volatile(
         "lea (%%"REG_a",%%"REG_a",2), %%"REG_d"    \n\t"
         "6:                     \n\t"
@@ -1974,10 +1976,10 @@ static void row_fdct_mmx(int16_t *data,  const uint8_t *pixels,  int line_size,
         "movd (%%"REG_S",%%"REG_a",2), %%mm3    \n\t" //5
         "paddw %%mm4, %%mm1            \n\t"
 
-        "movq %%mm5, 0*8+%3            \n\t" //t7
+        "movq %%mm5, %3                \n\t" //t7
         "punpcklbw %%mm7, %%mm3        \n\t"
 
-        "movq %%mm6, 1*8+%3            \n\t" //t6
+        "movq %%mm6, %4                \n\t" //t6
         "movq %%mm2, %%mm4             \n\t"
 
         "movd (%%"REG_S"), %%mm5           \n\t" //3
@@ -2023,7 +2025,7 @@ static void row_fdct_mmx(int16_t *data,  const uint8_t *pixels,  int line_size,
         "psubw %%mm1, %%mm5            \n\t" //d1
         "movq %%mm0, %%mm6             \n\t"
 
-        "movq 1*8+%3, %%mm1            \n\t"
+        "movq %4, %%mm1                \n\t"
         "punpcklwd %%mm5, %%mm0        \n\t"
 
         "punpckhwd %%mm5, %%mm6        \n\t"
@@ -2047,7 +2049,7 @@ static void row_fdct_mmx(int16_t *data,  const uint8_t *pixels,  int line_size,
         "movq %%mm7, "DCTSIZE_S"*3*2(%%"REG_D") \n\t"
         "psllw $2, %%mm3              \n\t" //t10
 
-        "movq 0*8+%3, %%mm2           \n\t"
+        "movq %3, %%mm2               \n\t"
         "psllw $2, %%mm4              \n\t" //t11
 
         "pmulhw "MANGLE(ff_MM_FIX_0_707106781)", %%mm4 \n\t" //z3
@@ -2110,7 +2112,7 @@ static void row_fdct_mmx(int16_t *data,  const uint8_t *pixels,  int line_size,
         "dec %%"REG_c"                   \n\t"
         "jnz 6b                  \n\t"
 
-        : "+S"(pixels), "+D"(data), "+c"(cnt), "=o"(temps)
+        : "+S"(pixels), "+D"(data), "+c"(cnt), "=m"(temps[0]), "=m"(temps[1])
         : "a"(line_size)
         : "%"REG_d);
 }
-- 
1.8.4.1



More information about the ffmpeg-devel mailing list