[FFmpeg-cvslog] r21360 - trunk/libavcodec/x86/h264dsp_mmx.c
conrad
subversion
Thu Jan 21 10:46:58 CET 2010
Author: conrad
Date: Thu Jan 21 10:46:57 2010
New Revision: 21360
Log:
Use two separate memory operands (tmp0[0] and tmp0[1]), since "8+%0" can expand to "8+(reg)", which is invalid gas syntax
Modified:
trunk/libavcodec/x86/h264dsp_mmx.c
Modified: trunk/libavcodec/x86/h264dsp_mmx.c
==============================================================================
--- trunk/libavcodec/x86/h264dsp_mmx.c Wed Jan 20 23:55:55 2010 (r21359)
+++ trunk/libavcodec/x86/h264dsp_mmx.c Thu Jan 21 10:46:57 2010 (r21360)
@@ -617,7 +617,7 @@ static void ff_h264_idct_add8_sse2(uint8
"pavgb %%mm2, "#tmp" \n\t"\
"pavgb "#tmp", "#q2" \n\t" /* avg(p2,avg(p0,q0)) */\
"pxor "q2addr", "#tmp" \n\t"\
- "pand %8, "#tmp" \n\t" /* (p2^avg(p0,q0))&1 */\
+ "pand %9, "#tmp" \n\t" /* (p2^avg(p0,q0))&1 */\
"psubusb "#tmp", "#q2" \n\t" /* (p2+((p0+q0+1)>>1))>>1 */\
"movq "#p1", "#tmp" \n\t"\
"psubusb "#tc0", "#tmp" \n\t"\
@@ -631,48 +631,48 @@ static inline void h264_loop_filter_luma
DECLARE_ALIGNED_8(uint64_t, tmp0[2]);
__asm__ volatile(
- "movq (%1,%3), %%mm0 \n\t" //p1
- "movq (%1,%3,2), %%mm1 \n\t" //p0
- "movq (%2), %%mm2 \n\t" //q0
- "movq (%2,%3), %%mm3 \n\t" //q1
- H264_DEBLOCK_MASK(%6, %7)
+ "movq (%2,%4), %%mm0 \n\t" //p1
+ "movq (%2,%4,2), %%mm1 \n\t" //p0
+ "movq (%3), %%mm2 \n\t" //q0
+ "movq (%3,%4), %%mm3 \n\t" //q1
+ H264_DEBLOCK_MASK(%7, %8)
- "movd %5, %%mm4 \n\t"
+ "movd %6, %%mm4 \n\t"
"punpcklbw %%mm4, %%mm4 \n\t"
"punpcklwd %%mm4, %%mm4 \n\t"
"pcmpeqb %%mm3, %%mm3 \n\t"
"movq %%mm4, %%mm6 \n\t"
"pcmpgtb %%mm3, %%mm4 \n\t"
- "movq %%mm6, 8+%0 \n\t"
+ "movq %%mm6, %1 \n\t"
"pand %%mm4, %%mm7 \n\t"
"movq %%mm7, %0 \n\t"
/* filter p1 */
- "movq (%1), %%mm3 \n\t" //p2
+ "movq (%2), %%mm3 \n\t" //p2
DIFF_GT2_MMX(%%mm1, %%mm3, %%mm5, %%mm6, %%mm4) // |p2-p0|>beta-1
"pand %%mm7, %%mm6 \n\t" // mask & |p2-p0|<beta
- "pand 8+%0, %%mm7 \n\t" // mask & tc0
+ "pand %1, %%mm7 \n\t" // mask & tc0
"movq %%mm7, %%mm4 \n\t"
"psubb %%mm6, %%mm7 \n\t"
"pand %%mm4, %%mm6 \n\t" // mask & |p2-p0|<beta & tc0
- H264_DEBLOCK_Q1(%%mm0, %%mm3, "(%1)", "(%1,%3)", %%mm6, %%mm4)
+ H264_DEBLOCK_Q1(%%mm0, %%mm3, "(%2)", "(%2,%4)", %%mm6, %%mm4)
/* filter q1 */
- "movq (%2,%3,2), %%mm4 \n\t" //q2
+ "movq (%3,%4,2), %%mm4 \n\t" //q2
DIFF_GT2_MMX(%%mm2, %%mm4, %%mm5, %%mm6, %%mm3) // |q2-q0|>beta-1
"pand %0, %%mm6 \n\t"
- "movq 8+%0, %%mm5 \n\t" // can be merged with the and below but is slower then
+ "movq %1, %%mm5 \n\t" // can be merged with the and below but is slower then
"pand %%mm6, %%mm5 \n\t"
"psubb %%mm6, %%mm7 \n\t"
- "movq (%2,%3), %%mm3 \n\t"
- H264_DEBLOCK_Q1(%%mm3, %%mm4, "(%2,%3,2)", "(%2,%3)", %%mm5, %%mm6)
+ "movq (%3,%4), %%mm3 \n\t"
+ H264_DEBLOCK_Q1(%%mm3, %%mm4, "(%3,%4,2)", "(%3,%4)", %%mm5, %%mm6)
/* filter p0, q0 */
- H264_DEBLOCK_P0_Q0(%8, unused)
- "movq %%mm1, (%1,%3,2) \n\t"
- "movq %%mm2, (%2) \n\t"
+ H264_DEBLOCK_P0_Q0(%9, unused)
+ "movq %%mm1, (%2,%4,2) \n\t"
+ "movq %%mm2, (%3) \n\t"
- : "=m"(*tmp0)
+ : "=m"(tmp0[0]), "=m"(tmp0[1])
: "r"(pix-3*stride), "r"(pix), "r"((x86_reg)stride),
"m"(*tmp0/*unused*/), "m"(*(uint32_t*)tc0), "m"(alpha1), "m"(beta1),
"m"(ff_bone)
More information about the ffmpeg-cvslog mailing list