[FFmpeg-cvslog] r25597 - trunk/libavcodec/x86/h264_qpel_mmx.c
Yuriy Kaminskiy
yumkam
Thu Oct 28 21:38:39 CEST 2010
ramiro wrote:
> Author: ramiro
> Date: Thu Oct 28 20:22:21 2010
> New Revision: 25597
>
> Log:
> h264dsp: merge some more asm blocks
>
> Modified:
> trunk/libavcodec/x86/h264_qpel_mmx.c
>
> Modified: trunk/libavcodec/x86/h264_qpel_mmx.c
> ==============================================================================
> --- trunk/libavcodec/x86/h264_qpel_mmx.c Thu Oct 28 15:20:26 2010 (r25596)
> +++ trunk/libavcodec/x86/h264_qpel_mmx.c Thu Oct 28 20:22:21 2010 (r25597)
> @@ -31,8 +31,8 @@
> "psubw "#B", "#T" \n\t"\
> "psubw "#E", "#T" \n\t"\
> "punpcklbw "#Z", "#F" \n\t"\
> - "pmullw %4, "#T" \n\t"\
> - "paddw %5, "#A" \n\t"\
> + "pmullw "MANGLE(ff_pw_5)", "#T"\n\t"\
> + "paddw "MANGLE(ff_pw_16)", "#A"\n\t"\
> "add %2, %0 \n\t"\
> "paddw "#F", "#A" \n\t"\
> "paddw "#A", "#T" \n\t"\
> @@ -46,11 +46,11 @@
> "mov"#d" (%0), "#F" \n\t"\
> "paddw "#D", "#T" \n\t"\
> "psllw $2, "#T" \n\t"\
> - "paddw %4, "#A" \n\t"\
> + "paddw "MANGLE(ff_pw_16)", "#A"\n\t"\
> "psubw "#B", "#T" \n\t"\
> "psubw "#E", "#T" \n\t"\
> "punpcklbw "#Z", "#F" \n\t"\
> - "pmullw %3, "#T" \n\t"\
> + "pmullw "MANGLE(ff_pw_5)", "#T"\n\t"\
> "paddw "#F", "#A" \n\t"\
> "add %2, %0 \n\t"\
> "paddw "#A", "#T" \n\t"\
> @@ -204,7 +204,7 @@ static av_noinline void OPNAME ## h264_q
> QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*8*3)\
> \
> : "+a"(src)\
> - : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
> + : "c"(tmp), "S"((x86_reg)srcStride)\
> : "memory"\
> );\
> tmp += 4;\
> @@ -385,13 +385,8 @@ static av_noinline void OPNAME ## h264_q
> QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
> QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
> QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
> - \
> - : "+a"(src), "+c"(dst)\
> - : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
> - : "memory"\
> - );\
> - if(h==16){\
> - __asm__ volatile(\
> + "cmpl $16, %4 \n\t"\
> + "jne 2f \n\t"\
> QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
> QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
> QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
> @@ -400,12 +395,12 @@ static av_noinline void OPNAME ## h264_q
> QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
> QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
> QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
> + "2: \n\t"\
> \
> : "+a"(src), "+c"(dst)\
> - : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
> + : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "g"(h)\
> : "memory"\
> );\
> - }\
> src += 4-(h+5)*srcStride;\
> dst += 4-h*dstStride;\
> }\
> @@ -439,12 +434,8 @@ static av_always_inline void OPNAME ## h
> QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 5*48)\
> QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 6*48)\
> QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 7*48)\
> - : "+a"(src)\
> - : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
> - : "memory"\
> - );\
> - if(size==16){\
Size is a compile-time constant, so previously this check was either always
true or always false; now it is always evaluated at runtime.
> @@ -811,13 +802,8 @@ static av_noinline void OPNAME ## h264_q
> QPEL_H264V_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, OP)\
> QPEL_H264V_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, OP)\
> QPEL_H264V_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, OP)\
> - \
> - : "+a"(src), "+c"(dst)\
> - : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
> - : "memory"\
> - );\
> - if(h==16){\
Same here: h is a compile-time constant.
> - __asm__ volatile(\
> + "cmpl $16, %4 \n\t"\
> + "jne 2f \n\t"\
> QPEL_H264V_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, OP)\
> QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\
> QPEL_H264V_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, OP)\
> @@ -870,12 +856,8 @@ static av_always_inline void put_h264_qp
> QPEL_H264HV_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, 5*48)
> QPEL_H264HV_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, 6*48)
> QPEL_H264HV_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, 7*48)
> - : "+a"(src)
> - : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)
> - : "memory"
> - );
> - if(size==16){
Same here.
> - __asm__ volatile(
> + "cmpl $16, %3 \n\t"
> + "jne 2f \n\t"
> QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, 8*48)
> QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 9*48)
> QPEL_H264HV_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, 10*48)
More information about the ffmpeg-cvslog
mailing list