[FFmpeg-cvslog] r25597 - trunk/libavcodec/x86/h264_qpel_mmx.c

Yuriy Kaminskiy yumkam
Thu Oct 28 21:38:39 CEST 2010


ramiro wrote:
> Author: ramiro
> Date: Thu Oct 28 20:22:21 2010
> New Revision: 25597
> 
> Log:
> h264dsp: merge some more asm blocks
> 
> Modified:
>    trunk/libavcodec/x86/h264_qpel_mmx.c
> 
> Modified: trunk/libavcodec/x86/h264_qpel_mmx.c
> ==============================================================================
> --- trunk/libavcodec/x86/h264_qpel_mmx.c	Thu Oct 28 15:20:26 2010	(r25596)
> +++ trunk/libavcodec/x86/h264_qpel_mmx.c	Thu Oct 28 20:22:21 2010	(r25597)
> @@ -31,8 +31,8 @@
>          "psubw "#B", "#T"           \n\t"\
>          "psubw "#E", "#T"           \n\t"\
>          "punpcklbw "#Z", "#F"       \n\t"\
> -        "pmullw %4, "#T"            \n\t"\
> -        "paddw %5, "#A"             \n\t"\
> +        "pmullw "MANGLE(ff_pw_5)", "#T"\n\t"\
> +        "paddw "MANGLE(ff_pw_16)", "#A"\n\t"\
>          "add %2, %0                 \n\t"\
>          "paddw "#F", "#A"           \n\t"\
>          "paddw "#A", "#T"           \n\t"\
> @@ -46,11 +46,11 @@
>          "mov"#d" (%0), "#F"         \n\t"\
>          "paddw "#D", "#T"           \n\t"\
>          "psllw $2, "#T"             \n\t"\
> -        "paddw %4, "#A"             \n\t"\
> +        "paddw "MANGLE(ff_pw_16)", "#A"\n\t"\
>          "psubw "#B", "#T"           \n\t"\
>          "psubw "#E", "#T"           \n\t"\
>          "punpcklbw "#Z", "#F"       \n\t"\
> -        "pmullw %3, "#T"            \n\t"\
> +        "pmullw "MANGLE(ff_pw_5)", "#T"\n\t"\
>          "paddw "#F", "#A"           \n\t"\
>          "add %2, %0                 \n\t"\
>          "paddw "#A", "#T"           \n\t"\
> @@ -204,7 +204,7 @@ static av_noinline void OPNAME ## h264_q
>              QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*8*3)\
>               \
>              : "+a"(src)\
> -            : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
> +            : "c"(tmp), "S"((x86_reg)srcStride)\
>              : "memory"\
>          );\
>          tmp += 4;\
> @@ -385,13 +385,8 @@ static av_noinline void OPNAME ## h264_q
>          QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
>          QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
>          QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
> -         \
> -        : "+a"(src), "+c"(dst)\
> -        : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
> -        : "memory"\
> -     );\
> -     if(h==16){\
> -        __asm__ volatile(\
> +        "cmpl $16, %4               \n\t"\
> +        "jne 2f                     \n\t"\
>              QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
>              QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
>              QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
> @@ -400,12 +395,12 @@ static av_noinline void OPNAME ## h264_q
>              QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
>              QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
>              QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
> +        "2:                         \n\t"\
>              \
>             : "+a"(src), "+c"(dst)\
> -           : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
> +           : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "g"(h)\
>             : "memory"\
>          );\
> -     }\
>       src += 4-(h+5)*srcStride;\
>       dst += 4-h*dstStride;\
>     }\
> @@ -439,12 +434,8 @@ static av_always_inline void OPNAME ## h
>              QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 5*48)\
>              QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 6*48)\
>              QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 7*48)\
> -            : "+a"(src)\
> -            : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
> -            : "memory"\
> -        );\
> -        if(size==16){\

`size` is a compile-time constant, so before this change the check was always
true or always false at compile time; now it is always evaluated at runtime.

> @@ -811,13 +802,8 @@ static av_noinline void OPNAME ## h264_q
>          QPEL_H264V_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, OP)\
>          QPEL_H264V_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, OP)\
>          QPEL_H264V_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, OP)\
> -         \
> -        : "+a"(src), "+c"(dst)\
> -        : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
> -        : "memory"\
> -    );\
> -    if(h==16){\
Same here: h is a compile-time constant.

> -        __asm__ volatile(\
> +        "cmpl $16, %4               \n\t"\
> +        "jne 2f                     \n\t"\
>              QPEL_H264V_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, OP)\
>              QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\
>              QPEL_H264V_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, OP)\
> @@ -870,12 +856,8 @@ static av_always_inline void put_h264_qp
>              QPEL_H264HV_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, 5*48)
>              QPEL_H264HV_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, 6*48)
>              QPEL_H264HV_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, 7*48)
> -            : "+a"(src)
> -            : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)
> -            : "memory"
> -        );
> -        if(size==16){
Same here: size is a compile-time constant.
> -            __asm__ volatile(
> +            "cmpl $16, %3               \n\t"
> +            "jne 2f                     \n\t"
>                  QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1,  8*48)
>                  QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2,  9*48)
>                  QPEL_H264HV_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, 10*48)




More information about the ffmpeg-cvslog mailing list