[FFmpeg-devel] [PATCH] Added AVX2 implementation for VP8 decoder (ff_pred16x16_tm_vp8_8_avx2)

Ronald S. Bultje rsbultje at gmail.com
Mon Mar 20 15:53:57 EET 2017


Hi,

On Sat, Mar 18, 2017 at 3:50 PM, Mirage Abeysekara <mirage.12 at cse.mrt.ac.lk>
wrote:

> ---
>  libavcodec/x86/h264_intrapred.asm    | 37 +++++++++++++++++++++++++++++++++++++
>  libavcodec/x86/h264_intrapred_init.c |  7 +++++++
>  2 files changed, 44 insertions(+)
>
> diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
> index c88d91b..0f3b462 100644
> --- a/libavcodec/x86/h264_intrapred.asm
> +++ b/libavcodec/x86/h264_intrapred.asm
> @@ -268,6 +268,43 @@ cglobal pred16x16_tm_vp8_8, 2,6,6
>      jg .loop
>      REP_RET
>
> +%if HAVE_AVX2_EXTERNAL
> +INIT_YMM avx2
> +cglobal pred16x16_tm_vp8_8, 2, 4, 5, dst, stride, stride3, iteration
> +    sub                       dstq, strideq
> +    pmovzxbw                    m0, [dstq]
> +    vpbroadcastb               xm1, [r0-1]
> +    pmovzxbw                    m1, xm1
> +    psubw                       m0, m1
> +    mov                 iterationd, 4
> +    lea                   stride3q, [strideq*3]
> +.loop:
> +    vpbroadcastb               xm1, [dstq+strideq*1-1]
> +    vpbroadcastb               xm2, [dstq+strideq*2-1]
> +    vpbroadcastb               xm3, [dstq+stride3q-1]
> +    vpbroadcastb               xm4, [dstq+strideq*4-1]
> +    pmovzxbw                    m1, xm1
> +    pmovzxbw                    m2, xm2
> +    pmovzxbw                    m3, xm3
> +    pmovzxbw                    m4, xm4
> +    paddw                       m1, m0
> +    paddw                       m2, m0
> +    paddw                       m3, m0
> +    paddw                       m4, m0
> +    vpackuswb                   m1, m1, m2
> +    vpackuswb                   m3, m3, m4
> +    vpermq                      m1, m1, q3120
> +    vpermq                      m3, m3, q3120
> +    movdqa        [dstq+strideq*1], xm1
> +    vextracti128  [dstq+strideq*2], m1, 1
> +    movdqa       [dstq+stride3q*1], xm3
> +    vextracti128  [dstq+strideq*4], m3, 1
> +    lea                       dstq, [dstq+strideq*4]
> +    dec                 iterationd
> +    jg .loop
> +    REP_RET
> +%endif
> +
>  ;-----------------------------------------------------------------------------
>  ; void ff_pred16x16_plane_*_8(uint8_t *src, int stride)
>  ;-----------------------------------------------------------------------------
> diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
> index 528b92e..bdd5125 100644
> --- a/libavcodec/x86/h264_intrapred_init.c
> +++ b/libavcodec/x86/h264_intrapred_init.c
> @@ -127,6 +127,7 @@ PRED16x16(plane_svq3, 8, ssse3)
>  PRED16x16(tm_vp8, 8, mmx)
>  PRED16x16(tm_vp8, 8, mmxext)
>  PRED16x16(tm_vp8, 8, sse2)
> +PRED16x16(tm_vp8, 8, avx2)
>
>  PRED8x8(top_dc, 8, mmxext)
>  PRED8x8(dc_rv40, 8, mmxext)
> @@ -323,6 +324,12 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
>                  }
>              }
>          }
> +
> +        if(EXTERNAL_AVX2(cpu_flags)){
> +            if (codec_id == AV_CODEC_ID_VP8) {
> +                h->pred16x16[PLANE_PRED8x8    ] = ff_pred16x16_tm_vp8_8_avx2;
> +            }
> +        }
>      } else if (bit_depth == 10) {
>          if (EXTERNAL_MMXEXT(cpu_flags)) {
>              h->pred4x4[DC_PRED             ] = ff_pred4x4_dc_10_mmxext;
> --
> 2.7.4


Pushed.

Ronald


More information about the ffmpeg-devel mailing list