[FFmpeg-devel] [PATCH 4/4] huffyuvencdsp: Add ff_diff_bytes_avx2

Ganesh Ajjanagadde gajjanag at mit.edu
Mon Oct 19 22:36:26 CEST 2015


On Mon, Oct 19, 2015 at 4:00 PM, Timothy Gu <timothygu99 at gmail.com> wrote:
> About 16% faster on large clips (>1200px width), more than 2x slower on small clips
> (352px). So use a heuristic to select which one to use.

What system, what compiler, etc.? Without any such information, numbers
are meaningless. Please either give them in full, or not at all --
particularly here, since there is this "voodoo" threshold that needs to
be picked.

> ---
>  libavcodec/huffyuvenc.c            | 6 +++---
>  libavcodec/huffyuvencdsp.c         | 4 ++--
>  libavcodec/huffyuvencdsp.h         | 4 ++--
>  libavcodec/pngenc.c                | 2 +-
>  libavcodec/utvideoenc.c            | 2 +-
>  libavcodec/x86/huffyuvencdsp.asm   | 5 +++++
>  libavcodec/x86/huffyuvencdsp_mmx.c | 9 ++++++++-
>  7 files changed, 22 insertions(+), 10 deletions(-)
>
> diff --git a/libavcodec/huffyuvenc.c b/libavcodec/huffyuvenc.c
> index 49d711a..7e133b5 100644
> --- a/libavcodec/huffyuvenc.c
> +++ b/libavcodec/huffyuvenc.c
> @@ -60,12 +60,12 @@ static inline int sub_left_prediction(HYuvContext *s, uint8_t *dst,
>              }
>              return left;
>          } else {
> -            for (i = 0; i < 16; i++) {
> +            for (i = 0; i < 32; i++) {
>                  const int temp = src[i];
>                  dst[i] = temp - left;
>                  left   = temp;
>              }
> -            s->hencdsp.diff_bytes(dst + 16, src + 16, src + 15, w - 16);
> +            s->hencdsp.diff_bytes(dst + 32, src + 32, src + 31, w - 32);
>              return src[w-1];
>          }
>      } else {
> @@ -217,7 +217,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
>      const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
>
>      ff_huffyuv_common_init(avctx);
> -    ff_huffyuvencdsp_init(&s->hencdsp);
> +    ff_huffyuvencdsp_init(&s->hencdsp, s->width);
>
>      avctx->extradata = av_mallocz(3*MAX_N + 4);
>      if (s->flags&AV_CODEC_FLAG_PASS1) {
> diff --git a/libavcodec/huffyuvencdsp.c b/libavcodec/huffyuvencdsp.c
> index fdcd0b0..08bfd63 100644
> --- a/libavcodec/huffyuvencdsp.c
> +++ b/libavcodec/huffyuvencdsp.c
> @@ -74,11 +74,11 @@ static void sub_hfyu_median_pred_c(uint8_t *dst, const uint8_t *src1,
>      *left_top = lt;
>  }
>
> -av_cold void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c)
> +av_cold void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c, int w)
>  {
>      c->diff_bytes           = diff_bytes_c;
>      c->sub_hfyu_median_pred = sub_hfyu_median_pred_c;
>
>      if (ARCH_X86)
> -        ff_huffyuvencdsp_init_x86(c);
> +        ff_huffyuvencdsp_init_x86(c, w);
>  }
> diff --git a/libavcodec/huffyuvencdsp.h b/libavcodec/huffyuvencdsp.h
> index 9d09095..d66590b 100644
> --- a/libavcodec/huffyuvencdsp.h
> +++ b/libavcodec/huffyuvencdsp.h
> @@ -35,7 +35,7 @@ typedef struct HuffYUVEncDSPContext {
>                                   int *left, int *left_top);
>  } HuffYUVEncDSPContext;
>
> -void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c);
> -void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c);
> +void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c, int w);
> +void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c, int w);
>
>  #endif /* AVCODEC_HUFFYUVENCDSP_H */
> diff --git a/libavcodec/pngenc.c b/libavcodec/pngenc.c
> index 4204df2..26cde92 100644
> --- a/libavcodec/pngenc.c
> +++ b/libavcodec/pngenc.c
> @@ -981,7 +981,7 @@ FF_DISABLE_DEPRECATION_WARNINGS
>  FF_ENABLE_DEPRECATION_WARNINGS
>  #endif
>
> -    ff_huffyuvencdsp_init(&s->hdsp);
> +    ff_huffyuvencdsp_init(&s->hdsp, avctx->width);
>
>      s->filter_type = av_clip(avctx->prediction_method,
>                               PNG_FILTER_VALUE_NONE,
> diff --git a/libavcodec/utvideoenc.c b/libavcodec/utvideoenc.c
> index b8e1cc3..4753cfa 100644
> --- a/libavcodec/utvideoenc.c
> +++ b/libavcodec/utvideoenc.c
> @@ -109,7 +109,7 @@ static av_cold int utvideo_encode_init(AVCodecContext *avctx)
>      }
>
>      ff_bswapdsp_init(&c->bdsp);
> -    ff_huffyuvencdsp_init(&c->hdsp);
> +    ff_huffyuvencdsp_init(&c->hdsp, avctx->width);
>
>      /* Check the prediction method, and error out if unsupported */
>      if (avctx->prediction_method < 0 || avctx->prediction_method > 4) {
> diff --git a/libavcodec/x86/huffyuvencdsp.asm b/libavcodec/x86/huffyuvencdsp.asm
> index 9625fbe..85a6616 100644
> --- a/libavcodec/x86/huffyuvencdsp.asm
> +++ b/libavcodec/x86/huffyuvencdsp.asm
> @@ -65,3 +65,8 @@ DIFF_BYTES
>
>  INIT_XMM sse2
>  DIFF_BYTES
> +
> +%if HAVE_AVX2_EXTERNAL
> +INIT_YMM avx2
> +DIFF_BYTES
> +%endif
> diff --git a/libavcodec/x86/huffyuvencdsp_mmx.c b/libavcodec/x86/huffyuvencdsp_mmx.c
> index 9af5305..3eda0ba 100644
> --- a/libavcodec/x86/huffyuvencdsp_mmx.c
> +++ b/libavcodec/x86/huffyuvencdsp_mmx.c
> @@ -33,6 +33,8 @@ void ff_diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
>                         intptr_t w);
>  void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
>                          intptr_t w);
> +void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
> +                        intptr_t w);
>
>  #if HAVE_INLINE_ASM
>
> @@ -78,7 +80,7 @@ static void sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
>
>  #endif /* HAVE_INLINE_ASM */
>
> -av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c)
> +av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c, int w)
>  {
>      av_unused int cpu_flags = av_get_cpu_flags();
>
> @@ -93,4 +95,9 @@ av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c)
>      if (EXTERNAL_SSE2(cpu_flags)) {
>          c->diff_bytes = ff_diff_bytes_sse2;
>      }
> +
> +    // avx2 version only faster than sse2 when width is sufficiently large
> +    if (EXTERNAL_AVX2(cpu_flags) && w > 1200) {
> +        c->diff_bytes = ff_diff_bytes_avx2;
> +    }
>  }
> --
> 1.9.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


More information about the ffmpeg-devel mailing list