[FFmpeg-devel] [PATCH 4/4] huffyuvencdsp: Add ff_diff_bytes_avx2
Ganesh Ajjanagadde
gajjanag at mit.edu
Mon Oct 19 22:36:26 CEST 2015
On Mon, Oct 19, 2015 at 4:00 PM, Timothy Gu <timothygu99 at gmail.com> wrote:
> About 16% faster on large clips (>1200px width), more than 2x slower on small clips
> (352px). So using a heuristic to select with one to use.
What system, what compiler, etc? Without any such information, numbers
are meaningless. Please either give them in full, or not at all -
particularly here since there is this "voodoo" threshold that needs to
be picked.
> ---
> libavcodec/huffyuvenc.c | 6 +++---
> libavcodec/huffyuvencdsp.c | 4 ++--
> libavcodec/huffyuvencdsp.h | 4 ++--
> libavcodec/pngenc.c | 2 +-
> libavcodec/utvideoenc.c | 2 +-
> libavcodec/x86/huffyuvencdsp.asm | 5 +++++
> libavcodec/x86/huffyuvencdsp_mmx.c | 9 ++++++++-
> 7 files changed, 22 insertions(+), 10 deletions(-)
>
> diff --git a/libavcodec/huffyuvenc.c b/libavcodec/huffyuvenc.c
> index 49d711a..7e133b5 100644
> --- a/libavcodec/huffyuvenc.c
> +++ b/libavcodec/huffyuvenc.c
> @@ -60,12 +60,12 @@ static inline int sub_left_prediction(HYuvContext *s, uint8_t *dst,
> }
> return left;
> } else {
> - for (i = 0; i < 16; i++) {
> + for (i = 0; i < 32; i++) {
> const int temp = src[i];
> dst[i] = temp - left;
> left = temp;
> }
> - s->hencdsp.diff_bytes(dst + 16, src + 16, src + 15, w - 16);
> + s->hencdsp.diff_bytes(dst + 32, src + 32, src + 31, w - 32);
> return src[w-1];
> }
> } else {
> @@ -217,7 +217,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
> const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
>
> ff_huffyuv_common_init(avctx);
> - ff_huffyuvencdsp_init(&s->hencdsp);
> + ff_huffyuvencdsp_init(&s->hencdsp, s->width);
>
> avctx->extradata = av_mallocz(3*MAX_N + 4);
> if (s->flags&AV_CODEC_FLAG_PASS1) {
> diff --git a/libavcodec/huffyuvencdsp.c b/libavcodec/huffyuvencdsp.c
> index fdcd0b0..08bfd63 100644
> --- a/libavcodec/huffyuvencdsp.c
> +++ b/libavcodec/huffyuvencdsp.c
> @@ -74,11 +74,11 @@ static void sub_hfyu_median_pred_c(uint8_t *dst, const uint8_t *src1,
> *left_top = lt;
> }
>
> -av_cold void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c)
> +av_cold void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c, int w)
> {
> c->diff_bytes = diff_bytes_c;
> c->sub_hfyu_median_pred = sub_hfyu_median_pred_c;
>
> if (ARCH_X86)
> - ff_huffyuvencdsp_init_x86(c);
> + ff_huffyuvencdsp_init_x86(c, w);
> }
> diff --git a/libavcodec/huffyuvencdsp.h b/libavcodec/huffyuvencdsp.h
> index 9d09095..d66590b 100644
> --- a/libavcodec/huffyuvencdsp.h
> +++ b/libavcodec/huffyuvencdsp.h
> @@ -35,7 +35,7 @@ typedef struct HuffYUVEncDSPContext {
> int *left, int *left_top);
> } HuffYUVEncDSPContext;
>
> -void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c);
> -void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c);
> +void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c, int w);
> +void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c, int w);
>
> #endif /* AVCODEC_HUFFYUVENCDSP_H */
> diff --git a/libavcodec/pngenc.c b/libavcodec/pngenc.c
> index 4204df2..26cde92 100644
> --- a/libavcodec/pngenc.c
> +++ b/libavcodec/pngenc.c
> @@ -981,7 +981,7 @@ FF_DISABLE_DEPRECATION_WARNINGS
> FF_ENABLE_DEPRECATION_WARNINGS
> #endif
>
> - ff_huffyuvencdsp_init(&s->hdsp);
> + ff_huffyuvencdsp_init(&s->hdsp, avctx->width);
>
> s->filter_type = av_clip(avctx->prediction_method,
> PNG_FILTER_VALUE_NONE,
> diff --git a/libavcodec/utvideoenc.c b/libavcodec/utvideoenc.c
> index b8e1cc3..4753cfa 100644
> --- a/libavcodec/utvideoenc.c
> +++ b/libavcodec/utvideoenc.c
> @@ -109,7 +109,7 @@ static av_cold int utvideo_encode_init(AVCodecContext *avctx)
> }
>
> ff_bswapdsp_init(&c->bdsp);
> - ff_huffyuvencdsp_init(&c->hdsp);
> + ff_huffyuvencdsp_init(&c->hdsp, avctx->width);
>
> /* Check the prediction method, and error out if unsupported */
> if (avctx->prediction_method < 0 || avctx->prediction_method > 4) {
> diff --git a/libavcodec/x86/huffyuvencdsp.asm b/libavcodec/x86/huffyuvencdsp.asm
> index 9625fbe..85a6616 100644
> --- a/libavcodec/x86/huffyuvencdsp.asm
> +++ b/libavcodec/x86/huffyuvencdsp.asm
> @@ -65,3 +65,8 @@ DIFF_BYTES
>
> INIT_XMM sse2
> DIFF_BYTES
> +
> +%if HAVE_AVX2_EXTERNAL
> +INIT_YMM avx2
> +DIFF_BYTES
> +%endif
> diff --git a/libavcodec/x86/huffyuvencdsp_mmx.c b/libavcodec/x86/huffyuvencdsp_mmx.c
> index 9af5305..3eda0ba 100644
> --- a/libavcodec/x86/huffyuvencdsp_mmx.c
> +++ b/libavcodec/x86/huffyuvencdsp_mmx.c
> @@ -33,6 +33,8 @@ void ff_diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
> intptr_t w);
> void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
> intptr_t w);
> +void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
> + intptr_t w);
>
> #if HAVE_INLINE_ASM
>
> @@ -78,7 +80,7 @@ static void sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
>
> #endif /* HAVE_INLINE_ASM */
>
> -av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c)
> +av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c, int w)
> {
> av_unused int cpu_flags = av_get_cpu_flags();
>
> @@ -93,4 +95,9 @@ av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c)
> if (EXTERNAL_SSE2(cpu_flags)) {
> c->diff_bytes = ff_diff_bytes_sse2;
> }
> +
> + // avx2 version only faster than sse2 when width is sufficiently large
> + if (EXTERNAL_AVX2(cpu_flags) && w > 1200) {
> + c->diff_bytes = ff_diff_bytes_avx2;
> + }
> }
> --
> 1.9.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
More information about the ffmpeg-devel
mailing list