[FFmpeg-devel] [PATCH] [WIP] dnxhdenc: get_pixels_8x4_sym_10bit_sse2

Timothy Gu timothygu99 at gmail.com
Wed Apr 9 05:51:59 CEST 2014


On Tue, Apr 8, 2014 at 8:42 PM, Timothy Gu <timothygu99 at gmail.com> wrote:
> Before:
> 3383 decicycles in dnxhd_10bit_get_pixels_8x4_sym, 130910 runs, 162 skips
> After:
> 750 decicycles in ff_get_pixels_8x4_sym_10bit_sse2, 130999 runs, 73 skips
>
> Overall performance impact negligible.
>
> Signed-off-by: Timothy Gu <timothygu99 at gmail.com>
> ---
>  libavcodec/x86/dnxhdenc.asm    | 41 +++++++++++++++++++++++++++++------------
>  libavcodec/x86/dnxhdenc_init.c |  4 ++++
>  2 files changed, 33 insertions(+), 12 deletions(-)
>
> diff --git a/libavcodec/x86/dnxhdenc.asm b/libavcodec/x86/dnxhdenc.asm
> index 9dd6d51..d42530b 100644
> --- a/libavcodec/x86/dnxhdenc.asm
> +++ b/libavcodec/x86/dnxhdenc.asm
> @@ -26,18 +26,30 @@ section .text
>
>  ; void get_pixels_8x4_sym_sse2(int16_t *block, const uint8_t *pixels,
>  ;                              ptrdiff_t line_size)
> -INIT_XMM sse2
> -cglobal get_pixels_8x4_sym, 3,3,5, block, pixels, linesize
> -    pxor      m4,       m4
> -    movq      m0,       [pixelsq]
> -    add       pixelsq,  linesizeq
> -    movq      m1,       [pixelsq]
> -    movq      m2,       [pixelsq+linesizeq]
> -    movq      m3,       [pixelsq+linesizeq*2]
> -    punpcklbw m0,       m4
> -    punpcklbw m1,       m4
> -    punpcklbw m2,       m4
> -    punpcklbw m3,       m4
> +
> +%macro GET_PIXELS 1
> +%if %1 == 8
> +cglobal get_pixels_8x4_sym,       3,3,5, block, pixels, linesize
> +%elif %1 == 16
> +cglobal get_pixels_8x4_sym_10bit, 3,3,4, block, pixels, linesize
> +%endif
> +    %if %1 == mmsize/2
> +        pxor        m4, m4
> +        %define LOAD movh
> +    %elif %1 == mmsize && %1 == 16
> +        %define LOAD movu
> +    %endif
> +    LOAD            m0, [pixelsq]
> +    add        pixelsq, linesizeq
> +    LOAD            m1, [pixelsq]
> +    LOAD            m2, [pixelsq+linesizeq]
> +    LOAD            m3, [pixelsq+linesizeq*2]

I probably messed up the row loads and the linesize stepping here. Can
someone give me a pointer on how to fix it?

[...]

Timothy


More information about the ffmpeg-devel mailing list