[FFmpeg-devel] [PATCH] avfilter/vf_blend: add x86 SIMD for some modes

Paul B Mahol onemda at gmail.com
Fri Oct 2 19:02:26 CEST 2015


On 10/2/15, Paul B Mahol <onemda at gmail.com> wrote:
> Signed-off-by: Paul B Mahol <onemda at gmail.com>
> ---
>  libavfilter/blend.h             |  70 ++++++++++
>  libavfilter/vf_blend.c          | 100 +++++----------
>  libavfilter/x86/Makefile        |   4 +
>  libavfilter/x86/vf_blend.asm    | 278 ++++++++++++++++++++++++++++++++++++++++
>  libavfilter/x86/vf_blend_init.c |  97 ++++++++++++++
>  5 files changed, 478 insertions(+), 71 deletions(-)
>  create mode 100644 libavfilter/blend.h
>  create mode 100644 libavfilter/x86/vf_blend.asm
>  create mode 100644 libavfilter/x86/vf_blend_init.c
>

[..]

> +
> +cglobal blend_darken, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
> +    add      topq, widthq
> +    add   bottomq, widthq
> +    add      dstq, widthq
> +    sub      endq, startq
> +    neg    widthq
> +.nextrow:
> +    mov       r10q, widthq
> +    %define      x  r10q
> +
> +    .loop:
> +        movh            m0, [topq + x]
> +        movh            m1, [bottomq + x]
> +        pminub          m0, m1
> +        movh    [dstq + x], m0
> +        add           r10q, mmsize / 2

Removed the division (`mmsize / 2`) from the line above.

> +    jl .loop
> +
> +    add          topq, top_linesizeq
> +    add       bottomq, bottom_linesizeq
> +    add          dstq, dst_linesizeq
> +    sub          endd, 1
> +    jg .nextrow
> +REP_RET
> +
> +cglobal blend_lighten, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
> +    add      topq, widthq
> +    add   bottomq, widthq
> +    add      dstq, widthq
> +    sub      endq, startq
> +    neg    widthq
> +.nextrow:
> +    mov       r10q, widthq
> +    %define      x  r10q
> +
> +    .loop:
> +        movh            m0, [topq + x]
> +        movh            m1, [bottomq + x]
> +        pmaxub          m0, m1
> +        movh    [dstq + x], m0
> +        add           r10q, mmsize / 2

Removed the division (`mmsize / 2`) from the line above.

> +    jl .loop
> +
> +    add          topq, top_linesizeq
> +    add       bottomq, bottom_linesizeq
> +    add          dstq, dst_linesizeq
> +    sub          endd, 1
> +    jg .nextrow
> +REP_RET
> +
> +%endif

[...]


More information about the ffmpeg-devel mailing list