[FFmpeg-devel] [PATCH 1/2] x86/vf_w3fdif: move pxor outside the loop in w3fdif_complex_low

Paul B Mahol onemda at gmail.com
Sun Oct 11 18:28:39 CEST 2015


On 10/11/15, James Almer <jamrial at gmail.com> wrote:
> Signed-off-by: James Almer <jamrial at gmail.com>
> ---
>  libavfilter/x86/vf_w3fdif.asm | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/libavfilter/x86/vf_w3fdif.asm b/libavfilter/x86/vf_w3fdif.asm
> index 988b847..f02319b 100644
> --- a/libavfilter/x86/vf_w3fdif.asm
> +++ b/libavfilter/x86/vf_w3fdif.asm
> @@ -65,11 +65,12 @@ cglobal w3fdif_simple_low, 4, 5, 6, 0, work_line,
> in_lines_cur0, coef, linesize,
>      jg .loop
>  REP_RET
>
> -cglobal w3fdif_complex_low, 4, 7, 7, 0, work_line, in_lines_cur0, coef,
> linesize
> +cglobal w3fdif_complex_low, 4, 7, 8, 0, work_line, in_lines_cur0, coef,
> linesize
>      movq                  m0, [coefq]
>      DEFINE_ARGS    work_line, in_lines_cur0, in_lines_cur1, linesize,
> offset, in_lines_cur2, in_lines_cur3
>      pshufd                m2, m0, q1111
>      SPLATD                m0
> +    pxor                  m1, m1
>      mov              offsetq, 0
>      mov       in_lines_cur3q, [in_lines_cur0q+gprsize*3]
>      mov       in_lines_cur2q, [in_lines_cur0q+gprsize*2]
> @@ -79,17 +80,16 @@ cglobal w3fdif_complex_low, 4, 7, 7, 0, work_line,
> in_lines_cur0, coef, linesize
>  .loop:
>      movh                                   m4, [in_lines_cur0q+offsetq]
>      movh                                   m5, [in_lines_cur1q+offsetq]
> -    pxor                                   m1, m1
>      punpcklbw                              m4, m1
>      punpcklbw                              m5, m1
> -    SBUTTERFLY                             wd, 4, 5, 3
> +    SBUTTERFLY                             wd, 4, 5, 7
>      pmaddwd                                m4, m0
>      pmaddwd                                m5, m0
>      movh                                   m6, [in_lines_cur2q+offsetq]
>      movh                                   m3, [in_lines_cur3q+offsetq]
>      punpcklbw                              m6, m1
>      punpcklbw                              m3, m1
> -    SBUTTERFLY                             wd, 6, 3, 1
> +    SBUTTERFLY                             wd, 6, 3, 7
>      pmaddwd                                m6, m2
>      pmaddwd                                m3, m2
>      paddd                                  m4, m6
> --
> 2.6.0
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>

Fine, provided it still works on x32.


More information about the ffmpeg-devel mailing list