[FFmpeg-devel] [PATCH] avfilter/vf_interlace: add complex vertcal lowpassfilter

James Almer jamrial at gmail.com
Fri Mar 10 21:59:21 EET 2017


On 3/8/2017 1:58 PM, Thomas Mundt wrote:
> Hi,
> 
> attached patch adds a complex (-1 2 6 2 -1) vertcal lowpassfilter to vf_interlace. This will better retain detail and reduce blurring compared to the existing (1 2 1) filter.
> 
> Please comment.
> 
> 
> 0001-avfilter-vf_interlace-add-complex-vertcal-lowpassfil.patch
> 
> 
> From f157bc9b898d1215f8ec10b301720a9d9ff03c63 Mon Sep 17 00:00:00 2001
> From: Thomas Mundt <loudmax at yahoo.de>
> Date: Wed, 8 Mar 2017 17:33:18 +0100
> Subject: [PATCH] avfilter/vf_interlace: add complex vertcal lowpassfilter Add
>  a complex (-1 2 6 2 -1) filter to reduce blurring compared to the existing (1
>  2 1) filter.
> 
> Signed-off-by: Thomas Mundt <loudmax at yahoo.de>
> ---
>  doc/filters.texi                    | 13 ++++++--
>  libavfilter/interlace.h             |  2 ++
>  libavfilter/vf_interlace.c          | 42 ++++++++++++++++++++++--
>  libavfilter/x86/vf_interlace.asm    | 65 +++++++++++++++++++++++++++++++++++++
>  libavfilter/x86/vf_interlace_init.c | 17 +++++++---
>  5 files changed, 130 insertions(+), 9 deletions(-)
> 
> diff --git a/doc/filters.texi b/doc/filters.texi
> index b5265d9..0041d39 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -9109,8 +9109,17 @@ This determines whether the interlaced frame is taken from the even
>  (tff - default) or odd (bff) lines of the progressive frame.
>  
>  @item lowpass
> -Enable (default) or disable the vertical lowpass filter to avoid twitter
> -interlacing and reduce moire patterns.
> +Vertical lowpass filter to avoid twitter interlacing and
> +reduce moire patterns.
> +
> + at table @samp
> + at item 0
> +Disable vertical lowpass filter
> + at item 1
> +Enable linear filter (default)
> + at item 2
> +Enable complex filter
> + at end table
>  @end table
>  
>  @section kerndeint
> diff --git a/libavfilter/interlace.h b/libavfilter/interlace.h
> index da073ae..7ad457e 100644
> --- a/libavfilter/interlace.h
> +++ b/libavfilter/interlace.h
> @@ -51,6 +51,8 @@ typedef struct InterlaceContext {
>      AVFrame *cur, *next;   // the two frames from which the new one is obtained
>      void (*lowpass_line)(uint8_t *dstp, ptrdiff_t linesize, const uint8_t *srcp,
>                           const uint8_t *srcp_above, const uint8_t *srcp_below);
> +    void (*lowpass_line_complex)(uint8_t *dstp, ptrdiff_t linesize,
> +                                 const uint8_t *srcp, int mref, int pref);

Why not keep a single prototype, passing mref and pref for both linear
and complex? You can calculate srcp_above and srcp_below for linear like
you're doing it for complex in both the c and asm versions.

In any case, mref and pref should be ptrdiff_t and not int.

>  } InterlaceContext;
>  
>  void ff_interlace_init_x86(InterlaceContext *interlace);
> diff --git a/libavfilter/vf_interlace.c b/libavfilter/vf_interlace.c
> index efa3128..e8d5de4 100644
> --- a/libavfilter/vf_interlace.c
> +++ b/libavfilter/vf_interlace.c
> @@ -47,7 +47,7 @@ static const AVOption interlace_options[] = {
>      { "bff", "bottom field first", 0,
>          AV_OPT_TYPE_CONST, {.i64 = MODE_BFF }, INT_MIN, INT_MAX, .flags = FLAGS, .unit = "scan" },
>      { "lowpass", "set vertical low-pass filter", OFFSET(lowpass),
> -        AV_OPT_TYPE_BOOL,  {.i64 = 1 },        0, 1, .flags = FLAGS },
> +        AV_OPT_TYPE_INT,   {.i64 = 1 },        0, 2, .flags = FLAGS },

Maybe add AV_OPT_TYPE_CONST values "off", "linear" and "complex".

>      { NULL }
>  };
>  
> @@ -67,6 +67,24 @@ static void lowpass_line_c(uint8_t *dstp, ptrdiff_t linesize,
>      }
>  }
>  
> +static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize,
> +                                   const uint8_t *srcp, int mref, int pref)
> +{
> +    const uint8_t *srcp_above = srcp + mref;
> +    const uint8_t *srcp_below = srcp + pref;
> +    const uint8_t *srcp_above2 = srcp + mref * 2;
> +    const uint8_t *srcp_below2 = srcp + pref * 2;
> +    int i;
> +    for (i = 0; i < linesize; i++) {
> +        // this calculation is an integer representation of
> +        // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 * above2 - 0.125 * below2'
> +        // '4 +' is for rounding.
> +        dstp[i] = av_clip_uint8((4 + (srcp[i] << 2)
> +                  + ((srcp[i] + srcp_above[i] + srcp_below[i]) << 1)
> +                  - srcp_above2[i] - srcp_below2[i]) >> 3);
> +    }
> +}
> +
>  static const enum AVPixelFormat formats_supported[] = {
>      AV_PIX_FMT_YUV420P,  AV_PIX_FMT_YUV422P,  AV_PIX_FMT_YUV444P,
>      AV_PIX_FMT_YUV444P,  AV_PIX_FMT_YUV410P,  AV_PIX_FMT_YUVA420P,
> @@ -116,7 +134,11 @@ static int config_out_props(AVFilterLink *outlink)
>  
>  
>      if (s->lowpass) {
> -        s->lowpass_line = lowpass_line_c;
> +        if (s->lowpass == 1)
> +            s->lowpass_line = lowpass_line_c;
> +        else if (s->lowpass == 2)
> +            s->lowpass_line_complex = lowpass_line_complex_c;
> +
>          if (ARCH_X86)
>              ff_interlace_init_x86(s);
>      }
> @@ -150,7 +172,7 @@ static void copy_picture_field(InterlaceContext *s,
>              srcp += src_frame->linesize[plane];
>              dstp += dst_frame->linesize[plane];
>          }
> -        if (lowpass) {
> +        if (lowpass == 1) {
>              int srcp_linesize = src_frame->linesize[plane] * 2;
>              int dstp_linesize = dst_frame->linesize[plane] * 2;
>              for (j = lines; j > 0; j--) {
> @@ -164,6 +186,20 @@ static void copy_picture_field(InterlaceContext *s,
>                  dstp += dstp_linesize;
>                  srcp += srcp_linesize;
>              }
> +        } else if (lowpass == 2) {
> +            int srcp_linesize = src_frame->linesize[plane] * 2;
> +            int dstp_linesize = dst_frame->linesize[plane] * 2;
> +            for (j = lines; j > 0; j--) {
> +                int pref = src_frame->linesize[plane];
> +                int mref = -pref;
> +                if (j >= (lines - 1))
> +                    mref = 0;
> +                else if (j <= 2)
> +                    pref = 0;
> +                s->lowpass_line_complex(dstp, cols, srcp, mref, pref);
> +                dstp += dstp_linesize;
> +                srcp += srcp_linesize;
> +            }
>          } else {
>              av_image_copy_plane(dstp, dst_frame->linesize[plane] * 2,
>                                  srcp, src_frame->linesize[plane] * 2,
> diff --git a/libavfilter/x86/vf_interlace.asm b/libavfilter/x86/vf_interlace.asm
> index f70c700..777a95f 100644
> --- a/libavfilter/x86/vf_interlace.asm
> +++ b/libavfilter/x86/vf_interlace.asm
> @@ -25,6 +25,8 @@
>  
>  SECTION_RODATA
>  
> +pw_4: times 8 dw 4
> +
>  SECTION .text
>  
>  %macro LOWPASS_LINE 0
> @@ -56,6 +58,63 @@ cglobal lowpass_line, 5, 5, 7
>      add r1, 2*mmsize
>      jl .loop
>  REP_RET
> +
> +%endmacro
> +
> +%macro LOWPASS_LINE_COMPLEX 0
> +cglobal lowpass_line_complex, 3, 5, 7, dst, h, src, mref, pref

Make this 5, 5, 7 and remove the two movs below.

> +    mov r3, mrefmp
> +    mov r4, prefmp
> +
> +    pxor m6, m6
> +.loop:
> +    mova m0, [srcq+r3]

[srcq+mrefq]

> +    mova m2, [srcq+r4]

[srcq+prefq]

> +    mova m1, m0
> +    mova m3, m2
> +    punpcklbw m0, m6
> +    punpcklbw m2, m6
> +    punpckhbw m1, m6
> +    punpckhbw m3, m6
> +    paddw m0, m2
> +    paddw m1, m3
> +    mova m2, [srcq+r3*2]

[srcq+mrefq*2]

> +    mova m4, [srcq+r4*2]

[srcq+prefq*2]

> +    mova m3, m2
> +    mova m5, m4
> +    punpcklbw m2, m6
> +    punpcklbw m4, m6
> +    punpckhbw m3, m6
> +    punpckhbw m5, m6
> +    paddw m2, m4
> +    paddw m3, m5
> +    mova m4, [srcq]
> +    mova m5, m4
> +    punpcklbw m4, m6
> +    punpckhbw m5, m6
> +    paddw m0, m4
> +    paddw m1, m5
> +    psllw m0, 1
> +    psllw m1, 1
> +    psllw m4, 2
> +    psllw m5, 2
> +    paddw m0, m4
> +    paddw m1, m5
> +    paddw m0, [pw_4]
> +    paddw m1, [pw_4]
> +    psubusw m0, m2
> +    psubusw m1, m3
> +    psrlw m0, 3
> +    psrlw m1, 3
> +    packuswb m0, m1
> +    mova [dstq], m0
> +
> +    add dstq, mmsize
> +    add srcq, mmsize
> +    sub DWORD hm, mmsize

sub hd, mmsize

> +    jg .loop
> +REP_RET
> +
>  %endmacro
>  
>  INIT_XMM sse2
> @@ -63,3 +122,9 @@ LOWPASS_LINE
>  
>  INIT_XMM avx
>  LOWPASS_LINE
> +
> +INIT_XMM sse2
> +LOWPASS_LINE_COMPLEX
> +
> +INIT_XMM avx
> +LOWPASS_LINE_COMPLEX

There's no reason to add an AVX version. You're not taking advantage of
the 3 operand instruction format, or any of the new instructions, or the
32 byte regs.

If you merge some of the movas with punpck* instructions and can notice
a difference when benching, then AVX would make sense.

> diff --git a/libavfilter/x86/vf_interlace_init.c b/libavfilter/x86/vf_interlace_init.c
> index 52a22f8..947ff9a 100644
> --- a/libavfilter/x86/vf_interlace_init.c
> +++ b/libavfilter/x86/vf_interlace_init.c
> @@ -35,12 +35,21 @@ void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize,
>                            const uint8_t *srcp_above,
>                            const uint8_t *srcp_below);
>  
> +void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize,
> +                                  const uint8_t *srcp, int mref, int pref);
> +void ff_lowpass_line_complex_avx (uint8_t *dstp, ptrdiff_t linesize,
> +                                  const uint8_t *srcp, int mref, int pref);
> +
>  av_cold void ff_interlace_init_x86(InterlaceContext *s)
>  {
>      int cpu_flags = av_get_cpu_flags();
>  
> -    if (EXTERNAL_SSE2(cpu_flags))
> -        s->lowpass_line = ff_lowpass_line_sse2;
> -    if (EXTERNAL_AVX(cpu_flags))
> -        s->lowpass_line = ff_lowpass_line_avx;
> +    if (EXTERNAL_SSE2(cpu_flags)) {
> +        s->lowpass_line         = ff_lowpass_line_sse2;
> +        s->lowpass_line_complex = ff_lowpass_line_complex_sse2;
> +    }
> +    if (EXTERNAL_AVX(cpu_flags)) {
> +        s->lowpass_line         = ff_lowpass_line_avx;
> +        s->lowpass_line_complex = ff_lowpass_line_complex_avx;
> +    }
>  }
> -- 2.7.4.windows.1
> 
> 
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 



More information about the ffmpeg-devel mailing list