[FFmpeg-devel] [PATCH 3/3 v2] avfilter/interlace: add support for 10 and 12 bit

Sat Sep 23 21:56:53 EEST 2017

2017-09-23 20:24 GMT+02:00 Michael Niedermayer <michael at niedermayer.cc>:

> On Tue, Sep 19, 2017 at 10:35:30PM +0200, Thomas Mundt wrote:
> > 2017-09-19 17:53 GMT+02:00 James Almer <jamrial at gmail.com>:
> >
> > > On 9/19/2017 5:02 AM, Thomas Mundt wrote:
> > > > 2017-09-19 4:09 GMT+02:00 James Almer <jamrial at gmail.com>:
> > > >
> > > >> On 9/18/2017 10:41 PM, Thomas Mundt wrote:
> > > >>> I tried to set up MIPS compiler for two days on windows and linux
> > > without
> > > >>> success.
> > > >>> Now I try it blind. This solution is based on the first suggestion
> > > James
> > > >>> gave me at IRC.
> > > >>> There might be room for improvement and an alternative solution
> with
> > > >>> AV_RL16() / AV_WL16().
> > > >>> I used av_le2ne16() because it will be ignored for little endian.
> > > >>>
> > > >>> Regards,
> > > >>> Thomas
> > > >>
> > > >>> From a2be5859266b1a2f7048b81ced6770ab4b90a5a4 Mon Sep 17 00:00:00
> 2001
> > > >>> From: Thomas Mundt <tmundt75 at gmail.com>
> > > >>> Date: Tue, 19 Sep 2017 00:25:25 +0200
> > > >>> Subject: [PATCH 3/3 v2] avfilter/interlace: add support for 10 and
> 12
> > > bit
> > > >>>
> > > >>> Signed-off-by: Thomas Mundt <tmundt75 at gmail.com>
> > > >>> ---
> > > >>>  libavfilter/interlace.h                        |  5 +-
> > > >>>  libavfilter/tinterlace.h                       |  5 +-
> > > >>>  libavfilter/vf_interlace.c                     | 92
> > > >> ++++++++++++++++++++++----
> > > >>>  libavfilter/vf_tinterlace.c                    | 73
> > > ++++++++++++++++++--
> > > >>>  libavfilter/x86/vf_interlace.asm               | 80
> > > >> ++++++++++++++++++++--
> > > >>>  libavfilter/x86/vf_interlace_init.c            | 51
> ++++++++++----
> > > >>>  libavfilter/x86/vf_tinterlace_init.c           | 51
> ++++++++++----
> > > >>>  tests/ref/fate/filter-pixfmts-tinterlace_cvlpf | 11 +++
> > > >>>  tests/ref/fate/filter-pixfmts-tinterlace_merge | 11 +++
> > > >>>  tests/ref/fate/filter-pixfmts-tinterlace_pad   | 11 +++
> > > >>>  tests/ref/fate/filter-pixfmts-tinterlace_vlpf  | 11 +++
> > > >>>  11 files changed, 345 insertions(+), 56 deletions(-)
> > > >>>
> > > >>> diff --git a/libavfilter/interlace.h b/libavfilter/interlace.h
> > > >>> index 2101b79..90a0198 100644
> > > >>> --- a/libavfilter/interlace.h
> > > >>> +++ b/libavfilter/interlace.h
> > > >>> @@ -25,9 +25,11 @@
> > > >>>  #ifndef AVFILTER_INTERLACE_H
> > > >>>  #define AVFILTER_INTERLACE_H
> > > >>>
> > > >>> +#include "libavutil/bswap.h"
> > > >>>  #include "libavutil/common.h"
> > > >>>  #include "libavutil/imgutils.h"
> > > >>>  #include "libavutil/opt.h"
> > > >>> +#include "libavutil/pixdesc.h"
> > > >>>
> > > >>>  #include "avfilter.h"
> > > >>>  #include "formats.h"
> > > >>> @@ -55,8 +57,9 @@ typedef struct InterlaceContext {
> > > >>>      enum ScanMode scan;    // top or bottom field first scanning
> > > >>>      int lowpass;           // enable or disable low pass filtering
> > > >>>      AVFrame *cur, *next;   // the two frames from which the new
> one is
> > > >> obtained
> > > >>> +    const AVPixFmtDescriptor *csp;
> > > >>>      void (*lowpass_line)(uint8_t *dstp, ptrdiff_t linesize, const
> > > >> uint8_t *srcp,
> > > >>> -                         ptrdiff_t mref, ptrdiff_t pref);
> > > >>> +                         ptrdiff_t mref, ptrdiff_t pref, int
> > > clip_max);
> > > >>>  } InterlaceContext;
> > > >>>
> > > >>>  void ff_interlace_init_x86(InterlaceContext *interlace);
> > > >>> diff --git a/libavfilter/tinterlace.h b/libavfilter/tinterlace.h
> > > >>> index cc13a6c..b5c39aa 100644
> > > >>> --- a/libavfilter/tinterlace.h
> > > >>> +++ b/libavfilter/tinterlace.h
> > > >>> @@ -27,7 +27,9 @@
> > > >>>  #ifndef AVFILTER_TINTERLACE_H
> > > >>>  #define AVFILTER_TINTERLACE_H
> > > >>>
> > > >>> +#include "libavutil/bswap.h"
> > > >>>  #include "libavutil/opt.h"
> > > >>> +#include "libavutil/pixdesc.h"
> > > >>>  #include "drawutils.h"
> > > >>>  #include "avfilter.h"
> > > >>>
> > > >>> @@ -60,8 +62,9 @@ typedef struct TInterlaceContext {
> > > >>>      int black_linesize[4];
> > > >>>      FFDrawContext draw;
> > > >>>      FFDrawColor color;
> > > >>> +    const AVPixFmtDescriptor *csp;
> > > >>>      void (*lowpass_line)(uint8_t *dstp, ptrdiff_t width, const
> uint8_t
> > > >> *srcp,
> > > >>> -                         ptrdiff_t mref, ptrdiff_t pref);
> > > >>> +                         ptrdiff_t mref, ptrdiff_t pref, int
> > > clip_max);
> > > >>>  } TInterlaceContext;
> > > >>>
> > > >>>  void ff_tinterlace_init_x86(TInterlaceContext *interlace);
> > > >>> diff --git a/libavfilter/vf_interlace.c
> b/libavfilter/vf_interlace.c
> > > >>> index 55bf782..bfba054 100644
> > > >>> --- a/libavfilter/vf_interlace.c
> > > >>> +++ b/libavfilter/vf_interlace.c
> > > >>> @@ -61,8 +61,8 @@ static const AVOption interlace_options[] = {
> > > >>>  AVFILTER_DEFINE_CLASS(interlace);
> > > >>>
> > > >>>  static void lowpass_line_c(uint8_t *dstp, ptrdiff_t linesize,
> > > >>> -                           const uint8_t *srcp,
> > > >>> -                           ptrdiff_t mref, ptrdiff_t pref)
> > > >>> +                           const uint8_t *srcp, ptrdiff_t mref,
> > > >>> +                           ptrdiff_t pref, int clip_max)
> > > >>>  {
> > > >>>      const uint8_t *srcp_above = srcp + mref;
> > > >>>      const uint8_t *srcp_below = srcp + pref;
> > > >>> @@ -75,9 +75,28 @@ static void lowpass_line_c(uint8_t *dstp,
> ptrdiff_t
> > > >> linesize,
> > > >>>      }
> > > >>>  }
> > > >>>
> > > >>> +static void lowpass_line_c_16(uint8_t *dst8, ptrdiff_t linesize,
> > > >>> +                              const uint8_t *src8, ptrdiff_t mref,
> > > >>> +                              ptrdiff_t pref, int clip_max)
> > > >>> +{
> > > >>> +    uint16_t *dstp = (uint16_t *)dst8;
> > > >>> +    const uint16_t *srcp = (const uint16_t *)src8;
> > > >>> +    const uint16_t *srcp_above = srcp + mref / 2;
> > > >>> +    const uint16_t *srcp_below = srcp + pref / 2;
> > > >>> +    int i;
> > > >>> +    for (i = 0; i < linesize; i++) {
> > > >>> +        // this calculation is an integer representation of
> > > >>> +        // '0.5 * current + 0.25 * above + 0.25 * below'
> > > >>> +        // '1 +' is for rounding.
> > > >>> +        dstp[i] = av_le2ne16((1 + av_le2ne16(srcp[i]) +
> > > av_le2ne16(srcp[i])
> > >
> > > You might want to load srcp[i] into a local variable here as well.
> > >
> > > >>> +                                + av_le2ne16(srcp_above[i])
> > > >>> +                                + av_le2ne16(srcp_below[i])) >>
> 2);
> > > >>
> > > >> This might work (And Michael will be able to confirm that if
> > > >> filter-pixfmts-tinterlace_vlpf passes)...
> > > >>
> > > >>> +    }
> > > >>> +}
> > > >>> +
> > > >>>  static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t
> linesize,
> > > >>> -                                   const uint8_t *srcp,
> > > >>> -                                   ptrdiff_t mref, ptrdiff_t pref)
> > > >>> +                                   const uint8_t *srcp, ptrdiff_t
> > > mref,
> > > >>> +                                   ptrdiff_t pref, int clip_max)
> > > >>>  {
> > > >>>      const uint8_t *srcp_above = srcp + mref;
> > > >>>      const uint8_t *srcp_below = srcp + pref;
> > > >>> @@ -103,11 +122,46 @@ static void lowpass_line_complex_c(uint8_t
> *dstp,
> > > >> ptrdiff_t linesize,
> > > >>>      }
> > > >>>  }
> > > >>>
> > > >>> +static void lowpass_line_complex_c_16(uint8_t *dst8, ptrdiff_t
> > > >> linesize,
> > > >>> +                                      const uint8_t *src8,
> ptrdiff_t
> > > >> mref,
> > > >>> +                                      ptrdiff_t pref, int
> clip_max)
> > > >>> +{
> > > >>> +    uint16_t *dstp = (uint16_t *)dst8;
> > > >>> +    const uint16_t *srcp = (const uint16_t *)src8;
> > > >>> +    const uint16_t *srcp_above = srcp + mref / 2;
> > > >>> +    const uint16_t *srcp_below = srcp + pref / 2;
> > > >>> +    const uint16_t *srcp_above2 = srcp + mref;
> > > >>> +    const uint16_t *srcp_below2 = srcp + pref;
> > > >>> +    int i, srcp_x, srcp_ab;
> > > >>> +    for (i = 0; i < linesize; i++) {
> > > >>> +        // this calculation is an integer representation of
> > > >>> +        // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 *
> > > >> above2 - 0.125 * below2'
> > > >>> +        // '4 +' is for rounding.
> > > >>> +        srcp_x = av_le2ne16(srcp[i]) << 1;
> > > >>> +        srcp_ab = av_le2ne16(srcp_above[i]) +
> > > av_le2ne16(srcp_below[i]);
> > > >>> +        dstp[i] = av_le2ne16(av_clip((4 + ((av_le2ne16(srcp[i]) +
> > > >> srcp_x + srcp_ab) << 1)
> > > >>> +                                     - av_le2ne16(srcp_above2[i])
> > > >>> +                                     -
> av_le2ne16(srcp_below2[i])) >>
> > > >> 3, 0, clip_max));
> > > >>> +        // Prevent over-sharpening:
> > > >>> +        // dst must not exceed src when the average of above and
> below
> > > >>> +        // is less than src. And the other way around.
> > > >>> +        if (srcp_ab > srcp_x) {
> > > >>> +            if (av_le2ne16(dstp[i]) < av_le2ne16(srcp[i]))
> > > >>> +                dstp[i] = srcp[i];
> > > >>> +        } else if (av_le2ne16(dstp[i]) > av_le2ne16(srcp[i]))
> > > >>> +            dstp[i] = srcp[i];
> > > >>
> > > >> ...but chances are this over-sharpening prevention part will not.
> You're
> > > >> loading in native endianness here before storing. You only
> byteswapped
> > > >> for the comparison.
> > > >>
> > > >> Also, consider using local variables inside the for loop. You're
> loading
> > > >> scrp[i] and dstp[i] several times per iteration.
> > > >>
> > > >
> > > > Okay, then I would do:
> > > >     int i, dstp_le, srcp_le, srcp_x, srcp_ab;
> > >
> > > These are not pointers, so maybe remove the "p" to avoid confusion.
> > >
> > > >     for (i = 0; i < linesize; i++) {
> > > >         srcp_le = av_le2ne16(srcp[i]);
> > > >         srcp_x = srcp_le << 1;
> > > >         srcp_ab = av_le2ne16(srcp_above[i]) +
> av_le2ne16(srcp_below[i]);
> > > >         dstp_le = av_clip((4 + (srcp_le + srcp_x + srcp_ab) << 1)
> > > >                                  - av_le2ne16(srcp_above2[i])
> > > >                                  - av_le2ne16(srcp_below2[i])) >> 3,
> 0,
> > > clip_max);
> > > >         if (srcp_ab > srcp_x) {
> > > >             if (dstp_le < srcp_le)
> > > >                 dstp[i] = srcp[i];
> > > >             else
> > > >                 dstp[i] = av_le2ne16(dstp_le);
> > > >         } else if (dstp_le > srcp_le) {
> > > >             dstp[i] = srcp[i];
> > > >         } else
> > > >             dstp[i] = av_le2ne16(dstp_le);
> > > >     }
> > > > Shall I do dstp[i] = av_le2ne16(srcp_le); instead of dstp[i] =
> srcp[i]; ?
> > >
> > > Yes. No need to load srcp[i] again.
> > >
> >
> > Okay, thanks!
> > A patch with your suggestions is attached.
> > I will send a separate patch that removes the "p" from srcp_x and src_ab
> > in lowpass_line_complex_c function for consistency.
>
> >  libavfilter/interlace.h                        |    5 +
> >  libavfilter/tinterlace.h                       |    5 +
> >  libavfilter/vf_interlace.c                     |   97
> +++++++++++++++++++++----
> >  libavfilter/vf_tinterlace.c                    |   78
> ++++++++++++++++++--
> >  libavfilter/x86/vf_interlace.asm               |   80
> ++++++++++++++++++--
> >  libavfilter/x86/vf_interlace_init.c            |   51 +++++++++----
> >  libavfilter/x86/vf_tinterlace_init.c           |   51 +++++++++----
> >  tests/ref/fate/filter-pixfmts-tinterlace_cvlpf |   11 ++
> >  tests/ref/fate/filter-pixfmts-tinterlace_merge |   11 ++
> >  tests/ref/fate/filter-pixfmts-tinterlace_pad   |   11 ++
> >  tests/ref/fate/filter-pixfmts-tinterlace_vlpf  |   11 ++
> >  11 files changed, 355 insertions(+), 56 deletions(-)
> > d34a8ac3c7d1213a33f8fb5d144e0c9fd7f694e6  0003-avfilter-interlace-add-
> support-for-10-and-12-bit.patch
> > From 4f7172f258bcad46c401ac0acbb4ef1666466c18 Mon Sep 17 00:00:00 2001
> > From: Thomas Mundt <tmundt75 at gmail.com>
> > Date: Tue, 19 Sep 2017 22:23:23 +0200
> > Subject: [PATCH 3/3 v2] avfilter/interlace: add support for 10 and 12 bit
> >
> > Signed-off-by: Thomas Mundt <tmundt75 at gmail.com>
> > ---
> >  libavfilter/interlace.h                        |  5 +-
> >  libavfilter/tinterlace.h                       |  5 +-
> >  libavfilter/vf_interlace.c                     | 97
> ++++++++++++++++++++++----
> >  libavfilter/vf_tinterlace.c                    | 78
> +++++++++++++++++++--
> >  libavfilter/x86/vf_interlace.asm               | 80
> +++++++++++++++++++--
> >  libavfilter/x86/vf_interlace_init.c            | 51 ++++++++++----
> >  libavfilter/x86/vf_tinterlace_init.c           | 51 ++++++++++----
> >  tests/ref/fate/filter-pixfmts-tinterlace_cvlpf | 11 +++
> >  tests/ref/fate/filter-pixfmts-tinterlace_merge | 11 +++
> >  tests/ref/fate/filter-pixfmts-tinterlace_pad   | 11 +++
> >  tests/ref/fate/filter-pixfmts-tinterlace_vlpf  | 11 +++
> >  11 files changed, 355 insertions(+), 56 deletions(-)
>
> Tested on mips qemu and x86-64, works
>

Thanks!