[FFmpeg-devel] [PATCH 3/3 v2] avfilter/interlace: add support for 10 and 12 bit

Sat Sep 23 21:24:16 EEST 2017

On Tue, Sep 19, 2017 at 10:35:30PM +0200, Thomas Mundt wrote:
> 2017-09-19 17:53 GMT+02:00 James Almer <jamrial at gmail.com>:
> 
> > On 9/19/2017 5:02 AM, Thomas Mundt wrote:
> > > 2017-09-19 4:09 GMT+02:00 James Almer <jamrial at gmail.com>:
> > >
> > >> On 9/18/2017 10:41 PM, Thomas Mundt wrote:
> > >>> I tried to set up MIPS compiler for two days on windows and linux
> > without
> > >>> success.
> > >>> Now I try it blind. This solution is based on the first suggestion
> > James
> > >>> gave me at IRC.
> > >>> There might be room for improvement and an alternative solution with
> > >>> AV_RL16() / AV_WL16().
> > >>> I used av_le2ne16() because it will be ignored for little endian.
> > >>>
> > >>> Regards,
> > >>> Thomas
> > >>
> > >>> From a2be5859266b1a2f7048b81ced6770ab4b90a5a4 Mon Sep 17 00:00:00 2001
> > >>> From: Thomas Mundt <tmundt75 at gmail.com>
> > >>> Date: Tue, 19 Sep 2017 00:25:25 +0200
> > >>> Subject: [PATCH 3/3 v2] avfilter/interlace: add support for 10 and 12
> > bit
> > >>>
> > >>> Signed-off-by: Thomas Mundt <tmundt75 at gmail.com>
> > >>> ---
> > >>>  libavfilter/interlace.h                        |  5 +-
> > >>>  libavfilter/tinterlace.h                       |  5 +-
> > >>>  libavfilter/vf_interlace.c                     | 92
> > >> ++++++++++++++++++++++----
> > >>>  libavfilter/vf_tinterlace.c                    | 73
> > ++++++++++++++++++--
> > >>>  libavfilter/x86/vf_interlace.asm               | 80
> > >> ++++++++++++++++++++--
> > >>>  libavfilter/x86/vf_interlace_init.c            | 51 ++++++++++----
> > >>>  libavfilter/x86/vf_tinterlace_init.c           | 51 ++++++++++----
> > >>>  tests/ref/fate/filter-pixfmts-tinterlace_cvlpf | 11 +++
> > >>>  tests/ref/fate/filter-pixfmts-tinterlace_merge | 11 +++
> > >>>  tests/ref/fate/filter-pixfmts-tinterlace_pad   | 11 +++
> > >>>  tests/ref/fate/filter-pixfmts-tinterlace_vlpf  | 11 +++
> > >>>  11 files changed, 345 insertions(+), 56 deletions(-)
> > >>>
> > >>> diff --git a/libavfilter/interlace.h b/libavfilter/interlace.h
> > >>> index 2101b79..90a0198 100644
> > >>> --- a/libavfilter/interlace.h
> > >>> +++ b/libavfilter/interlace.h
> > >>> @@ -25,9 +25,11 @@
> > >>>  #ifndef AVFILTER_INTERLACE_H
> > >>>  #define AVFILTER_INTERLACE_H
> > >>>
> > >>> +#include "libavutil/bswap.h"
> > >>>  #include "libavutil/common.h"
> > >>>  #include "libavutil/imgutils.h"
> > >>>  #include "libavutil/opt.h"
> > >>> +#include "libavutil/pixdesc.h"
> > >>>
> > >>>  #include "avfilter.h"
> > >>>  #include "formats.h"
> > >>> @@ -55,8 +57,9 @@ typedef struct InterlaceContext {
> > >>>      enum ScanMode scan;    // top or bottom field first scanning
> > >>>      int lowpass;           // enable or disable low pass filtering
> > >>>      AVFrame *cur, *next;   // the two frames from which the new one is
> > >> obtained
> > >>> +    const AVPixFmtDescriptor *csp;
> > >>>      void (*lowpass_line)(uint8_t *dstp, ptrdiff_t linesize, const
> > >> uint8_t *srcp,
> > >>> -                         ptrdiff_t mref, ptrdiff_t pref);
> > >>> +                         ptrdiff_t mref, ptrdiff_t pref, int
> > clip_max);
> > >>>  } InterlaceContext;
> > >>>
> > >>>  void ff_interlace_init_x86(InterlaceContext *interlace);
> > >>> diff --git a/libavfilter/tinterlace.h b/libavfilter/tinterlace.h
> > >>> index cc13a6c..b5c39aa 100644
> > >>> --- a/libavfilter/tinterlace.h
> > >>> +++ b/libavfilter/tinterlace.h
> > >>> @@ -27,7 +27,9 @@
> > >>>  #ifndef AVFILTER_TINTERLACE_H
> > >>>  #define AVFILTER_TINTERLACE_H
> > >>>
> > >>> +#include "libavutil/bswap.h"
> > >>>  #include "libavutil/opt.h"
> > >>> +#include "libavutil/pixdesc.h"
> > >>>  #include "drawutils.h"
> > >>>  #include "avfilter.h"
> > >>>
> > >>> @@ -60,8 +62,9 @@ typedef struct TInterlaceContext {
> > >>>      int black_linesize[4];
> > >>>      FFDrawContext draw;
> > >>>      FFDrawColor color;
> > >>> +    const AVPixFmtDescriptor *csp;
> > >>>      void (*lowpass_line)(uint8_t *dstp, ptrdiff_t width, const uint8_t
> > >> *srcp,
> > >>> -                         ptrdiff_t mref, ptrdiff_t pref);
> > >>> +                         ptrdiff_t mref, ptrdiff_t pref, int
> > clip_max);
> > >>>  } TInterlaceContext;
> > >>>
> > >>>  void ff_tinterlace_init_x86(TInterlaceContext *interlace);
> > >>> diff --git a/libavfilter/vf_interlace.c b/libavfilter/vf_interlace.c
> > >>> index 55bf782..bfba054 100644
> > >>> --- a/libavfilter/vf_interlace.c
> > >>> +++ b/libavfilter/vf_interlace.c
> > >>> @@ -61,8 +61,8 @@ static const AVOption interlace_options[] = {
> > >>>  AVFILTER_DEFINE_CLASS(interlace);
> > >>>
> > >>>  static void lowpass_line_c(uint8_t *dstp, ptrdiff_t linesize,
> > >>> -                           const uint8_t *srcp,
> > >>> -                           ptrdiff_t mref, ptrdiff_t pref)
> > >>> +                           const uint8_t *srcp, ptrdiff_t mref,
> > >>> +                           ptrdiff_t pref, int clip_max)
> > >>>  {
> > >>>      const uint8_t *srcp_above = srcp + mref;
> > >>>      const uint8_t *srcp_below = srcp + pref;
> > >>> @@ -75,9 +75,28 @@ static void lowpass_line_c(uint8_t *dstp, ptrdiff_t
> > >> linesize,
> > >>>      }
> > >>>  }
> > >>>
> > >>> +static void lowpass_line_c_16(uint8_t *dst8, ptrdiff_t linesize,
> > >>> +                              const uint8_t *src8, ptrdiff_t mref,
> > >>> +                              ptrdiff_t pref, int clip_max)
> > >>> +{
> > >>> +    uint16_t *dstp = (uint16_t *)dst8;
> > >>> +    const uint16_t *srcp = (const uint16_t *)src8;
> > >>> +    const uint16_t *srcp_above = srcp + mref / 2;
> > >>> +    const uint16_t *srcp_below = srcp + pref / 2;
> > >>> +    int i;
> > >>> +    for (i = 0; i < linesize; i++) {
> > >>> +        // this calculation is an integer representation of
> > >>> +        // '0.5 * current + 0.25 * above + 0.25 * below'
> > >>> +        // '1 +' is for rounding.
> > >>> +        dstp[i] = av_le2ne16((1 + av_le2ne16(srcp[i]) +
> > av_le2ne16(srcp[i])
> >
> > You might want to load srcp[i] into a local variable here as well.
> >
> > >>> +                                + av_le2ne16(srcp_above[i])
> > >>> +                                + av_le2ne16(srcp_below[i])) >> 2);
> > >>
> > >> This might work (And Michael will be able to confirm that if
> > >> filter-pixfmts-tinterlace_vlpf passes)...
> > >>
> > >>> +    }
> > >>> +}
> > >>> +
> > >>>  static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize,
> > >>> -                                   const uint8_t *srcp,
> > >>> -                                   ptrdiff_t mref, ptrdiff_t pref)
> > >>> +                                   const uint8_t *srcp, ptrdiff_t
> > mref,
> > >>> +                                   ptrdiff_t pref, int clip_max)
> > >>>  {
> > >>>      const uint8_t *srcp_above = srcp + mref;
> > >>>      const uint8_t *srcp_below = srcp + pref;
> > >>> @@ -103,11 +122,46 @@ static void lowpass_line_complex_c(uint8_t *dstp,
> > >> ptrdiff_t linesize,
> > >>>      }
> > >>>  }
> > >>>
> > >>> +static void lowpass_line_complex_c_16(uint8_t *dst8, ptrdiff_t
> > >> linesize,
> > >>> +                                      const uint8_t *src8, ptrdiff_t
> > >> mref,
> > >>> +                                      ptrdiff_t pref, int clip_max)
> > >>> +{
> > >>> +    uint16_t *dstp = (uint16_t *)dst8;
> > >>> +    const uint16_t *srcp = (const uint16_t *)src8;
> > >>> +    const uint16_t *srcp_above = srcp + mref / 2;
> > >>> +    const uint16_t *srcp_below = srcp + pref / 2;
> > >>> +    const uint16_t *srcp_above2 = srcp + mref;
> > >>> +    const uint16_t *srcp_below2 = srcp + pref;
> > >>> +    int i, srcp_x, srcp_ab;
> > >>> +    for (i = 0; i < linesize; i++) {
> > >>> +        // this calculation is an integer representation of
> > >>> +        // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 *
> > >> above2 - 0.125 * below2'
> > >>> +        // '4 +' is for rounding.
> > >>> +        srcp_x = av_le2ne16(srcp[i]) << 1;
> > >>> +        srcp_ab = av_le2ne16(srcp_above[i]) +
> > av_le2ne16(srcp_below[i]);
> > >>> +        dstp[i] = av_le2ne16(av_clip((4 + ((av_le2ne16(srcp[i]) +
> > >> srcp_x + srcp_ab) << 1)
> > >>> +                                     - av_le2ne16(srcp_above2[i])
> > >>> +                                     - av_le2ne16(srcp_below2[i])) >>
> > >> 3, 0, clip_max));
> > >>> +        // Prevent over-sharpening:
> > >>> +        // dst must not exceed src when the average of above and below
> > >>> +        // is less than src. And the other way around.
> > >>> +        if (srcp_ab > srcp_x) {
> > >>> +            if (av_le2ne16(dstp[i]) < av_le2ne16(srcp[i]))
> > >>> +                dstp[i] = srcp[i];
> > >>> +        } else if (av_le2ne16(dstp[i]) > av_le2ne16(srcp[i]))
> > >>> +            dstp[i] = srcp[i];
> > >>
> > >> ...but chances are this over-sharpening prevention part will not. You're
> > >> loading in native endianness here before storing. You only byteswapped
> > >> for the comparison.
> > >>
> > >> Also, consider using local variables inside the for loop. You're loading
> > >> scrp[i] and dstp[i] several times per iteration.
> > >>
> > >
> > > Okay, then I would do:
> > >     int i, dstp_le, srcp_le, srcp_x, srcp_ab;
> >
> > These are not pointers, so maybe remove the "p" to avoid confusion.
> >
> > >     for (i = 0; i < linesize; i++) {
> > >         srcp_le = av_le2ne16(srcp[i]);
> > >         srcp_x = srcp_le << 1;
> > >         srcp_ab = av_le2ne16(srcp_above[i]) + av_le2ne16(srcp_below[i]);
> > >         dstp_le = av_clip((4 + (srcp_le + srcp_x + srcp_ab) << 1)
> > >                                  - av_le2ne16(srcp_above2[i])
> > >                                  - av_le2ne16(srcp_below2[i])) >> 3, 0,
> > clip_max);
> > >         if (srcp_ab > srcp_x) {
> > >             if (dstp_le < srcp_le)
> > >                 dstp[i] = srcp[i];
> > >             else
> > >                 dstp[i] = av_le2ne16(dstp_le);
> > >         } else if (dstp_le > srcp_le) {
> > >             dstp[i] = srcp[i];
> > >         } else
> > >             dstp[i] = av_le2ne16(dstp_le);
> > >     }
> > > Shall I do dstp[i] = av_le2ne16(srcp_le); instead of dstp[i] = srcp[i]; ?
> >
> > Yes. No need to load srcp[i] again.
> >
> 
> Okay, thanks!
> A patch with your suggestions is attached.
> I will send a separate patch that removes the "p" from srcp_x and src_ab
> in lowpass_line_complex_c function for consistency.

>  libavfilter/interlace.h                        |    5 +
>  libavfilter/tinterlace.h                       |    5 +
>  libavfilter/vf_interlace.c                     |   97 +++++++++++++++++++++----
>  libavfilter/vf_tinterlace.c                    |   78 ++++++++++++++++++--
>  libavfilter/x86/vf_interlace.asm               |   80 ++++++++++++++++++--
>  libavfilter/x86/vf_interlace_init.c            |   51 +++++++++----
>  libavfilter/x86/vf_tinterlace_init.c           |   51 +++++++++----
>  tests/ref/fate/filter-pixfmts-tinterlace_cvlpf |   11 ++
>  tests/ref/fate/filter-pixfmts-tinterlace_merge |   11 ++
>  tests/ref/fate/filter-pixfmts-tinterlace_pad   |   11 ++
>  tests/ref/fate/filter-pixfmts-tinterlace_vlpf  |   11 ++
>  11 files changed, 355 insertions(+), 56 deletions(-)
> d34a8ac3c7d1213a33f8fb5d144e0c9fd7f694e6  0003-avfilter-interlace-add-support-for-10-and-12-bit.patch
> From 4f7172f258bcad46c401ac0acbb4ef1666466c18 Mon Sep 17 00:00:00 2001
> From: Thomas Mundt <tmundt75 at gmail.com>
> Date: Tue, 19 Sep 2017 22:23:23 +0200
> Subject: [PATCH 3/3 v2] avfilter/interlace: add support for 10 and 12 bit
> 
> Signed-off-by: Thomas Mundt <tmundt75 at gmail.com>
> ---
>  libavfilter/interlace.h                        |  5 +-
>  libavfilter/tinterlace.h                       |  5 +-
>  libavfilter/vf_interlace.c                     | 97 ++++++++++++++++++++++----
>  libavfilter/vf_tinterlace.c                    | 78 +++++++++++++++++++--
>  libavfilter/x86/vf_interlace.asm               | 80 +++++++++++++++++++--
>  libavfilter/x86/vf_interlace_init.c            | 51 ++++++++++----
>  libavfilter/x86/vf_tinterlace_init.c           | 51 ++++++++++----
>  tests/ref/fate/filter-pixfmts-tinterlace_cvlpf | 11 +++
>  tests/ref/fate/filter-pixfmts-tinterlace_merge | 11 +++
>  tests/ref/fate/filter-pixfmts-tinterlace_pad   | 11 +++
>  tests/ref/fate/filter-pixfmts-tinterlace_vlpf  | 11 +++
>  11 files changed, 355 insertions(+), 56 deletions(-)

Tested on mips qemu and x86-64, works

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

When the tyrant has disposed of foreign enemies by conquest or treaty, and
there is nothing more to fear from them, then he is always stirring up
some war or other, in order that the people may require a leader. -- Plato
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 181 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20170923/4bbc71a8/attachment.sig>