[FFmpeg-devel] [PATCH 3/3 v2] avfilter/interlace: add support for 10 and 12 bit

Tue Sep 19 05:09:20 EEST 2017

On 9/18/2017 10:41 PM, Thomas Mundt wrote:
> I tried to set up MIPS compiler for two days on windows and linux without
> success.
> Now I try it blind. This solution is based on the first suggestion James
> gave me at IRC.
> There might be room for improvement and an alternative solution with
> AV_RL16() / AV_WL16().
> I used av_le2ne16() because it will be ignored for little endian.
> 
> Regards,
> Thomas

> From a2be5859266b1a2f7048b81ced6770ab4b90a5a4 Mon Sep 17 00:00:00 2001
> From: Thomas Mundt <tmundt75 at gmail.com>
> Date: Tue, 19 Sep 2017 00:25:25 +0200
> Subject: [PATCH 3/3 v2] avfilter/interlace: add support for 10 and 12 bit
> 
> Signed-off-by: Thomas Mundt <tmundt75 at gmail.com>
> ---
>  libavfilter/interlace.h                        |  5 +-
>  libavfilter/tinterlace.h                       |  5 +-
>  libavfilter/vf_interlace.c                     | 92 ++++++++++++++++++++++----
>  libavfilter/vf_tinterlace.c                    | 73 ++++++++++++++++++--
>  libavfilter/x86/vf_interlace.asm               | 80 ++++++++++++++++++++--
>  libavfilter/x86/vf_interlace_init.c            | 51 ++++++++++----
>  libavfilter/x86/vf_tinterlace_init.c           | 51 ++++++++++----
>  tests/ref/fate/filter-pixfmts-tinterlace_cvlpf | 11 +++
>  tests/ref/fate/filter-pixfmts-tinterlace_merge | 11 +++
>  tests/ref/fate/filter-pixfmts-tinterlace_pad   | 11 +++
>  tests/ref/fate/filter-pixfmts-tinterlace_vlpf  | 11 +++
>  11 files changed, 345 insertions(+), 56 deletions(-)
> 
> diff --git a/libavfilter/interlace.h b/libavfilter/interlace.h
> index 2101b79..90a0198 100644
> --- a/libavfilter/interlace.h
> +++ b/libavfilter/interlace.h
> @@ -25,9 +25,11 @@
>  #ifndef AVFILTER_INTERLACE_H
>  #define AVFILTER_INTERLACE_H
>  
> +#include "libavutil/bswap.h"
>  #include "libavutil/common.h"
>  #include "libavutil/imgutils.h"
>  #include "libavutil/opt.h"
> +#include "libavutil/pixdesc.h"
>  
>  #include "avfilter.h"
>  #include "formats.h"
> @@ -55,8 +57,9 @@ typedef struct InterlaceContext {
>      enum ScanMode scan;    // top or bottom field first scanning
>      int lowpass;           // enable or disable low pass filtering
>      AVFrame *cur, *next;   // the two frames from which the new one is obtained
> +    const AVPixFmtDescriptor *csp;
>      void (*lowpass_line)(uint8_t *dstp, ptrdiff_t linesize, const uint8_t *srcp,
> -                         ptrdiff_t mref, ptrdiff_t pref);
> +                         ptrdiff_t mref, ptrdiff_t pref, int clip_max);
>  } InterlaceContext;
>  
>  void ff_interlace_init_x86(InterlaceContext *interlace);
> diff --git a/libavfilter/tinterlace.h b/libavfilter/tinterlace.h
> index cc13a6c..b5c39aa 100644
> --- a/libavfilter/tinterlace.h
> +++ b/libavfilter/tinterlace.h
> @@ -27,7 +27,9 @@
>  #ifndef AVFILTER_TINTERLACE_H
>  #define AVFILTER_TINTERLACE_H
>  
> +#include "libavutil/bswap.h"
>  #include "libavutil/opt.h"
> +#include "libavutil/pixdesc.h"
>  #include "drawutils.h"
>  #include "avfilter.h"
>  
> @@ -60,8 +62,9 @@ typedef struct TInterlaceContext {
>      int black_linesize[4];
>      FFDrawContext draw;
>      FFDrawColor color;
> +    const AVPixFmtDescriptor *csp;
>      void (*lowpass_line)(uint8_t *dstp, ptrdiff_t width, const uint8_t *srcp,
> -                         ptrdiff_t mref, ptrdiff_t pref);
> +                         ptrdiff_t mref, ptrdiff_t pref, int clip_max);
>  } TInterlaceContext;
>  
>  void ff_tinterlace_init_x86(TInterlaceContext *interlace);
> diff --git a/libavfilter/vf_interlace.c b/libavfilter/vf_interlace.c
> index 55bf782..bfba054 100644
> --- a/libavfilter/vf_interlace.c
> +++ b/libavfilter/vf_interlace.c
> @@ -61,8 +61,8 @@ static const AVOption interlace_options[] = {
>  AVFILTER_DEFINE_CLASS(interlace);
>  
>  static void lowpass_line_c(uint8_t *dstp, ptrdiff_t linesize,
> -                           const uint8_t *srcp,
> -                           ptrdiff_t mref, ptrdiff_t pref)
> +                           const uint8_t *srcp, ptrdiff_t mref,
> +                           ptrdiff_t pref, int clip_max)
>  {
>      const uint8_t *srcp_above = srcp + mref;
>      const uint8_t *srcp_below = srcp + pref;
> @@ -75,9 +75,28 @@ static void lowpass_line_c(uint8_t *dstp, ptrdiff_t linesize,
>      }
>  }
>  
> +static void lowpass_line_c_16(uint8_t *dst8, ptrdiff_t linesize,
> +                              const uint8_t *src8, ptrdiff_t mref,
> +                              ptrdiff_t pref, int clip_max)
> +{
> +    uint16_t *dstp = (uint16_t *)dst8;
> +    const uint16_t *srcp = (const uint16_t *)src8;
> +    const uint16_t *srcp_above = srcp + mref / 2;
> +    const uint16_t *srcp_below = srcp + pref / 2;
> +    int i;
> +    for (i = 0; i < linesize; i++) {
> +        // this calculation is an integer representation of
> +        // '0.5 * current + 0.25 * above + 0.25 * below'
> +        // '1 +' is for rounding.
> +        dstp[i] = av_le2ne16((1 + av_le2ne16(srcp[i]) + av_le2ne16(srcp[i])
> +                                + av_le2ne16(srcp_above[i])
> +                                + av_le2ne16(srcp_below[i])) >> 2);

This might work (And Michael will be able to confirm that if
filter-pixfmts-tinterlace_vlpf passes)...

> +    }
> +}
> +
>  static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize,
> -                                   const uint8_t *srcp,
> -                                   ptrdiff_t mref, ptrdiff_t pref)
> +                                   const uint8_t *srcp, ptrdiff_t mref,
> +                                   ptrdiff_t pref, int clip_max)
>  {
>      const uint8_t *srcp_above = srcp + mref;
>      const uint8_t *srcp_below = srcp + pref;
> @@ -103,11 +122,46 @@ static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize,
>      }
>  }
>  
> +static void lowpass_line_complex_c_16(uint8_t *dst8, ptrdiff_t linesize,
> +                                      const uint8_t *src8, ptrdiff_t mref,
> +                                      ptrdiff_t pref, int clip_max)
> +{
> +    uint16_t *dstp = (uint16_t *)dst8;
> +    const uint16_t *srcp = (const uint16_t *)src8;
> +    const uint16_t *srcp_above = srcp + mref / 2;
> +    const uint16_t *srcp_below = srcp + pref / 2;
> +    const uint16_t *srcp_above2 = srcp + mref;
> +    const uint16_t *srcp_below2 = srcp + pref;
> +    int i, srcp_x, srcp_ab;
> +    for (i = 0; i < linesize; i++) {
> +        // this calculation is an integer representation of
> +        // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 * above2 - 0.125 * below2'
> +        // '4 +' is for rounding.
> +        srcp_x = av_le2ne16(srcp[i]) << 1;
> +        srcp_ab = av_le2ne16(srcp_above[i]) + av_le2ne16(srcp_below[i]);
> +        dstp[i] = av_le2ne16(av_clip((4 + ((av_le2ne16(srcp[i]) + srcp_x + srcp_ab) << 1)
> +                                     - av_le2ne16(srcp_above2[i])
> +                                     - av_le2ne16(srcp_below2[i])) >> 3, 0, clip_max));
> +        // Prevent over-sharpening:
> +        // dst must not exceed src when the average of above and below
> +        // is less than src. And the other way around.
> +        if (srcp_ab > srcp_x) {
> +            if (av_le2ne16(dstp[i]) < av_le2ne16(srcp[i]))
> +                dstp[i] = srcp[i];
> +        } else if (av_le2ne16(dstp[i]) > av_le2ne16(srcp[i]))
> +            dstp[i] = srcp[i];

...but chances are this over-sharpening prevention part will not. You're
loading in native endianness here before storing. You only byteswapped
for the comparison.

Also, consider using local variables inside the for loop. You're loading
scrp[i] and dstp[i] several times per iteration.