[FFmpeg-devel] [PATCH 3/3 v2] avfilter/interlace: add support for 10 and 12 bit

Tue Sep 19 18:53:36 EEST 2017

On 9/19/2017 5:02 AM, Thomas Mundt wrote:
> 2017-09-19 4:09 GMT+02:00 James Almer <jamrial at gmail.com>:
> 
>> On 9/18/2017 10:41 PM, Thomas Mundt wrote:
>>> I tried to set up MIPS compiler for two days on windows and linux without
>>> success.
>>> Now I try it blind. This solution is based on the first suggestion James
>>> gave me at IRC.
>>> There might be room for improvement and an alternative solution with
>>> AV_RL16() / AV_WL16().
>>> I used av_le2ne16() because it will be ignored for little endian.
>>>
>>> Regards,
>>> Thomas
>>
>>> From a2be5859266b1a2f7048b81ced6770ab4b90a5a4 Mon Sep 17 00:00:00 2001
>>> From: Thomas Mundt <tmundt75 at gmail.com>
>>> Date: Tue, 19 Sep 2017 00:25:25 +0200
>>> Subject: [PATCH 3/3 v2] avfilter/interlace: add support for 10 and 12 bit
>>>
>>> Signed-off-by: Thomas Mundt <tmundt75 at gmail.com>
>>> ---
>>>  libavfilter/interlace.h                        |  5 +-
>>>  libavfilter/tinterlace.h                       |  5 +-
>>>  libavfilter/vf_interlace.c                     | 92
>> ++++++++++++++++++++++----
>>>  libavfilter/vf_tinterlace.c                    | 73 ++++++++++++++++++--
>>>  libavfilter/x86/vf_interlace.asm               | 80
>> ++++++++++++++++++++--
>>>  libavfilter/x86/vf_interlace_init.c            | 51 ++++++++++----
>>>  libavfilter/x86/vf_tinterlace_init.c           | 51 ++++++++++----
>>>  tests/ref/fate/filter-pixfmts-tinterlace_cvlpf | 11 +++
>>>  tests/ref/fate/filter-pixfmts-tinterlace_merge | 11 +++
>>>  tests/ref/fate/filter-pixfmts-tinterlace_pad   | 11 +++
>>>  tests/ref/fate/filter-pixfmts-tinterlace_vlpf  | 11 +++
>>>  11 files changed, 345 insertions(+), 56 deletions(-)
>>>
>>> diff --git a/libavfilter/interlace.h b/libavfilter/interlace.h
>>> index 2101b79..90a0198 100644
>>> --- a/libavfilter/interlace.h
>>> +++ b/libavfilter/interlace.h
>>> @@ -25,9 +25,11 @@
>>>  #ifndef AVFILTER_INTERLACE_H
>>>  #define AVFILTER_INTERLACE_H
>>>
>>> +#include "libavutil/bswap.h"
>>>  #include "libavutil/common.h"
>>>  #include "libavutil/imgutils.h"
>>>  #include "libavutil/opt.h"
>>> +#include "libavutil/pixdesc.h"
>>>
>>>  #include "avfilter.h"
>>>  #include "formats.h"
>>> @@ -55,8 +57,9 @@ typedef struct InterlaceContext {
>>>      enum ScanMode scan;    // top or bottom field first scanning
>>>      int lowpass;           // enable or disable low pass filtering
>>>      AVFrame *cur, *next;   // the two frames from which the new one is
>> obtained
>>> +    const AVPixFmtDescriptor *csp;
>>>      void (*lowpass_line)(uint8_t *dstp, ptrdiff_t linesize, const
>> uint8_t *srcp,
>>> -                         ptrdiff_t mref, ptrdiff_t pref);
>>> +                         ptrdiff_t mref, ptrdiff_t pref, int clip_max);
>>>  } InterlaceContext;
>>>
>>>  void ff_interlace_init_x86(InterlaceContext *interlace);
>>> diff --git a/libavfilter/tinterlace.h b/libavfilter/tinterlace.h
>>> index cc13a6c..b5c39aa 100644
>>> --- a/libavfilter/tinterlace.h
>>> +++ b/libavfilter/tinterlace.h
>>> @@ -27,7 +27,9 @@
>>>  #ifndef AVFILTER_TINTERLACE_H
>>>  #define AVFILTER_TINTERLACE_H
>>>
>>> +#include "libavutil/bswap.h"
>>>  #include "libavutil/opt.h"
>>> +#include "libavutil/pixdesc.h"
>>>  #include "drawutils.h"
>>>  #include "avfilter.h"
>>>
>>> @@ -60,8 +62,9 @@ typedef struct TInterlaceContext {
>>>      int black_linesize[4];
>>>      FFDrawContext draw;
>>>      FFDrawColor color;
>>> +    const AVPixFmtDescriptor *csp;
>>>      void (*lowpass_line)(uint8_t *dstp, ptrdiff_t width, const uint8_t
>> *srcp,
>>> -                         ptrdiff_t mref, ptrdiff_t pref);
>>> +                         ptrdiff_t mref, ptrdiff_t pref, int clip_max);
>>>  } TInterlaceContext;
>>>
>>>  void ff_tinterlace_init_x86(TInterlaceContext *interlace);
>>> diff --git a/libavfilter/vf_interlace.c b/libavfilter/vf_interlace.c
>>> index 55bf782..bfba054 100644
>>> --- a/libavfilter/vf_interlace.c
>>> +++ b/libavfilter/vf_interlace.c
>>> @@ -61,8 +61,8 @@ static const AVOption interlace_options[] = {
>>>  AVFILTER_DEFINE_CLASS(interlace);
>>>
>>>  static void lowpass_line_c(uint8_t *dstp, ptrdiff_t linesize,
>>> -                           const uint8_t *srcp,
>>> -                           ptrdiff_t mref, ptrdiff_t pref)
>>> +                           const uint8_t *srcp, ptrdiff_t mref,
>>> +                           ptrdiff_t pref, int clip_max)
>>>  {
>>>      const uint8_t *srcp_above = srcp + mref;
>>>      const uint8_t *srcp_below = srcp + pref;
>>> @@ -75,9 +75,28 @@ static void lowpass_line_c(uint8_t *dstp, ptrdiff_t
>> linesize,
>>>      }
>>>  }
>>>
>>> +static void lowpass_line_c_16(uint8_t *dst8, ptrdiff_t linesize,
>>> +                              const uint8_t *src8, ptrdiff_t mref,
>>> +                              ptrdiff_t pref, int clip_max)
>>> +{
>>> +    uint16_t *dstp = (uint16_t *)dst8;
>>> +    const uint16_t *srcp = (const uint16_t *)src8;
>>> +    const uint16_t *srcp_above = srcp + mref / 2;
>>> +    const uint16_t *srcp_below = srcp + pref / 2;
>>> +    int i;
>>> +    for (i = 0; i < linesize; i++) {
>>> +        // this calculation is an integer representation of
>>> +        // '0.5 * current + 0.25 * above + 0.25 * below'
>>> +        // '1 +' is for rounding.
>>> +        dstp[i] = av_le2ne16((1 + av_le2ne16(srcp[i]) + av_le2ne16(srcp[i])

You might want to load srcp[i] into a local variable here as well.

>>> +                                + av_le2ne16(srcp_above[i])
>>> +                                + av_le2ne16(srcp_below[i])) >> 2);
>>
>> This might work (And Michael will be able to confirm that if
>> filter-pixfmts-tinterlace_vlpf passes)...
>>
>>> +    }
>>> +}
>>> +
>>>  static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize,
>>> -                                   const uint8_t *srcp,
>>> -                                   ptrdiff_t mref, ptrdiff_t pref)
>>> +                                   const uint8_t *srcp, ptrdiff_t mref,
>>> +                                   ptrdiff_t pref, int clip_max)
>>>  {
>>>      const uint8_t *srcp_above = srcp + mref;
>>>      const uint8_t *srcp_below = srcp + pref;
>>> @@ -103,11 +122,46 @@ static void lowpass_line_complex_c(uint8_t *dstp,
>> ptrdiff_t linesize,
>>>      }
>>>  }
>>>
>>> +static void lowpass_line_complex_c_16(uint8_t *dst8, ptrdiff_t
>> linesize,
>>> +                                      const uint8_t *src8, ptrdiff_t
>> mref,
>>> +                                      ptrdiff_t pref, int clip_max)
>>> +{
>>> +    uint16_t *dstp = (uint16_t *)dst8;
>>> +    const uint16_t *srcp = (const uint16_t *)src8;
>>> +    const uint16_t *srcp_above = srcp + mref / 2;
>>> +    const uint16_t *srcp_below = srcp + pref / 2;
>>> +    const uint16_t *srcp_above2 = srcp + mref;
>>> +    const uint16_t *srcp_below2 = srcp + pref;
>>> +    int i, srcp_x, srcp_ab;
>>> +    for (i = 0; i < linesize; i++) {
>>> +        // this calculation is an integer representation of
>>> +        // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 *
>> above2 - 0.125 * below2'
>>> +        // '4 +' is for rounding.
>>> +        srcp_x = av_le2ne16(srcp[i]) << 1;
>>> +        srcp_ab = av_le2ne16(srcp_above[i]) + av_le2ne16(srcp_below[i]);
>>> +        dstp[i] = av_le2ne16(av_clip((4 + ((av_le2ne16(srcp[i]) +
>> srcp_x + srcp_ab) << 1)
>>> +                                     - av_le2ne16(srcp_above2[i])
>>> +                                     - av_le2ne16(srcp_below2[i])) >>
>> 3, 0, clip_max));
>>> +        // Prevent over-sharpening:
>>> +        // dst must not exceed src when the average of above and below
>>> +        // is less than src. And the other way around.
>>> +        if (srcp_ab > srcp_x) {
>>> +            if (av_le2ne16(dstp[i]) < av_le2ne16(srcp[i]))
>>> +                dstp[i] = srcp[i];
>>> +        } else if (av_le2ne16(dstp[i]) > av_le2ne16(srcp[i]))
>>> +            dstp[i] = srcp[i];
>>
>> ...but chances are this over-sharpening prevention part will not. You're
>> loading in native endianness here before storing. You only byteswapped
>> for the comparison.
>>
>> Also, consider using local variables inside the for loop. You're loading
>> scrp[i] and dstp[i] several times per iteration.
>>
> 
> Okay, then I would do:
>     int i, dstp_le, srcp_le, srcp_x, srcp_ab;

These are not pointers, so maybe remove the "p" to avoid confusion.

>     for (i = 0; i < linesize; i++) {
>         srcp_le = av_le2ne16(srcp[i]);
>         srcp_x = srcp_le << 1;
>         srcp_ab = av_le2ne16(srcp_above[i]) + av_le2ne16(srcp_below[i]);
>         dstp_le = av_clip((4 + (srcp_le + srcp_x + srcp_ab) << 1)
>                                  - av_le2ne16(srcp_above2[i])
>                                  - av_le2ne16(srcp_below2[i])) >> 3, 0, clip_max);
>         if (srcp_ab > srcp_x) {
>             if (dstp_le < srcp_le)
>                 dstp[i] = srcp[i];
>             else
>                 dstp[i] = av_le2ne16(dstp_le);
>         } else if (dstp_le > srcp_le) {
>             dstp[i] = srcp[i];
>         } else
>             dstp[i] = av_le2ne16(dstp_le);
>     }
> Shall I do dstp[i] = av_le2ne16(srcp_le); instead of dstp[i] = srcp[i]; ?

Yes. No need to load srcp[i] again.

> Don´t two consecutive byte swaps cancel each other out?

You byteswap the native endian local variable (big in this case) for
storing.

> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>