[FFmpeg-devel] [PATCH 3/3 v2] avfilter/interlace: add support for 10 and 12 bit

Sat Sep 23 22:21:06 EEST 2017

On 9/23/2017 3:24 PM, Michael Niedermayer wrote:
> On Tue, Sep 19, 2017 at 10:35:30PM +0200, Thomas Mundt wrote:
>> 2017-09-19 17:53 GMT+02:00 James Almer <jamrial at gmail.com>:
>>
>>> On 9/19/2017 5:02 AM, Thomas Mundt wrote:
>>>> 2017-09-19 4:09 GMT+02:00 James Almer <jamrial at gmail.com>:
>>>>
>>>>> On 9/18/2017 10:41 PM, Thomas Mundt wrote:
>>>>>> I tried to set up MIPS compiler for two days on windows and linux
>>> without
>>>>>> success.
>>>>>> Now I try it blind. This solution is based on the first suggestion
>>> James
>>>>>> gave me at IRC.
>>>>>> There might be room for improvement and an alternative solution with
>>>>>> AV_RL16() / AV_WL16().
>>>>>> I used av_le2ne16() because it will be ignored for little endian.
>>>>>>
>>>>>> Regards,
>>>>>> Thomas
>>>>>
>>>>>> From a2be5859266b1a2f7048b81ced6770ab4b90a5a4 Mon Sep 17 00:00:00 2001
>>>>>> From: Thomas Mundt <tmundt75 at gmail.com>
>>>>>> Date: Tue, 19 Sep 2017 00:25:25 +0200
>>>>>> Subject: [PATCH 3/3 v2] avfilter/interlace: add support for 10 and 12
>>> bit
>>>>>>
>>>>>> Signed-off-by: Thomas Mundt <tmundt75 at gmail.com>
>>>>>> ---
>>>>>>  libavfilter/interlace.h                        |  5 +-
>>>>>>  libavfilter/tinterlace.h                       |  5 +-
>>>>>>  libavfilter/vf_interlace.c                     | 92
>>>>> ++++++++++++++++++++++----
>>>>>>  libavfilter/vf_tinterlace.c                    | 73
>>> ++++++++++++++++++--
>>>>>>  libavfilter/x86/vf_interlace.asm               | 80
>>>>> ++++++++++++++++++++--
>>>>>>  libavfilter/x86/vf_interlace_init.c            | 51 ++++++++++----
>>>>>>  libavfilter/x86/vf_tinterlace_init.c           | 51 ++++++++++----
>>>>>>  tests/ref/fate/filter-pixfmts-tinterlace_cvlpf | 11 +++
>>>>>>  tests/ref/fate/filter-pixfmts-tinterlace_merge | 11 +++
>>>>>>  tests/ref/fate/filter-pixfmts-tinterlace_pad   | 11 +++
>>>>>>  tests/ref/fate/filter-pixfmts-tinterlace_vlpf  | 11 +++
>>>>>>  11 files changed, 345 insertions(+), 56 deletions(-)
>>>>>>
>>>>>> diff --git a/libavfilter/interlace.h b/libavfilter/interlace.h
>>>>>> index 2101b79..90a0198 100644
>>>>>> --- a/libavfilter/interlace.h
>>>>>> +++ b/libavfilter/interlace.h
>>>>>> @@ -25,9 +25,11 @@
>>>>>>  #ifndef AVFILTER_INTERLACE_H
>>>>>>  #define AVFILTER_INTERLACE_H
>>>>>>
>>>>>> +#include "libavutil/bswap.h"
>>>>>>  #include "libavutil/common.h"
>>>>>>  #include "libavutil/imgutils.h"
>>>>>>  #include "libavutil/opt.h"
>>>>>> +#include "libavutil/pixdesc.h"
>>>>>>
>>>>>>  #include "avfilter.h"
>>>>>>  #include "formats.h"
>>>>>> @@ -55,8 +57,9 @@ typedef struct InterlaceContext {
>>>>>>      enum ScanMode scan;    // top or bottom field first scanning
>>>>>>      int lowpass;           // enable or disable low pass filtering
>>>>>>      AVFrame *cur, *next;   // the two frames from which the new one is
>>>>> obtained
>>>>>> +    const AVPixFmtDescriptor *csp;
>>>>>>      void (*lowpass_line)(uint8_t *dstp, ptrdiff_t linesize, const
>>>>> uint8_t *srcp,
>>>>>> -                         ptrdiff_t mref, ptrdiff_t pref);
>>>>>> +                         ptrdiff_t mref, ptrdiff_t pref, int
>>> clip_max);
>>>>>>  } InterlaceContext;
>>>>>>
>>>>>>  void ff_interlace_init_x86(InterlaceContext *interlace);
>>>>>> diff --git a/libavfilter/tinterlace.h b/libavfilter/tinterlace.h
>>>>>> index cc13a6c..b5c39aa 100644
>>>>>> --- a/libavfilter/tinterlace.h
>>>>>> +++ b/libavfilter/tinterlace.h
>>>>>> @@ -27,7 +27,9 @@
>>>>>>  #ifndef AVFILTER_TINTERLACE_H
>>>>>>  #define AVFILTER_TINTERLACE_H
>>>>>>
>>>>>> +#include "libavutil/bswap.h"
>>>>>>  #include "libavutil/opt.h"
>>>>>> +#include "libavutil/pixdesc.h"
>>>>>>  #include "drawutils.h"
>>>>>>  #include "avfilter.h"
>>>>>>
>>>>>> @@ -60,8 +62,9 @@ typedef struct TInterlaceContext {
>>>>>>      int black_linesize[4];
>>>>>>      FFDrawContext draw;
>>>>>>      FFDrawColor color;
>>>>>> +    const AVPixFmtDescriptor *csp;
>>>>>>      void (*lowpass_line)(uint8_t *dstp, ptrdiff_t width, const uint8_t
>>>>> *srcp,
>>>>>> -                         ptrdiff_t mref, ptrdiff_t pref);
>>>>>> +                         ptrdiff_t mref, ptrdiff_t pref, int
>>> clip_max);
>>>>>>  } TInterlaceContext;
>>>>>>
>>>>>>  void ff_tinterlace_init_x86(TInterlaceContext *interlace);
>>>>>> diff --git a/libavfilter/vf_interlace.c b/libavfilter/vf_interlace.c
>>>>>> index 55bf782..bfba054 100644
>>>>>> --- a/libavfilter/vf_interlace.c
>>>>>> +++ b/libavfilter/vf_interlace.c
>>>>>> @@ -61,8 +61,8 @@ static const AVOption interlace_options[] = {
>>>>>>  AVFILTER_DEFINE_CLASS(interlace);
>>>>>>
>>>>>>  static void lowpass_line_c(uint8_t *dstp, ptrdiff_t linesize,
>>>>>> -                           const uint8_t *srcp,
>>>>>> -                           ptrdiff_t mref, ptrdiff_t pref)
>>>>>> +                           const uint8_t *srcp, ptrdiff_t mref,
>>>>>> +                           ptrdiff_t pref, int clip_max)
>>>>>>  {
>>>>>>      const uint8_t *srcp_above = srcp + mref;
>>>>>>      const uint8_t *srcp_below = srcp + pref;
>>>>>> @@ -75,9 +75,28 @@ static void lowpass_line_c(uint8_t *dstp, ptrdiff_t
>>>>> linesize,
>>>>>>      }
>>>>>>  }
>>>>>>
>>>>>> +static void lowpass_line_c_16(uint8_t *dst8, ptrdiff_t linesize,
>>>>>> +                              const uint8_t *src8, ptrdiff_t mref,
>>>>>> +                              ptrdiff_t pref, int clip_max)
>>>>>> +{
>>>>>> +    uint16_t *dstp = (uint16_t *)dst8;
>>>>>> +    const uint16_t *srcp = (const uint16_t *)src8;
>>>>>> +    const uint16_t *srcp_above = srcp + mref / 2;
>>>>>> +    const uint16_t *srcp_below = srcp + pref / 2;
>>>>>> +    int i;
>>>>>> +    for (i = 0; i < linesize; i++) {
>>>>>> +        // this calculation is an integer representation of
>>>>>> +        // '0.5 * current + 0.25 * above + 0.25 * below'
>>>>>> +        // '1 +' is for rounding.
>>>>>> +        dstp[i] = av_le2ne16((1 + av_le2ne16(srcp[i]) +
>>> av_le2ne16(srcp[i])
>>>
>>> You might want to load srcp[i] into a local variable here as well.
>>>
>>>>>> +                                + av_le2ne16(srcp_above[i])
>>>>>> +                                + av_le2ne16(srcp_below[i])) >> 2);
>>>>>
>>>>> This might work (And Michael will be able to confirm that if
>>>>> filter-pixfmts-tinterlace_vlpf passes)...
>>>>>
>>>>>> +    }
>>>>>> +}
>>>>>> +
>>>>>>  static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize,
>>>>>> -                                   const uint8_t *srcp,
>>>>>> -                                   ptrdiff_t mref, ptrdiff_t pref)
>>>>>> +                                   const uint8_t *srcp, ptrdiff_t
>>> mref,
>>>>>> +                                   ptrdiff_t pref, int clip_max)
>>>>>>  {
>>>>>>      const uint8_t *srcp_above = srcp + mref;
>>>>>>      const uint8_t *srcp_below = srcp + pref;
>>>>>> @@ -103,11 +122,46 @@ static void lowpass_line_complex_c(uint8_t *dstp,
>>>>> ptrdiff_t linesize,
>>>>>>      }
>>>>>>  }
>>>>>>
>>>>>> +static void lowpass_line_complex_c_16(uint8_t *dst8, ptrdiff_t
>>>>> linesize,
>>>>>> +                                      const uint8_t *src8, ptrdiff_t
>>>>> mref,
>>>>>> +                                      ptrdiff_t pref, int clip_max)
>>>>>> +{
>>>>>> +    uint16_t *dstp = (uint16_t *)dst8;
>>>>>> +    const uint16_t *srcp = (const uint16_t *)src8;
>>>>>> +    const uint16_t *srcp_above = srcp + mref / 2;
>>>>>> +    const uint16_t *srcp_below = srcp + pref / 2;
>>>>>> +    const uint16_t *srcp_above2 = srcp + mref;
>>>>>> +    const uint16_t *srcp_below2 = srcp + pref;
>>>>>> +    int i, srcp_x, srcp_ab;
>>>>>> +    for (i = 0; i < linesize; i++) {
>>>>>> +        // this calculation is an integer representation of
>>>>>> +        // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 *
>>>>> above2 - 0.125 * below2'
>>>>>> +        // '4 +' is for rounding.
>>>>>> +        srcp_x = av_le2ne16(srcp[i]) << 1;
>>>>>> +        srcp_ab = av_le2ne16(srcp_above[i]) +
>>> av_le2ne16(srcp_below[i]);
>>>>>> +        dstp[i] = av_le2ne16(av_clip((4 + ((av_le2ne16(srcp[i]) +
>>>>> srcp_x + srcp_ab) << 1)
>>>>>> +                                     - av_le2ne16(srcp_above2[i])
>>>>>> +                                     - av_le2ne16(srcp_below2[i])) >>
>>>>> 3, 0, clip_max));
>>>>>> +        // Prevent over-sharpening:
>>>>>> +        // dst must not exceed src when the average of above and below
>>>>>> +        // is less than src. And the other way around.
>>>>>> +        if (srcp_ab > srcp_x) {
>>>>>> +            if (av_le2ne16(dstp[i]) < av_le2ne16(srcp[i]))
>>>>>> +                dstp[i] = srcp[i];
>>>>>> +        } else if (av_le2ne16(dstp[i]) > av_le2ne16(srcp[i]))
>>>>>> +            dstp[i] = srcp[i];
>>>>>
>>>>> ...but chances are this over-sharpening prevention part will not. You're
>>>>> loading in native endianness here before storing. You only byteswapped
>>>>> for the comparison.
>>>>>
>>>>> Also, consider using local variables inside the for loop. You're loading
>>>>> scrp[i] and dstp[i] several times per iteration.
>>>>>
>>>>
>>>> Okay, then I would do:
>>>>     int i, dstp_le, srcp_le, srcp_x, srcp_ab;
>>>
>>> These are not pointers, so maybe remove the "p" to avoid confusion.
>>>
>>>>     for (i = 0; i < linesize; i++) {
>>>>         srcp_le = av_le2ne16(srcp[i]);
>>>>         srcp_x = srcp_le << 1;
>>>>         srcp_ab = av_le2ne16(srcp_above[i]) + av_le2ne16(srcp_below[i]);
>>>>         dstp_le = av_clip((4 + (srcp_le + srcp_x + srcp_ab) << 1)
>>>>                                  - av_le2ne16(srcp_above2[i])
>>>>                                  - av_le2ne16(srcp_below2[i])) >> 3, 0,
>>> clip_max);
>>>>         if (srcp_ab > srcp_x) {
>>>>             if (dstp_le < srcp_le)
>>>>                 dstp[i] = srcp[i];
>>>>             else
>>>>                 dstp[i] = av_le2ne16(dstp_le);
>>>>         } else if (dstp_le > srcp_le) {
>>>>             dstp[i] = srcp[i];
>>>>         } else
>>>>             dstp[i] = av_le2ne16(dstp_le);
>>>>     }
>>>> Shall I do dstp[i] = av_le2ne16(srcp_le); instead of dstp[i] = srcp[i]; ?
>>>
>>> Yes. No need to load srcp[i] again.
>>>
>>
>> Okay, thanks!
>> A patch with your suggestions is attached.
>> I will send a separate patch that removes the "p" from srcp_x and src_ab
>> in lowpass_line_complex_c function for consistency.
> 
>>  libavfilter/interlace.h                        |    5 +
>>  libavfilter/tinterlace.h                       |    5 +
>>  libavfilter/vf_interlace.c                     |   97 +++++++++++++++++++++----
>>  libavfilter/vf_tinterlace.c                    |   78 ++++++++++++++++++--
>>  libavfilter/x86/vf_interlace.asm               |   80 ++++++++++++++++++--
>>  libavfilter/x86/vf_interlace_init.c            |   51 +++++++++----
>>  libavfilter/x86/vf_tinterlace_init.c           |   51 +++++++++----
>>  tests/ref/fate/filter-pixfmts-tinterlace_cvlpf |   11 ++
>>  tests/ref/fate/filter-pixfmts-tinterlace_merge |   11 ++
>>  tests/ref/fate/filter-pixfmts-tinterlace_pad   |   11 ++
>>  tests/ref/fate/filter-pixfmts-tinterlace_vlpf  |   11 ++
>>  11 files changed, 355 insertions(+), 56 deletions(-)
>> d34a8ac3c7d1213a33f8fb5d144e0c9fd7f694e6  0003-avfilter-interlace-add-support-for-10-and-12-bit.patch
>> From 4f7172f258bcad46c401ac0acbb4ef1666466c18 Mon Sep 17 00:00:00 2001
>> From: Thomas Mundt <tmundt75 at gmail.com>
>> Date: Tue, 19 Sep 2017 22:23:23 +0200
>> Subject: [PATCH 3/3 v2] avfilter/interlace: add support for 10 and 12 bit
>>
>> Signed-off-by: Thomas Mundt <tmundt75 at gmail.com>
>> ---
>>  libavfilter/interlace.h                        |  5 +-
>>  libavfilter/tinterlace.h                       |  5 +-
>>  libavfilter/vf_interlace.c                     | 97 ++++++++++++++++++++++----
>>  libavfilter/vf_tinterlace.c                    | 78 +++++++++++++++++++--
>>  libavfilter/x86/vf_interlace.asm               | 80 +++++++++++++++++++--
>>  libavfilter/x86/vf_interlace_init.c            | 51 ++++++++++----
>>  libavfilter/x86/vf_tinterlace_init.c           | 51 ++++++++++----
>>  tests/ref/fate/filter-pixfmts-tinterlace_cvlpf | 11 +++
>>  tests/ref/fate/filter-pixfmts-tinterlace_merge | 11 +++
>>  tests/ref/fate/filter-pixfmts-tinterlace_pad   | 11 +++
>>  tests/ref/fate/filter-pixfmts-tinterlace_vlpf  | 11 +++
>>  11 files changed, 355 insertions(+), 56 deletions(-)
> 
> Tested on mips qemu and x86-64, works

Applied and pushed then. Thanks.