[FFmpeg-devel] [PATCH] avfilter: add vmafmotion filter

Fri Sep 8 18:38:25 EEST 2017

Hi,

On Wed, Sep 6, 2017 at 4:04 PM, Ashish Pratap Singh <ashk43712 at gmail.com>
wrote:

> +#define MAX_ALIGN 32
> +#define ALIGN_CEIL(x) ((x) + ((x) % MAX_ALIGN ? MAX_ALIGN - (x) %
> MAX_ALIGN : 0))
>

../libavutil/macros.h:#define FFALIGN(x, a) (((x)+(a)-1)&~((a)-1))

>
> +static const AVOption vmafmotion_options[] = {
> +    { NULL }
> +};
>

#define vmafmotion_options NULL?

> +FRAMESYNC_DEFINE_CLASS(vmafmotion, VMAFMotionContext, fs);
>

src/libavfilter/vf_vmafmotion.c:59:1: warning: unused function
'vmafmotion_framesync_preinit' [-Wunused-function]
FRAMESYNC_DEFINE_CLASS(vmafmotion, VMAFMotionContext, fs);
^
src/libavfilter/framesync2.h:298:12: note: expanded from macro
'FRAMESYNC_DEFINE_CLASS'
static int name##_framesync_preinit(AVFilterContext *ctx) { \
           ^
<scratch space>:44:1: note: expanded from here
vmafmotion_framesync_preinit

> +static inline int floorn(int n, int m)
> +{
> +    return n - n % m;
> +}
> +
> +static inline int ceiln(int n, int m)
> +{
> +    return n % m ? n + (m - n % m) : n;
> +}
> +
> +static void convolution_x(const int *filter, int filt_w, const uint16_t
> *src,
> +                          uint16_t *dst, int w, int h, ptrdiff_t
> src_stride,
> +                          ptrdiff_t dst_stride)
>

I would move sad() convolution_x/y() as function pointers into a
VMAFMotionDSPContext.

In ffmpeg, we always express "stride" in bytes, not in pixels. In SIMD, it
will become clear why. Same comment for _y.

> +{
> +    int radius = filt_w / 2;
> +    int borders_left = ceiln(radius, 1);
> +    int borders_right = floorn(w - (filt_w - radius), 1);
>

For cases where m is 1, ceiln() and floorn() just return n, so this can be
simplified. Same comment for _y.

> +#define conv_y_fn(type, bits) \
> +    static void convolution_y_##bits##bit(const int *filter, int filt_w,
> \
> +                                          const type *src, uint16_t *dst,
> \
> +                                          int w, int h, ptrdiff_t
> src_stride, \
> +                                          ptrdiff_t dst_stride) \
> +{ \
> +    int radius = filt_w / 2; \
> +    int borders_top = ceiln(radius, 1); \
> +    int borders_bottom = floorn(h - (filt_w - radius), 1); \
> +    int i, j, k; \
> +    int sum = 0; \
> +    \
> +    for (i = 0; i < borders_top; i++) { \
> +        for (j = 0; j < w; j++) { \
> +            sum = 0; \
> +            for (k = 0; k < filt_w; k++) { \
> +                int i_tap = FFABS(i - radius + k); \
> +                if (i_tap >= h) { \
> +                    i_tap = h - (i_tap - h + 1); \
> +                } \
> +                sum += filter[k] * src[i_tap * src_stride + j]; \
> +            } \
> +            dst[i * dst_stride + j] = sum >> N; \
>

Same comment for bottom 2: I would decrease the shift in this expression by
a few bits. The reason is that you can use the extra bits as fractional
bits to increase precision. E.g. for 8-bits, the input is 8bits and the
filter is 15, but the uint16_t can hold 16 bits (let's say 15 so we can use
signed instructions also) data, so instead of 8*15>>15=8 bits, we can shift
by just 8bits to get 15 bits (full-width) uint16_t. Same for 10-bit, where
we can shift by 10 instead of 15.

The sad calculation then indeed has to downshift the result again (*score =
(double) (sad * 1.0 / (w * h)); becomes sad * 1.0 / (w * h << (15-bits)).

> +void convolution_f32(const int *filter, int filt_w, const void *src,
> +                     uint16_t *dst, uint16_t *tmp, int w, int h,
> +                     ptrdiff_t src_stride, ptrdiff_t dst_stride, uint8_t
> type)
> +{
> +    if(type == 8) {
>

Rename "type" to bitdepth.

> +    if (s->desc->comp[0].depth <= 8) {
> +        ref_px_stride = ref_stride / sizeof(uint8_t);
> +        convolution_f32(s->filter, 5, (const uint8_t *) ref->data[0],
> +                        s->blur_data, s->temp_data, s->width, s->height,
> +                        ref_px_stride, px_stride, 8);
> +    } else {
> +        ref_px_stride = ref_stride / sizeof(uint16_t);
> +        convolution_f32(s->filter, 5, (const uint16_t *) ref->data[0],
> +                        s->blur_data, s->temp_data, s->width, s->height,
> +                        ref_px_stride, px_stride, 10);
> +    }
>

The if and resulting cast are unnecessary f you express stride in bytes
instead of pixels.

+    memcpy(s->prev_blur_data, s->blur_data, data_sz);
>

Hm... Try FFSWAP with pointers.

> +static int query_formats(AVFilterContext *ctx)
> +{
> +    static const enum AVPixelFormat pix_fmts[] = {
> +        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P,
> +        AV_PIX_FMT_YUV444P10LE, AV_PIX_FMT_YUV422P10LE,
> AV_PIX_FMT_YUV420P10LE,
>

Remove the LE suffix, this filter handles native endian.

> +static int activate(AVFilterContext *ctx)
> +{
> +    VMAFMotionContext *s = ctx->priv;
> +    return ff_framesync2_activate(&s->fs);
> +}
> +
> +
> +static av_cold void uninit(AVFilterContext *ctx)
>

One newline too many in between the two functions.

> +#ifndef MOTION_TOOLS_H_
> +#define MOTION_TOOLS_H_
>

AVFILTER_VMAFMOTION_H

> +#define N 15
> +
> +static const float FILTER_5[5] = {
> +    0.054488685,
> +    0.244201342,
> +    0.402619947,
> +    0.244201342,
> +    0.054488685
> +};
>

>
+void convolution_f32(const int *filter, int filt_width, const void *src,
> +                     uint16_t *dst, uint16_t *tmp, int w, int h,
> +                     ptrdiff_t src_stride, ptrdiff_t dst_stride, uint8_t
> type);
>

These 3 should be in the C file. I would say N is a very generic name but
if it compiles I guess it's fine :-).

Ronald