FFmpeg: vf_vmafmotion.c
/*
 * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
 * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Calculate VMAF Motion score.
 */
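
/*
 * Usage sketch (a typical invocation, not taken from this file): the filter
 * takes a single video input, attaches the per-frame score as the frame
 * metadata key "lavfi.vmafmotion.score" and logs the average on exit, e.g.
 *
 *     ffmpeg -i reference.mp4 -lavfi vmafmotion -f null -
 */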

#include "libavutil/file_open.h"
#include "libavutil/mem.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "avfilter.h"
#include "filters.h"
#include "formats.h"
#include "video.h"
#include "vmaf_motion.h"

#define BIT_SHIFT 15

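/*
 * 5-tap smoothing kernel used for the VMAF motion feature. The floating-point
 * taps below sum to ~1.0; ff_vmafmotion_init() converts them to Q15 fixed
 * point with lrint(FILTER_5[i] * (1 << BIT_SHIFT)), giving roughly
 * {1785, 8002, 13193, 8002, 1785}, which sums to ~2^15.
 */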
static const float FILTER_5[5] = {
    0.054488685,
    0.244201342,
    0.402619947,
    0.244201342,
    0.054488685
};

typedef struct VMAFMotionContext {
    const AVClass *class;
    VMAFMotionData data;
    FILE *stats_file;
    char *stats_file_str;
} VMAFMotionContext;

#define OFFSET(x) offsetof(VMAFMotionContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM

static const AVOption vmafmotion_options[] = {
    {"stats_file", "Set file where to store per-frame difference information", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
    { NULL }
};
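
/*
 * Example (sketch): per-frame logging can be requested with
 * "vmafmotion=stats_file=motion.log"; stats_file=- sends the log to stdout
 * (see init() below).
 */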

AVFILTER_DEFINE_CLASS(vmafmotion);

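/*
 * Sum of absolute differences between two 16-bit planes (the blurred current
 * and previous frames). Strides are passed in bytes and converted to element
 * counts here.
 */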
static uint64_t image_sad(const uint16_t *img1, const uint16_t *img2, int w,
                          int h, ptrdiff_t _img1_stride, ptrdiff_t _img2_stride)
{
    ptrdiff_t img1_stride = _img1_stride / sizeof(*img1);
    ptrdiff_t img2_stride = _img2_stride / sizeof(*img2);
    uint64_t sum = 0;
    int i, j;

    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j++) {
            sum += abs(img1[j] - img2[j]);
        }
        img1 += img1_stride;
        img2 += img2_stride;
    }

    return sum;
}

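/*
 * Horizontal pass of the separable blur. "filter" holds Q15 coefficients, so
 * each output is shifted back down by BIT_SHIFT. Taps that would fall outside
 * the row are mirrored back inside (FFABS at the left edge, the w - ... - 1
 * reflection at the right edge); the middle loop runs without bounds checks.
 */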
static void convolution_x(const uint16_t *filter, int filt_w, const uint16_t *src,
                          uint16_t *dst, int w, int h, ptrdiff_t _src_stride,
                          ptrdiff_t _dst_stride)
{
    ptrdiff_t src_stride = _src_stride / sizeof(*src);
    ptrdiff_t dst_stride = _dst_stride / sizeof(*dst);
    int radius = filt_w / 2;
    int borders_left = radius;
    int borders_right = w - (filt_w - radius);
    int i, j, k;

    for (i = 0; i < h; i++) {
        for (j = 0; j < borders_left; j++) {
            int sum = 0;
            for (k = 0; k < filt_w; k++) {
                int j_tap = FFABS(j - radius + k);
                if (j_tap >= w) {
                    j_tap = w - (j_tap - w + 1);
                }
                sum += filter[k] * src[i * src_stride + j_tap];
            }
            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
        }

        for (j = borders_left; j < borders_right; j++) {
            int sum = 0;
            for (k = 0; k < filt_w; k++) {
                sum += filter[k] * src[i * src_stride + j - radius + k];
            }
            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
        }

        for (j = borders_right; j < w; j++) {
            int sum = 0;
            for (k = 0; k < filt_w; k++) {
                int j_tap = FFABS(j - radius + k);
                if (j_tap >= w) {
                    j_tap = w - (j_tap - w + 1);
                }
                sum += filter[k] * src[i * src_stride + j_tap];
            }
            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
        }
    }
}

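/*
 * Vertical pass of the separable blur, instantiated below for 8- and 10-bit
 * sources. Shifting by the source bit depth (rather than BIT_SHIFT) puts both
 * depths on a common scale and keeps the result within 16 bits:
 * 255 * 2^15 >> 8 = 32640 and 1023 * 2^15 >> 10 = 32736, both < 65536.
 */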
#define conv_y_fn(type, bits) \
static void convolution_y_##bits##bit(const uint16_t *filter, int filt_w, \
                                      const uint8_t *_src, uint16_t *dst, \
                                      int w, int h, ptrdiff_t _src_stride, \
                                      ptrdiff_t _dst_stride) \
{ \
    const type *src = (const type *) _src; \
    ptrdiff_t src_stride = _src_stride / sizeof(*src); \
    ptrdiff_t dst_stride = _dst_stride / sizeof(*dst); \
    int radius = filt_w / 2; \
    int borders_top = radius; \
    int borders_bottom = h - (filt_w - radius); \
    int i, j, k; \
    int sum = 0; \
\
    for (i = 0; i < borders_top; i++) { \
        for (j = 0; j < w; j++) { \
            sum = 0; \
            for (k = 0; k < filt_w; k++) { \
                int i_tap = FFABS(i - radius + k); \
                if (i_tap >= h) { \
                    i_tap = h - (i_tap - h + 1); \
                } \
                sum += filter[k] * src[i_tap * src_stride + j]; \
            } \
            dst[i * dst_stride + j] = sum >> bits; \
        } \
    } \
    for (i = borders_top; i < borders_bottom; i++) { \
        for (j = 0; j < w; j++) { \
            sum = 0; \
            for (k = 0; k < filt_w; k++) { \
                sum += filter[k] * src[(i - radius + k) * src_stride + j]; \
            } \
            dst[i * dst_stride + j] = sum >> bits; \
        } \
    } \
    for (i = borders_bottom; i < h; i++) { \
        for (j = 0; j < w; j++) { \
            sum = 0; \
            for (k = 0; k < filt_w; k++) { \
                int i_tap = FFABS(i - radius + k); \
                if (i_tap >= h) { \
                    i_tap = h - (i_tap - h + 1); \
                } \
                sum += filter[k] * src[i_tap * src_stride + j]; \
            } \
            dst[i * dst_stride + j] = sum >> bits; \
        } \
    } \
}

conv_y_fn(uint8_t, 8)
conv_y_fn(uint16_t, 10)

static void vmafmotiondsp_init(VMAFMotionDSPContext *dsp, int bpp) {
    dsp->convolution_x = convolution_x;
    dsp->convolution_y = bpp == 10 ? convolution_y_10bit : convolution_y_8bit;
    dsp->sad = image_sad;
}

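/*
 * Compute the motion score for one frame: blur the first plane with the 5-tap
 * kernel (vertical then horizontal pass), then take the SAD against the
 * previous blurred frame. Dividing by width * height << (BIT_SHIFT - 8)
 * removes the fixed-point scaling and normalizes the result to a mean
 * per-pixel difference on an 8-bit scale; the first frame always scores 0.
 */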
double ff_vmafmotion_process(VMAFMotionData *s, AVFrame *ref)
{
    double score;

    s->vmafdsp.convolution_y(s->filter, 5, ref->data[0], s->temp_data,
                             s->width, s->height, ref->linesize[0], s->stride);
    s->vmafdsp.convolution_x(s->filter, 5, s->temp_data, s->blur_data[0],
                             s->width, s->height, s->stride, s->stride);

    if (!s->nb_frames) {
        score = 0.0;
    } else {
        uint64_t sad = s->vmafdsp.sad(s->blur_data[1], s->blur_data[0],
                                      s->width, s->height, s->stride, s->stride);
        // the output score is always normalized to 8 bits
        score = (double) (sad * 1.0 / (s->width * s->height << (BIT_SHIFT - 8)));
    }

    FFSWAP(uint16_t *, s->blur_data[0], s->blur_data[1]);
    s->nb_frames++;
    s->motion_sum += score;

    return score;
}

static void set_meta(AVDictionary **metadata, const char *key, float d)
{
    char value[128];
    snprintf(value, sizeof(value), "%0.2f", d);
    av_dict_set(metadata, key, value, 0);
}

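/*
 * Per-frame driver: compute the score, attach it to the frame as the
 * "lavfi.vmafmotion.score" metadata entry and, if requested, append an
 * "n:<frame number> motion:<score>" line to the stats file.
 */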
static void do_vmafmotion(AVFilterContext *ctx, AVFrame *ref)
{
    VMAFMotionContext *s = ctx->priv;
    double score;

    score = ff_vmafmotion_process(&s->data, ref);
    set_meta(&ref->metadata, "lavfi.vmafmotion.score", score);
    if (s->stats_file) {
        fprintf(s->stats_file,
                "n:%"PRId64" motion:%0.2lf\n", s->data.nb_frames, score);
    }
}

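/*
 * Allocate the two blurred-frame buffers plus a temporary buffer (16-bit
 * samples, stride aligned to 32 bytes), convert FILTER_5 to Q15 fixed point
 * and pick the 8- or 10-bit DSP routines. Frames smaller than 3x3 are
 * rejected.
 */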
int ff_vmafmotion_init(VMAFMotionData *s,
                       int w, int h, enum AVPixelFormat fmt)
{
    size_t data_sz;
    int i;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);

    if (w < 3 || h < 3)
        return AVERROR(EINVAL);

    s->width = w;
    s->height = h;
    s->stride = FFALIGN(w * sizeof(uint16_t), 32);

    data_sz = (size_t) s->stride * h;
    if (!(s->blur_data[0] = av_malloc(data_sz)) ||
        !(s->blur_data[1] = av_malloc(data_sz)) ||
        !(s->temp_data = av_malloc(data_sz))) {
        return AVERROR(ENOMEM);
    }

    for (i = 0; i < 5; i++) {
        s->filter[i] = lrint(FILTER_5[i] * (1 << BIT_SHIFT));
    }

    vmafmotiondsp_init(&s->vmafdsp, desc->comp[0].depth);

    return 0;
}

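/*
 * Accept planar (or single-component) non-RGB formats with 8- or 10-bit
 * samples in native endianness; hardware, bitstream and paletted formats are
 * excluded.
 */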
static int query_formats(const AVFilterContext *ctx,
                         AVFilterFormatsConfig **cfg_in,
                         AVFilterFormatsConfig **cfg_out)
{
    AVFilterFormats *fmts_list = NULL;
    int format, ret;

    for (format = 0; av_pix_fmt_desc_get(format); format++) {
        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
        if (!(desc->flags & (AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_PAL |
                             AV_PIX_FMT_FLAG_HWACCEL | AV_PIX_FMT_FLAG_BITSTREAM)) &&
            (desc->flags & AV_PIX_FMT_FLAG_PLANAR || desc->nb_components == 1) &&
            (!(desc->flags & AV_PIX_FMT_FLAG_BE) == !HAVE_BIGENDIAN || desc->comp[0].depth == 8) &&
            (desc->comp[0].depth == 8 || desc->comp[0].depth == 10) &&
            (ret = ff_add_format(&fmts_list, format)) < 0)
            return ret;
    }

    return ff_set_common_formats2(ctx, cfg_in, cfg_out, fmts_list);
}

static int config_input_ref(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    VMAFMotionContext *s = ctx->priv;

    return ff_vmafmotion_init(&s->data, ctx->inputs[0]->w,
                              ctx->inputs[0]->h, ctx->inputs[0]->format);
}

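/*
 * Free the blur buffers and return the average motion score over all
 * processed frames (0 if no frames were seen).
 */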
double ff_vmafmotion_uninit(VMAFMotionData *s)
{
    av_free(s->blur_data[0]);
    av_free(s->blur_data[1]);
    av_free(s->temp_data);

    return s->nb_frames > 0 ? s->motion_sum / s->nb_frames : 0.0;
}

static int filter_frame(AVFilterLink *inlink, AVFrame *ref)
{
    AVFilterContext *ctx = inlink->dst;
    do_vmafmotion(ctx, ref);
    return ff_filter_frame(ctx->outputs[0], ref);
}

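/*
 * Open the optional stats file; "-" selects stdout, which uninit() is careful
 * not to fclose().
 */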
static av_cold int init(AVFilterContext *ctx)
{
    VMAFMotionContext *s = ctx->priv;

    if (s->stats_file_str) {
        if (!strcmp(s->stats_file_str, "-")) {
            s->stats_file = stdout;
        } else {
            s->stats_file = avpriv_fopen_utf8(s->stats_file_str, "w");
            if (!s->stats_file) {
                int err = AVERROR(errno);
                av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n",
                       s->stats_file_str, av_err2str(err));
                return err;
            }
        }
    }

    return 0;
}

static av_cold void uninit(AVFilterContext *ctx)
{
    VMAFMotionContext *s = ctx->priv;
    double avg_motion = ff_vmafmotion_uninit(&s->data);

    if (s->data.nb_frames > 0) {
        av_log(ctx, AV_LOG_INFO, "VMAF Motion avg: %.3f\n", avg_motion);
    }

    if (s->stats_file && s->stats_file != stdout)
        fclose(s->stats_file);
}

static const AVFilterPad vmafmotion_inputs[] = {
    {
        .name         = "reference",
        .type         = AVMEDIA_TYPE_VIDEO,
        .filter_frame = filter_frame,
        .config_props = config_input_ref,
    },
};

const AVFilter ff_vf_vmafmotion = {
    .name          = "vmafmotion",
    .description   = NULL_IF_CONFIG_SMALL("Calculate the VMAF Motion score."),
    .init          = init,
    .uninit        = uninit,
    .priv_size     = sizeof(VMAFMotionContext),
    .priv_class    = &vmafmotion_class,
    .flags         = AVFILTER_FLAG_METADATA_ONLY,
    FILTER_INPUTS(vmafmotion_inputs),
    FILTER_OUTPUTS(ff_video_default_filterpad),
    FILTER_QUERY_FUNC2(query_formats),
};