/*
 * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
 * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Calculate VMAF Motion score.
 */
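/*
 * A typical invocation (the filter has a single video input and logs the
 * average score when the stream ends) would be, e.g.:
 *     ffmpeg -i reference.mpg -vf vmafmotion -f null -
 */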

#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "avfilter.h"
#include "formats.h"
#include "internal.h"
#include "vmaf_motion.h"

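/*
 * The blur below runs in fixed point: each tap of FILTER_5 is scaled by
 * 2^BIT_SHIFT in ff_vmafmotion_init(), so convolution sums are shifted
 * right by BIT_SHIFT (or by the source bit depth, see conv_y_fn) to get
 * back to pixel scale. For 10-bit input the worst-case accumulator is
 * about 2^15 * 2^15 = 2^30, which still fits in a signed 32-bit int.
 */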
#define BIT_SHIFT 15

static const float FILTER_5[5] = {
    0.054488685,
    0.244201342,
    0.402619947,
    0.244201342,
    0.054488685
};

typedef struct VMAFMotionContext {
    const AVClass *class;
    VMAFMotionData data;
    FILE *stats_file;
    char *stats_file_str;
} VMAFMotionContext;

#define OFFSET(x) offsetof(VMAFMotionContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM

static const AVOption vmafmotion_options[] = {
    {"stats_file", "Set file where to store per-frame difference information", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(vmafmotion);

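/*
 * Plain sum of absolute differences between two blurred planes. The
 * stride arguments are in bytes, as AVFrame linesizes are, and get
 * converted to uint16_t element counts before indexing.
 */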
static uint64_t image_sad(const uint16_t *img1, const uint16_t *img2, int w,
                          int h, ptrdiff_t _img1_stride, ptrdiff_t _img2_stride)
{
    ptrdiff_t img1_stride = _img1_stride / sizeof(*img1);
    ptrdiff_t img2_stride = _img2_stride / sizeof(*img2);
    uint64_t sum = 0;
    int i, j;

    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j++) {
            sum += abs(img1[j] - img2[j]);
        }
        img1 += img1_stride;
        img2 += img2_stride;
    }

    return sum;
}

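/*
 * Horizontal pass of the separable blur. Each row is split into a left
 * border, a bounds-check-free center and a right border; out-of-range
 * taps are mirrored back into the row, e.g. with radius 2 the taps for
 * j = 0 read columns |{-2,-1,0,1,2}| = {2,1,0,1,2}.
 */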
static void convolution_x(const uint16_t *filter, int filt_w, const uint16_t *src,
                          uint16_t *dst, int w, int h, ptrdiff_t _src_stride,
                          ptrdiff_t _dst_stride)
{
    ptrdiff_t src_stride = _src_stride / sizeof(*src);
    ptrdiff_t dst_stride = _dst_stride / sizeof(*dst);
    int radius = filt_w / 2;
    int borders_left = radius;
    int borders_right = w - (filt_w - radius);
    int i, j, k;
    int sum = 0;

    for (i = 0; i < h; i++) {
        for (j = 0; j < borders_left; j++) {
            sum = 0;
            for (k = 0; k < filt_w; k++) {
                int j_tap = FFABS(j - radius + k);
                if (j_tap >= w) {
                    j_tap = w - (j_tap - w + 1);
                }
                sum += filter[k] * src[i * src_stride + j_tap];
            }
            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
        }

        for (j = borders_left; j < borders_right; j++) {
            int sum = 0;
            for (k = 0; k < filt_w; k++) {
                sum += filter[k] * src[i * src_stride + j - radius + k];
            }
            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
        }

        for (j = borders_right; j < w; j++) {
            sum = 0;
            for (k = 0; k < filt_w; k++) {
                int j_tap = FFABS(j - radius + k);
                if (j_tap >= w) {
                    j_tap = w - (j_tap - w + 1);
                }
                sum += filter[k] * src[i * src_stride + j_tap];
            }
            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
        }
    }
}

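/*
 * Vertical pass, instantiated below for 8- and 10-bit sources. Note the
 * shift by the bit depth rather than by BIT_SHIFT: 8-bit input is scaled
 * by 2^15 / 2^8 and 10-bit by 2^15 / 2^10, so both depths land on the
 * same ~15-bit intermediate scale and the final score becomes independent
 * of the input bit depth.
 */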
#define conv_y_fn(type, bits) \
static void convolution_y_##bits##bit(const uint16_t *filter, int filt_w, \
                                      const uint8_t *_src, uint16_t *dst, \
                                      int w, int h, ptrdiff_t _src_stride, \
                                      ptrdiff_t _dst_stride) \
{ \
    const type *src = (const type *) _src; \
    ptrdiff_t src_stride = _src_stride / sizeof(*src); \
    ptrdiff_t dst_stride = _dst_stride / sizeof(*dst); \
    int radius = filt_w / 2; \
    int borders_top = radius; \
    int borders_bottom = h - (filt_w - radius); \
    int i, j, k; \
    int sum = 0; \
\
    for (i = 0; i < borders_top; i++) { \
        for (j = 0; j < w; j++) { \
            sum = 0; \
            for (k = 0; k < filt_w; k++) { \
                int i_tap = FFABS(i - radius + k); \
                if (i_tap >= h) { \
                    i_tap = h - (i_tap - h + 1); \
                } \
                sum += filter[k] * src[i_tap * src_stride + j]; \
            } \
            dst[i * dst_stride + j] = sum >> bits; \
        } \
    } \
    for (i = borders_top; i < borders_bottom; i++) { \
        for (j = 0; j < w; j++) { \
            sum = 0; \
            for (k = 0; k < filt_w; k++) { \
                sum += filter[k] * src[(i - radius + k) * src_stride + j]; \
            } \
            dst[i * dst_stride + j] = sum >> bits; \
        } \
    } \
    for (i = borders_bottom; i < h; i++) { \
        for (j = 0; j < w; j++) { \
            sum = 0; \
            for (k = 0; k < filt_w; k++) { \
                int i_tap = FFABS(i - radius + k); \
                if (i_tap >= h) { \
                    i_tap = h - (i_tap - h + 1); \
                } \
                sum += filter[k] * src[i_tap * src_stride + j]; \
            } \
            dst[i * dst_stride + j] = sum >> bits; \
        } \
    } \
}

conv_y_fn(uint8_t, 8);
conv_y_fn(uint16_t, 10);

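/*
 * Wire up the scalar implementations; anything other than 10 bits per
 * pixel falls back to the 8-bit vertical convolution.
 */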
static void vmafmotiondsp_init(VMAFMotionDSPContext *dsp, int bpp) {
    dsp->convolution_x = convolution_x;
    dsp->convolution_y = bpp == 10 ? convolution_y_10bit : convolution_y_8bit;
    dsp->sad = image_sad;
}

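/*
 * Per-frame scoring: blur the incoming frame (vertical, then horizontal
 * pass), then take the SAD against the previous blurred frame. Dividing
 * by w * h << (BIT_SHIFT - 8) turns the fixed-point SAD into a per-pixel
 * average on an 8-bit scale; the first frame has no predecessor and
 * scores 0. (Note the divisor is computed in int arithmetic, which would
 * overflow for frames beyond roughly 16 megapixels.)
 */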
double ff_vmafmotion_process(VMAFMotionData *s, AVFrame *ref)
{
    double score;

    s->vmafdsp.convolution_y(s->filter, 5, ref->data[0], s->temp_data,
                             s->width, s->height, ref->linesize[0], s->stride);
    s->vmafdsp.convolution_x(s->filter, 5, s->temp_data, s->blur_data[0],
                             s->width, s->height, s->stride, s->stride);

    if (!s->nb_frames) {
        score = 0.0;
    } else {
        uint64_t sad = s->vmafdsp.sad(s->blur_data[1], s->blur_data[0],
                                      s->width, s->height, s->stride, s->stride);
        // the output score is always normalized to 8 bits
        score = (double) (sad * 1.0 / (s->width * s->height << (BIT_SHIFT - 8)));
    }

    FFSWAP(uint16_t *, s->blur_data[0], s->blur_data[1]);
    s->nb_frames++;
    s->motion_sum += score;

    return score;
}

static void set_meta(AVDictionary **metadata, const char *key, float d)
{
    char value[128];
    snprintf(value, sizeof(value), "%0.2f", d);
    av_dict_set(metadata, key, value, 0);
}

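/*
 * Attach the per-frame score to the frame as the "lavfi.vmafmotion.score"
 * metadata entry and, if a stats file was requested, append a line to it.
 */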
static void do_vmafmotion(AVFilterContext *ctx, AVFrame *ref)
{
    VMAFMotionContext *s = ctx->priv;
    double score;

    score = ff_vmafmotion_process(&s->data, ref);
    set_meta(&ref->metadata, "lavfi.vmafmotion.score", score);
    if (s->stats_file) {
        fprintf(s->stats_file,
                "n:%"PRId64" motion:%0.2lf\n", s->data.nb_frames, score);
    }
}

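/*
 * All three scratch planes share one stride, rounded up to 32 bytes,
 * presumably to keep rows suitably aligned for SIMD versions of the DSP
 * functions.
 */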
int ff_vmafmotion_init(VMAFMotionData *s,
                       int w, int h, enum AVPixelFormat fmt)
{
    size_t data_sz;
    int i;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);

    s->width = w;
    s->height = h;
    s->stride = FFALIGN(w * sizeof(uint16_t), 32);

    data_sz = (size_t) s->stride * h;
    if (!(s->blur_data[0] = av_malloc(data_sz)) ||
        !(s->blur_data[1] = av_malloc(data_sz)) ||
        !(s->temp_data    = av_malloc(data_sz))) {
        return AVERROR(ENOMEM);
    }

    for (i = 0; i < 5; i++) {
        s->filter[i] = lrint(FILTER_5[i] * (1 << BIT_SHIFT));
    }

    vmafmotiondsp_init(&s->vmafdsp, desc->comp[0].depth);

    return 0;
}

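/*
 * Accept any planar (or single-component) format with 8- or 10-bit
 * samples in native endianness; RGB, hardware, bitstream-packed and
 * paletted formats are rejected.
 */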
static int query_formats(AVFilterContext *ctx)
{
    AVFilterFormats *fmts_list = NULL;
    int format, ret;

    for (format = 0; av_pix_fmt_desc_get(format); format++) {
        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
        if (!(desc->flags & (AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_HWACCEL |
                             AV_PIX_FMT_FLAG_BITSTREAM | AV_PIX_FMT_FLAG_PAL)) &&
            (desc->flags & AV_PIX_FMT_FLAG_PLANAR || desc->nb_components == 1) &&
            (!(desc->flags & AV_PIX_FMT_FLAG_BE) == !HAVE_BIGENDIAN || desc->comp[0].depth == 8) &&
            (desc->comp[0].depth == 8 || desc->comp[0].depth == 10) &&
            (ret = ff_add_format(&fmts_list, format)) < 0)
            return ret;
    }

    return ff_set_common_formats(ctx, fmts_list);
}

static int config_input_ref(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    VMAFMotionContext *s = ctx->priv;

    return ff_vmafmotion_init(&s->data, ctx->inputs[0]->w,
                              ctx->inputs[0]->h, ctx->inputs[0]->format);
}

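/* Free the blur buffers and return the average score over all frames. */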
double ff_vmafmotion_uninit(VMAFMotionData *s)
{
    av_free(s->blur_data[0]);
    av_free(s->blur_data[1]);
    av_free(s->temp_data);

    return s->nb_frames > 0 ? s->motion_sum / s->nb_frames : 0.0;
}

static int filter_frame(AVFilterLink *inlink, AVFrame *ref)
{
    AVFilterContext *ctx = inlink->dst;
    do_vmafmotion(ctx, ref);
    return ff_filter_frame(ctx->outputs[0], ref);
}

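/*
 * A stats_file of "-" selects stdout; uninit() takes care not to
 * fclose() it in that case.
 */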
static av_cold int init(AVFilterContext *ctx)
{
    VMAFMotionContext *s = ctx->priv;

    if (s->stats_file_str) {
        if (!strcmp(s->stats_file_str, "-")) {
            s->stats_file = stdout;
        } else {
            s->stats_file = fopen(s->stats_file_str, "w");
            if (!s->stats_file) {
                int err = AVERROR(errno);
                char buf[128];
                av_strerror(err, buf, sizeof(buf));
                av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n",
                       s->stats_file_str, buf);
                return err;
            }
        }
    }

    return 0;
}

static av_cold void uninit(AVFilterContext *ctx)
{
    VMAFMotionContext *s = ctx->priv;
    double avg_motion = ff_vmafmotion_uninit(&s->data);

    if (s->data.nb_frames > 0) {
        av_log(ctx, AV_LOG_INFO, "VMAF Motion avg: %.3f\n", avg_motion);
    }

    if (s->stats_file && s->stats_file != stdout)
        fclose(s->stats_file);
}

static const AVFilterPad vmafmotion_inputs[] = {
    {
        .name         = "reference",
        .type         = AVMEDIA_TYPE_VIDEO,
        .filter_frame = filter_frame,
        .config_props = config_input_ref,
    },
    { NULL }
};

static const AVFilterPad vmafmotion_outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_VIDEO,
    },
    { NULL }
};

AVFilter ff_vf_vmafmotion = {
    .name          = "vmafmotion",
    .description   = NULL_IF_CONFIG_SMALL("Calculate the VMAF Motion score."),
    .init          = init,
    .uninit        = uninit,
    .query_formats = query_formats,
    .priv_size     = sizeof(VMAFMotionContext),
    .priv_class    = &vmafmotion_class,
    .inputs        = vmafmotion_inputs,
    .outputs       = vmafmotion_outputs,
};