FFmpeg
vf_libvmaf.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
3  * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * Calculate the VMAF between two input videos.
25  */
26 
27 #include <pthread.h>
28 #include <libvmaf.h>
29 #include "libavutil/avstring.h"
30 #include "libavutil/opt.h"
31 #include "libavutil/pixdesc.h"
32 #include "avfilter.h"
33 #include "drawutils.h"
34 #include "formats.h"
35 #include "framesync.h"
36 #include "internal.h"
37 #include "video.h"
38 
typedef struct LIBVMAFContext {
    const AVClass *class;   // must stay the first member so av_log() works on this struct
    int width;              // frame width of both inputs (set in config_input_ref)
    int height;             // frame height of both inputs
    double vmaf_score;      // pooled VMAF score written by compute_vmaf()
    int eof;                // set at uninit to wake/terminate the reader callback
    int frame_set;          // 1 while gref/gmain hold a frame pair awaiting the vmaf thread
    char *model_path;       // VMAF model file path (option "model_path")
    char *log_path;         // per-frame log destination (option "log_path")
    char *log_fmt;          // log format, "xml" or "json" (option "log_fmt")
    int psnr;               // also compute PSNR (option "psnr")
    int ssim;               // also compute SSIM (option "ssim")
    int ms_ssim;            // also compute MS-SSIM (option "ms_ssim")
    char *pool;             // pooling method name passed to libvmaf (option "pool")
    int n_threads;          // libvmaf worker thread count (option "n_threads")
    int error;              // nonzero once compute_vmaf() has failed
    /* NOTE(review): several members referenced elsewhere in this file
     * (fs, desc, vmaf_thread, vmaf_thread_created, lock, cond, gmain, gref,
     * enable_transform, phone_model, n_subsample, enable_conf_interval) and
     * the closing "} LIBVMAFContext;" appear to have been lost in
     * extraction — restore from the upstream source. */
71 #define OFFSET(x) offsetof(LIBVMAFContext, x)
72 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
73 
/* AVOption table for the filter; OFFSET() resolves into LIBVMAFContext.
 * The 0/1 min/max on the string options are ignored for AV_OPT_TYPE_STRING. */
static const AVOption libvmaf_options[] = {
    {"model_path", "Set the model to be used for computing vmaf.", OFFSET(model_path), AV_OPT_TYPE_STRING, {.str="/usr/local/share/model/vmaf_v0.6.1.pkl"}, 0, 1, FLAGS},
    {"log_path", "Set the file path to be used to store logs.", OFFSET(log_path), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
    {"log_fmt", "Set the format of the log (xml or json).", OFFSET(log_fmt), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
    {"enable_transform", "Enables transform for computing vmaf.", OFFSET(enable_transform), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
    {"phone_model", "Invokes the phone model that will generate higher VMAF scores.", OFFSET(phone_model), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
    {"psnr", "Enables computing psnr along with vmaf.", OFFSET(psnr), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
    {"ssim", "Enables computing ssim along with vmaf.", OFFSET(ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
    {"ms_ssim", "Enables computing ms-ssim along with vmaf.", OFFSET(ms_ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
    {"pool", "Set the pool method to be used for computing vmaf.", OFFSET(pool), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
    {"n_threads", "Set number of threads to be used when computing vmaf.", OFFSET(n_threads), AV_OPT_TYPE_INT, {.i64=0}, 0, UINT_MAX, FLAGS},
    {"n_subsample", "Set interval for frame subsampling used when computing vmaf.", OFFSET(n_subsample), AV_OPT_TYPE_INT, {.i64=1}, 1, UINT_MAX, FLAGS},
    {"enable_conf_interval", "Enables confidence interval.", OFFSET(enable_conf_interval), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
    { NULL }
};
89 
91 
/**
 * Expands to read_frame_<bits>bit(), the frame-feeding callback handed to
 * libvmaf's compute_vmaf().  Under s->lock it waits on s->cond until
 * do_vmaf() publishes a frame pair (s->frame_set) or EOF is flagged, copies
 * the luma plane of both frames into libvmaf's float buffers (samples
 * divided by 1 << (bits - 8), i.e. rescaled toward the 8-bit range),
 * releases the pair, signals the filter thread, and returns 0 on success or
 * 2 to tell libvmaf the stream has ended.
 */
#define read_frame_fn(type, bits) \
    static int read_frame_##bits##bit(float *ref_data, float *main_data, \
                                      float *temp_data, int stride, void *ctx) \
{ \
    LIBVMAFContext *s = (LIBVMAFContext *) ctx; \
    int ret; \
    \
    pthread_mutex_lock(&s->lock); \
    \
    while (!s->frame_set && !s->eof) { \
        pthread_cond_wait(&s->cond, &s->lock); \
    } \
    \
    if (s->frame_set) { \
        int ref_stride = s->gref->linesize[0]; \
        int main_stride = s->gmain->linesize[0]; \
        \
        const type *ref_ptr = (const type *) s->gref->data[0]; \
        const type *main_ptr = (const type *) s->gmain->data[0]; \
        \
        float *ptr = ref_data; \
        float factor = 1.f / (1 << (bits - 8)); \
        \
        int h = s->height; \
        int w = s->width; \
        \
        int i,j; \
        \
        for (i = 0; i < h; i++) { \
            for ( j = 0; j < w; j++) { \
                ptr[j] = ref_ptr[j] * factor; \
            } \
            ref_ptr += ref_stride / sizeof(*ref_ptr); \
            ptr += stride / sizeof(*ptr); \
        } \
        \
        ptr = main_data; \
        \
        for (i = 0; i < h; i++) { \
            for (j = 0; j < w; j++) { \
                ptr[j] = main_ptr[j] * factor; \
            } \
            main_ptr += main_stride / sizeof(*main_ptr); \
            ptr += stride / sizeof(*ptr); \
        } \
    } \
    \
    ret = !s->frame_set; \
    \
    av_frame_unref(s->gref); \
    av_frame_unref(s->gmain); \
    s->frame_set = 0; \
    \
    pthread_cond_signal(&s->cond); \
    pthread_mutex_unlock(&s->lock); \
    \
    if (ret) { \
        return 2; \
    } \
    \
    return 0; \
}

/* NOTE(review): the 8-bit instantiation (read_frame_fn(uint8_t, 8)) appears
 * to have been lost in extraction — read_frame_8bit is referenced by
 * compute_vmaf_score(). */
read_frame_fn(uint16_t, 10);
157 
/* Drives libvmaf synchronously: picks the 8- or 10-bit reader callback
 * from the input pixel depth, then runs compute_vmaf(), which stores the
 * pooled score in s->vmaf_score and its status in s->error.
 * NOTE(review): the signature line (static void
 * compute_vmaf_score(LIBVMAFContext *s), per the symbol index) and the tail
 * of the compute_vmaf() argument list (n_subsample/enable_conf_interval and
 * the closing parenthesis) appear to have been lost in extraction. */
{
    int (*read_frame)(float *ref_data, float *main_data, float *temp_data,
                      int stride, void *ctx);
    char *format;

    /* depth <= 8 covers all 8-bit planar formats; everything else here is 10-bit */
    if (s->desc->comp[0].depth <= 8) {
        read_frame = read_frame_8bit;
    } else {
        read_frame = read_frame_10bit;
    }

    /* libvmaf identifies the layout by the pixel-format name string */
    format = (char *) s->desc->name;

    s->error = compute_vmaf(&s->vmaf_score, format, s->width, s->height,
                            read_frame, s, s->model_path, s->log_path,
                            s->log_fmt, 0, 0, s->enable_transform,
                            s->phone_model, s->psnr, s->ssim,
                            s->ms_ssim, s->pool,
}
179 
/* Entry point of the worker thread spawned in config_input_ref(); logs the
 * final score once libvmaf finishes successfully.
 * NOTE(review): the call into compute_vmaf_score() and the error branch's
 * lock/signal/unlock that wake the filter thread appear to have been lost
 * in extraction. */
static void *call_vmaf(void *ctx)
{
    LIBVMAFContext *s = (LIBVMAFContext *) ctx;
    if (!s->error) {
        /* ctx doubles as a log context: class is the struct's first member */
        av_log(ctx, AV_LOG_INFO, "VMAF score: %f\n",s->vmaf_score);
    } else {
    }
    pthread_exit(NULL);
    return NULL;
}
194 
/* Framesync event callback: hands the next (distorted, reference) frame
 * pair to the vmaf worker thread and forwards the main frame downstream.
 * NOTE(review): the pthread_mutex_lock/unlock and pthread_cond_signal calls
 * bracketing this producer/consumer handshake appear to have been lost in
 * extraction — as shown, the wait and the flag updates are unprotected. */
static int do_vmaf(FFFrameSync *fs)
{
    AVFilterContext *ctx = fs->parent;
    LIBVMAFContext *s = ctx->priv;
    AVFrame *master, *ref;
    int ret;

    ret = ff_framesync_dualinput_get(fs, &master, &ref);
    if (ret < 0)
        return ret;
    if (!ref)
        /* no reference frame available yet: pass the main frame through */
        return ff_filter_frame(ctx->outputs[0], master);

    /* wait until the reader callback has consumed the previous pair,
     * or bail out early if libvmaf already failed */
    while (s->frame_set && !s->error) {
        pthread_cond_wait(&s->cond, &s->lock);
    }

    if (s->error) {
        av_log(ctx, AV_LOG_ERROR,
               "libvmaf encountered an error, check log for details\n");
        return AVERROR(EINVAL);
    }

    /* publish the new pair for the reader callback (references, not copies) */
    av_frame_ref(s->gref, ref);
    av_frame_ref(s->gmain, master);

    s->frame_set = 1;

    return ff_filter_frame(ctx->outputs[0], master);
}
231 
/* Filter init: allocates the shared frame holders, initializes the
 * condition variable and registers the framesync event callback.
 * NOTE(review): the signature line (static av_cold int
 * init(AVFilterContext *ctx), per the symbol index) and the
 * pthread_mutex_init() call appear to have been lost in extraction.
 * NOTE(review): av_frame_alloc() results are not checked here — presumably
 * an upstream omission; verify before relying on gref/gmain. */
{
    LIBVMAFContext *s = ctx->priv;

    s->gref = av_frame_alloc();
    s->gmain = av_frame_alloc();
    s->error = 0;

    s->vmaf_thread_created = 0;
    pthread_cond_init (&s->cond, NULL);

    s->fs.on_event = do_vmaf;
    return 0;
}
247 
/* Advertises the pixel formats common to both inputs and the output.
 * NOTE(review): the signature line (static int
 * query_formats(AVFilterContext *ctx), per the symbol index) and the
 * entries of pix_fmts[] (the YUV 4:2:0/4:2:2/4:4:4 8- and 10-bit formats
 * referenced in the symbol index) appear to have been lost in extraction. */
{
    static const enum AVPixelFormat pix_fmts[] = {
    };

    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
    if (!fmts_list)
        return AVERROR(ENOMEM);
    return ff_set_common_formats(ctx, fmts_list);
}
261 
262 
/* Configures the second ("reference") input: rejects input pairs that
 * differ in size or pixel format, caches the geometry/descriptor, and
 * spawns the worker thread that runs libvmaf.
 * NOTE(review): the signature line (static int
 * config_input_ref(AVFilterLink *inlink), per the symbol index) appears to
 * have been lost in extraction. */
{
    AVFilterContext *ctx = inlink->dst;
    LIBVMAFContext *s = ctx->priv;
    int th;

    if (ctx->inputs[0]->w != ctx->inputs[1]->w ||
        ctx->inputs[0]->h != ctx->inputs[1]->h) {
        av_log(ctx, AV_LOG_ERROR, "Width and height of input videos must be same.\n");
        return AVERROR(EINVAL);
    }
    if (ctx->inputs[0]->format != ctx->inputs[1]->format) {
        av_log(ctx, AV_LOG_ERROR, "Inputs must be of same pixel format.\n");
        return AVERROR(EINVAL);
    }

    s->desc = av_pix_fmt_desc_get(inlink->format);
    s->width = ctx->inputs[0]->w;
    s->height = ctx->inputs[0]->h;

    /* the worker blocks inside compute_vmaf() until frames arrive */
    th = pthread_create(&s->vmaf_thread, NULL, call_vmaf, (void *) s);
    if (th) {
        av_log(ctx, AV_LOG_ERROR, "Thread creation failed.\n");
        return AVERROR(EINVAL);
    }
    s->vmaf_thread_created = 1;

    return 0;
}
292 
293 static int config_output(AVFilterLink *outlink)
294 {
295  AVFilterContext *ctx = outlink->src;
296  LIBVMAFContext *s = ctx->priv;
297  AVFilterLink *mainlink = ctx->inputs[0];
298  int ret;
299 
300  ret = ff_framesync_init_dualinput(&s->fs, ctx);
301  if (ret < 0)
302  return ret;
303  outlink->w = mainlink->w;
304  outlink->h = mainlink->h;
305  outlink->time_base = mainlink->time_base;
306  outlink->sample_aspect_ratio = mainlink->sample_aspect_ratio;
307  outlink->frame_rate = mainlink->frame_rate;
308  if ((ret = ff_framesync_configure(&s->fs)) < 0)
309  return ret;
310 
311  return 0;
312 }
313 
/* Delegates all input/output frame scheduling to the framesync helper.
 * NOTE(review): the signature line (static int activate(AVFilterContext
 * *ctx), per the symbol index) appears to have been lost in extraction. */
{
    LIBVMAFContext *s = ctx->priv;
    return ff_framesync_activate(&s->fs);
}
319 
/* Filter teardown: stops framesync, flags EOF so the reader callback can
 * unblock and let the worker thread finish, then frees the shared frames.
 * NOTE(review): the signature line, the mutex lock/signal/unlock around
 * setting eof, the pthread_join() inside the vmaf_thread_created branch,
 * and the mutex/cond destroy calls appear to have been lost in
 * extraction. */
{
    LIBVMAFContext *s = ctx->priv;

    ff_framesync_uninit(&s->fs);

    s->eof = 1;

    if (s->vmaf_thread_created)
    {
        s->vmaf_thread_created = 0;
    }

    av_frame_free(&s->gref);
    av_frame_free(&s->gmain);

}
343 
/* Two video inputs: pad 0 is the distorted ("main") stream that is passed
 * through to the output; pad 1 is the pristine "reference" stream, whose
 * config callback validates the pair and starts the vmaf worker thread. */
static const AVFilterPad libvmaf_inputs[] = {
    {
        .name = "main",
        .type = AVMEDIA_TYPE_VIDEO,
    },{
        .name = "reference",
        .type = AVMEDIA_TYPE_VIDEO,
        .config_props = config_input_ref,
    },
    { NULL }
};
355 
/* Single pass-through output carrying the main input's frames. */
static const AVFilterPad libvmaf_outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_VIDEO,
        .config_props = config_output,
    },
    { NULL }
};
364 
/* NOTE(review): the opening line of this definition (AVFilter
 * ff_vf_libvmaf = {, per the symbol index) appears to have been lost in
 * extraction. */
    .name          = "libvmaf",
    .description   = NULL_IF_CONFIG_SMALL("Calculate the VMAF between two video streams."),
    .preinit       = libvmaf_framesync_preinit,
    .init          = init,
    .uninit        = uninit,
    .query_formats = query_formats,
    .activate      = activate,
    .priv_size     = sizeof(LIBVMAFContext),
    .priv_class    = &libvmaf_class,
    .inputs        = libvmaf_inputs,
    .outputs       = libvmaf_outputs,
};
#define NULL
Definition: coverity.c:32
static av_always_inline int pthread_mutex_destroy(pthread_mutex_t *mutex)
Definition: os2threads.h:108
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2522
This structure describes decoded (raw) audio or video data.
Definition: frame.h:295
#define pthread_mutex_lock(a)
Definition: ffprobe.c:61
static av_always_inline int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
Definition: os2threads.h:166
static int do_vmaf(FFFrameSync *fs)
Definition: vf_libvmaf.c:195
AVOption.
Definition: opt.h:246
static int query_formats(AVFilterContext *ctx)
Definition: vf_libvmaf.c:248
planar YUV 4:2:0, 15bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
Definition: pixfmt.h:159
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:71
Main libavfilter public API header.
const AVPixFmtDescriptor * desc
Definition: vf_libvmaf.c:42
int vmaf_thread_created
Definition: vf_libvmaf.c:46
static const AVFilterPad libvmaf_outputs[]
Definition: vf_libvmaf.c:356
AVFilter ff_vf_libvmaf
Definition: vf_libvmaf.c:365
int ff_framesync_configure(FFFrameSync *fs)
Configure a frame sync structure.
Definition: framesync.c:117
const char * master
Definition: vf_curves.c:117
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample format(the sample packing is implied by the sample format) and sample rate.The lists are not just lists
static av_always_inline int pthread_cond_destroy(pthread_cond_t *cond)
Definition: os2threads.h:140
AVFilterFormats * ff_make_format_list(const int *fmts)
Create a list of supported formats.
Definition: formats.c:283
const char * name
Pad name.
Definition: internal.h:60
AVFilterContext * parent
Parent filter context.
Definition: framesync.h:152
AVFilterLink ** inputs
array of pointers to input links
Definition: avfilter.h:346
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1093
AVComponentDescriptor comp[4]
Parameters that describe how pixels are packed.
Definition: pixdesc.h:117
uint8_t
#define av_cold
Definition: attributes.h:82
AVFrame * av_frame_alloc(void)
Allocate an AVFrame and set its fields to default values.
Definition: frame.c:189
AVOptions.
int ff_framesync_init_dualinput(FFFrameSync *fs, AVFilterContext *parent)
Initialize a frame sync structure for dualinput.
Definition: framesync.c:361
int ff_framesync_dualinput_get(FFFrameSync *fs, AVFrame **f0, AVFrame **f1)
Definition: framesync.c:379
int av_frame_ref(AVFrame *dst, const AVFrame *src)
Set up a new reference to the data described by the source frame.
Definition: frame.c:443
static av_always_inline int pthread_cond_signal(pthread_cond_t *cond)
Definition: os2threads.h:148
#define av_log(a,...)
const char * name
Definition: pixdesc.h:82
A filter pad used for either input or output.
Definition: internal.h:54
planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
Definition: pixfmt.h:165
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:569
void ff_framesync_uninit(FFFrameSync *fs)
Free all memory currently allocated.
Definition: framesync.c:293
Frame sync structure.
Definition: framesync.h:146
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:202
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:186
void * priv
private data for use by the filter
Definition: avfilter.h:353
planar YUV 4:2:2, 20bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian
Definition: pixfmt.h:161
int ff_framesync_activate(FFFrameSync *fs)
Examine the frames in the filter&#39;s input and try to produce output.
Definition: framesync.c:344
int(* on_event)(struct FFFrameSync *fs)
Callback called when a frame event is ready.
Definition: framesync.h:172
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:70
static const AVOption libvmaf_options[]
Definition: vf_libvmaf.c:74
pthread_cond_t cond
Definition: vf_libvmaf.c:49
#define th
Definition: regdef.h:75
#define OFFSET(x)
Definition: vf_libvmaf.c:71
static av_cold int init(AVFilterContext *ctx)
Definition: vf_libvmaf.c:232
static int read_frame(AVFilterContext *ctx, FPSContext *s, AVFilterLink *inlink, AVFilterLink *outlink)
Definition: vf_fps.c:180
#define read_frame_fn(type, bits)
Definition: vf_libvmaf.c:92
AVFormatContext * ctx
Definition: movenc.c:48
int enable_conf_interval
Definition: vf_libvmaf.c:67
pthread_mutex_t lock
Definition: vf_libvmaf.c:48
static av_always_inline int pthread_join(pthread_t thread, void **value_ptr)
Definition: os2threads.h:90
static av_always_inline int pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr)
Definition: os2threads.h:100
#define s(width, name)
Definition: cbs_vp9.c:257
static int activate(AVFilterContext *ctx)
Definition: vf_libvmaf.c:314
#define pthread_mutex_unlock(a)
Definition: ffprobe.c:65
char * log_path
Definition: vf_libvmaf.c:55
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
AVFrame * gref
Definition: vf_libvmaf.c:52
static av_cold void uninit(AVFilterContext *ctx)
Definition: vf_libvmaf.c:320
static av_always_inline int pthread_create(pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg)
Definition: os2threads.h:76
#define AV_LOG_INFO
Standard information.
Definition: log.h:187
misc drawing utilities
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:81
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several inputs
Describe the class of an AVClass context structure.
Definition: log.h:67
Filter definition.
Definition: avfilter.h:144
#define FLAGS
Definition: vf_libvmaf.c:72
static const AVFilterPad libvmaf_inputs[]
Definition: vf_libvmaf.c:344
const char * name
Filter name.
Definition: avfilter.h:148
AVFrame * gmain
Definition: vf_libvmaf.c:51
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:350
static enum AVPixelFormat pix_fmts[]
Definition: libkvazaar.c:275
FFFrameSync fs
Definition: vf_libvmaf.c:41
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
int
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:66
static void compute_vmaf_score(LIBVMAFContext *s)
Definition: vf_libvmaf.c:158
_fmutex pthread_mutex_t
Definition: os2threads.h:49
static int ref[MAX_W *MAX_W]
Definition: jpeg2000dwt.c:107
static void * call_vmaf(void *ctx)
Definition: vf_libvmaf.c:180
static av_always_inline int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr)
Definition: os2threads.h:129
pthread_t vmaf_thread
Definition: vf_libvmaf.c:47
FRAMESYNC_DEFINE_CLASS(libvmaf, LIBVMAFContext, fs)
A list of supported formats for one end of a filter link.
Definition: formats.h:64
int enable_transform
Definition: vf_libvmaf.c:59
An instance of a filter.
Definition: avfilter.h:338
static int config_output(AVFilterLink *outlink)
Definition: vf_libvmaf.c:293
char * model_path
Definition: vf_libvmaf.c:54
double vmaf_score
Definition: vf_libvmaf.c:45
#define stride
internal API functions
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later.That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another.Frame references ownership and permissions
int depth
Number of bits in the component.
Definition: pixdesc.h:58
static int config_input_ref(AVFilterLink *inlink)
Definition: vf_libvmaf.c:263
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
char * log_fmt
Definition: vf_libvmaf.c:56