FFmpeg
vf_libvmaf.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
3  * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * Calculate the VMAF between two input videos.
25  */
26 
27 #include <pthread.h>
28 #include <libvmaf.h>
29 #include "libavutil/avstring.h"
30 #include "libavutil/opt.h"
31 #include "libavutil/pixdesc.h"
32 #include "avfilter.h"
33 #include "drawutils.h"
34 #include "formats.h"
35 #include "framesync.h"
36 #include "internal.h"
37 #include "video.h"
38 
39 typedef struct LIBVMAFContext {
40  const AVClass *class;
43  int width;
44  int height;
45  double vmaf_score;
50  int eof;
53  int frame_set;
54  char *model_path;
55  char *log_path;
56  char *log_fmt;
61  int psnr;
62  int ssim;
63  int ms_ssim;
64  char *pool;
65  int n_threads;
68  int error;
70 
/* Byte offset of an option's backing field inside LIBVMAFContext. */
#define OFFSET(x) offsetof(LIBVMAFContext, x)
/* Flags shared by every option below; parenthesized so the expansion stays
 * a single operand when combined with other operators. */
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
73 
74 static const AVOption libvmaf_options[] = {
75  {"model_path", "Set the model to be used for computing vmaf.", OFFSET(model_path), AV_OPT_TYPE_STRING, {.str="/usr/local/share/model/vmaf_v0.6.1.pkl"}, 0, 1, FLAGS},
76  {"log_path", "Set the file path to be used to store logs.", OFFSET(log_path), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
77  {"log_fmt", "Set the format of the log (xml or json).", OFFSET(log_fmt), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
78  {"enable_transform", "Enables transform for computing vmaf.", OFFSET(enable_transform), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
79  {"phone_model", "Invokes the phone model that will generate higher VMAF scores.", OFFSET(phone_model), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
80  {"psnr", "Enables computing psnr along with vmaf.", OFFSET(psnr), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
81  {"ssim", "Enables computing ssim along with vmaf.", OFFSET(ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
82  {"ms_ssim", "Enables computing ms-ssim along with vmaf.", OFFSET(ms_ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
83  {"pool", "Set the pool method to be used for computing vmaf.", OFFSET(pool), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
84  {"n_threads", "Set number of threads to be used when computing vmaf.", OFFSET(n_threads), AV_OPT_TYPE_INT, {.i64=0}, 0, UINT_MAX, FLAGS},
85  {"n_subsample", "Set interval for frame subsampling used when computing vmaf.", OFFSET(n_subsample), AV_OPT_TYPE_INT, {.i64=1}, 1, UINT_MAX, FLAGS},
86  {"enable_conf_interval", "Enables confidence interval.", OFFSET(enable_conf_interval), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
87  { NULL }
88 };
89 
91 
/**
 * Define read_frame_<bits>bit(), the callback through which compute_vmaf()
 * pulls one pair of planes from the filter.
 *
 * The generated function blocks on s->cond until a frame pair has been
 * queued (s->frame_set) or end of stream has been flagged (s->eof). When a
 * pair is available, plane 0 of the reference and main frames is converted
 * sample by sample to float into ref_data and main_data, whose rows are
 * `stride` bytes apart. temp_data is not used. The queued frames are then
 * released and the producer is signalled.
 *
 * @param type  integer sample type stored in plane 0 of the frames
 * @param bits  suffix used to build the generated function's name
 * @return (generated function) 0 when a frame pair was copied,
 *         2 when woken by end of stream with nothing queued
 */
#define read_frame_fn(type, bits)                                               \
    static int read_frame_##bits##bit(float *ref_data, float *main_data,       \
                                      float *temp_data, int stride, void *ctx) \
{                                                                               \
    LIBVMAFContext *s = (LIBVMAFContext *) ctx;                                 \
    int ret;                                                                    \
                                                                                \
    pthread_mutex_lock(&s->lock);                                               \
                                                                                \
    /* Sleep until a frame pair is queued or the stream has ended. */           \
    while (!s->frame_set && !s->eof) {                                          \
        pthread_cond_wait(&s->cond, &s->lock);                                  \
    }                                                                           \
                                                                                \
    if (s->frame_set) {                                                         \
        /* Strides are signed byte counts: divide by a signed element size */  \
        /* so a negative linesize is not turned into a huge unsigned step. */  \
        const int ref_step  = s->gref->linesize[0]  / (int) sizeof(type);       \
        const int main_step = s->gmain->linesize[0] / (int) sizeof(type);       \
        const int out_step  = stride / (int) sizeof(float);                     \
                                                                                \
        const type *ref_ptr  = (const type *) s->gref->data[0];                 \
        const type *main_ptr = (const type *) s->gmain->data[0];                \
        float *ref_out  = ref_data;                                             \
        float *main_out = main_data;                                            \
                                                                                \
        const int h = s->height;                                                \
        const int w = s->width;                                                 \
        int i, j;                                                               \
                                                                                \
        for (i = 0; i < h; i++) {                                               \
            for (j = 0; j < w; j++) {                                           \
                ref_out[j]  = (float) ref_ptr[j];                               \
                main_out[j] = (float) main_ptr[j];                              \
            }                                                                   \
            ref_ptr  += ref_step;                                               \
            main_ptr += main_step;                                              \
            ref_out  += out_step;                                               \
            main_out += out_step;                                               \
        }                                                                       \
    }                                                                           \
                                                                                \
    /* Nonzero only when woken by end of stream with nothing queued. */         \
    ret = !s->frame_set;                                                        \
                                                                                \
    /* Release the pair and tell the producer the slot is free again. */        \
    av_frame_unref(s->gref);                                                    \
    av_frame_unref(s->gmain);                                                   \
    s->frame_set = 0;                                                           \
                                                                                \
    pthread_cond_signal(&s->cond);                                              \
    pthread_mutex_unlock(&s->lock);                                             \
                                                                                \
    return ret ? 2 : 0;                                                         \
}
153 
155 read_frame_fn(uint16_t, 10);
156 
158 {
159  int (*read_frame)(float *ref_data, float *main_data, float *temp_data,
160  int stride, void *ctx);
161  char *format;
162 
163  if (s->desc->comp[0].depth <= 8) {
164  read_frame = read_frame_8bit;
165  } else {
166  read_frame = read_frame_10bit;
167  }
168 
169  format = (char *) s->desc->name;
170 
171  s->error = compute_vmaf(&s->vmaf_score, format, s->width, s->height,
172  read_frame, s, s->model_path, s->log_path,
173  s->log_fmt, 0, 0, s->enable_transform,
174  s->phone_model, s->psnr, s->ssim,
175  s->ms_ssim, s->pool,
177 }
178 
179 static void *call_vmaf(void *ctx)
180 {
181  LIBVMAFContext *s = (LIBVMAFContext *) ctx;
183  if (!s->error) {
184  av_log(ctx, AV_LOG_INFO, "VMAF score: %f\n",s->vmaf_score);
185  } else {
189  }
190  pthread_exit(NULL);
191  return NULL;
192 }
193 
194 static int do_vmaf(FFFrameSync *fs)
195 {
196  AVFilterContext *ctx = fs->parent;
197  LIBVMAFContext *s = ctx->priv;
198  AVFrame *master, *ref;
199  int ret;
200 
201  ret = ff_framesync_dualinput_get(fs, &master, &ref);
202  if (ret < 0)
203  return ret;
204  if (!ref)
205  return ff_filter_frame(ctx->outputs[0], master);
206 
208 
209  while (s->frame_set && !s->error) {
210  pthread_cond_wait(&s->cond, &s->lock);
211  }
212 
213  if (s->error) {
214  av_log(ctx, AV_LOG_ERROR,
215  "libvmaf encountered an error, check log for details\n");
217  return AVERROR(EINVAL);
218  }
219 
220  av_frame_ref(s->gref, ref);
221  av_frame_ref(s->gmain, master);
222 
223  s->frame_set = 1;
224 
227 
228  return ff_filter_frame(ctx->outputs[0], master);
229 }
230 
232 {
233  LIBVMAFContext *s = ctx->priv;
234 
235  s->gref = av_frame_alloc();
236  s->gmain = av_frame_alloc();
237  s->error = 0;
238 
239  s->vmaf_thread_created = 0;
241  pthread_cond_init (&s->cond, NULL);
242 
243  s->fs.on_event = do_vmaf;
244  return 0;
245 }
246 
248 {
249  static const enum AVPixelFormat pix_fmts[] = {
253  };
254 
255  AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
256  if (!fmts_list)
257  return AVERROR(ENOMEM);
258  return ff_set_common_formats(ctx, fmts_list);
259 }
260 
261 
263 {
264  AVFilterContext *ctx = inlink->dst;
265  LIBVMAFContext *s = ctx->priv;
266  int th;
267 
268  if (ctx->inputs[0]->w != ctx->inputs[1]->w ||
269  ctx->inputs[0]->h != ctx->inputs[1]->h) {
270  av_log(ctx, AV_LOG_ERROR, "Width and height of input videos must be same.\n");
271  return AVERROR(EINVAL);
272  }
273  if (ctx->inputs[0]->format != ctx->inputs[1]->format) {
274  av_log(ctx, AV_LOG_ERROR, "Inputs must be of same pixel format.\n");
275  return AVERROR(EINVAL);
276  }
277 
278  s->desc = av_pix_fmt_desc_get(inlink->format);
279  s->width = ctx->inputs[0]->w;
280  s->height = ctx->inputs[0]->h;
281 
282  th = pthread_create(&s->vmaf_thread, NULL, call_vmaf, (void *) s);
283  if (th) {
284  av_log(ctx, AV_LOG_ERROR, "Thread creation failed.\n");
285  return AVERROR(EINVAL);
286  }
287  s->vmaf_thread_created = 1;
288 
289  return 0;
290 }
291 
292 static int config_output(AVFilterLink *outlink)
293 {
294  AVFilterContext *ctx = outlink->src;
295  LIBVMAFContext *s = ctx->priv;
296  AVFilterLink *mainlink = ctx->inputs[0];
297  int ret;
298 
299  ret = ff_framesync_init_dualinput(&s->fs, ctx);
300  if (ret < 0)
301  return ret;
302  outlink->w = mainlink->w;
303  outlink->h = mainlink->h;
304  outlink->time_base = mainlink->time_base;
305  outlink->sample_aspect_ratio = mainlink->sample_aspect_ratio;
306  outlink->frame_rate = mainlink->frame_rate;
307  if ((ret = ff_framesync_configure(&s->fs)) < 0)
308  return ret;
309 
310  return 0;
311 }
312 
314 {
315  LIBVMAFContext *s = ctx->priv;
316  return ff_framesync_activate(&s->fs);
317 }
318 
320 {
321  LIBVMAFContext *s = ctx->priv;
322 
323  ff_framesync_uninit(&s->fs);
324 
326  s->eof = 1;
329 
330  if (s->vmaf_thread_created)
331  {
333  s->vmaf_thread_created = 0;
334  }
335 
336  av_frame_free(&s->gref);
337  av_frame_free(&s->gmain);
338 
341 }
342 
343 static const AVFilterPad libvmaf_inputs[] = {
344  {
345  .name = "main",
346  .type = AVMEDIA_TYPE_VIDEO,
347  },{
348  .name = "reference",
349  .type = AVMEDIA_TYPE_VIDEO,
350  .config_props = config_input_ref,
351  },
352  { NULL }
353 };
354 
355 static const AVFilterPad libvmaf_outputs[] = {
356  {
357  .name = "default",
358  .type = AVMEDIA_TYPE_VIDEO,
359  .config_props = config_output,
360  },
361  { NULL }
362 };
363 
365  .name = "libvmaf",
366  .description = NULL_IF_CONFIG_SMALL("Calculate the VMAF between two video streams."),
367  .preinit = libvmaf_framesync_preinit,
368  .init = init,
369  .uninit = uninit,
370  .query_formats = query_formats,
371  .activate = activate,
372  .priv_size = sizeof(LIBVMAFContext),
373  .priv_class = &libvmaf_class,
374  .inputs = libvmaf_inputs,
375  .outputs = libvmaf_outputs,
376 };
#define NULL
Definition: coverity.c:32
static av_always_inline int pthread_mutex_destroy(pthread_mutex_t *mutex)
Definition: os2threads.h:108
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2522
This structure describes decoded (raw) audio or video data.
Definition: frame.h:295
#define pthread_mutex_lock(a)
Definition: ffprobe.c:61
static av_always_inline int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
Definition: os2threads.h:166
static int do_vmaf(FFFrameSync *fs)
Definition: vf_libvmaf.c:194
AVOption.
Definition: opt.h:246
static int query_formats(AVFilterContext *ctx)
Definition: vf_libvmaf.c:247
planar YUV 4:2:0, 15bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
Definition: pixfmt.h:159
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:71
Main libavfilter public API header.
const AVPixFmtDescriptor * desc
Definition: vf_libvmaf.c:42
int vmaf_thread_created
Definition: vf_libvmaf.c:46
static const AVFilterPad libvmaf_outputs[]
Definition: vf_libvmaf.c:355
AVFilter ff_vf_libvmaf
Definition: vf_libvmaf.c:364
int ff_framesync_configure(FFFrameSync *fs)
Configure a frame sync structure.
Definition: framesync.c:117
const char * master
Definition: vf_curves.c:117
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample format(the sample packing is implied by the sample format) and sample rate.The lists are not just lists
static av_always_inline int pthread_cond_destroy(pthread_cond_t *cond)
Definition: os2threads.h:140
AVFilterFormats * ff_make_format_list(const int *fmts)
Create a list of supported formats.
Definition: formats.c:283
const char * name
Pad name.
Definition: internal.h:60
AVFilterContext * parent
Parent filter context.
Definition: framesync.h:152
AVFilterLink ** inputs
array of pointers to input links
Definition: avfilter.h:346
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1080
AVComponentDescriptor comp[4]
Parameters that describe how pixels are packed.
Definition: pixdesc.h:117
uint8_t
#define av_cold
Definition: attributes.h:82
AVFrame * av_frame_alloc(void)
Allocate an AVFrame and set its fields to default values.
Definition: frame.c:189
AVOptions.
int ff_framesync_init_dualinput(FFFrameSync *fs, AVFilterContext *parent)
Initialize a frame sync structure for dualinput.
Definition: framesync.c:361
int ff_framesync_dualinput_get(FFFrameSync *fs, AVFrame **f0, AVFrame **f1)
Definition: framesync.c:379
int av_frame_ref(AVFrame *dst, const AVFrame *src)
Set up a new reference to the data described by the source frame.
Definition: frame.c:443
static av_always_inline int pthread_cond_signal(pthread_cond_t *cond)
Definition: os2threads.h:148
#define av_log(a,...)
const char * name
Definition: pixdesc.h:82
A filter pad used for either input or output.
Definition: internal.h:54
planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
Definition: pixfmt.h:165
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:568
void ff_framesync_uninit(FFFrameSync *fs)
Free all memory currently allocated.
Definition: framesync.c:293
Frame sync structure.
Definition: framesync.h:146
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:202
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:186
void * priv
private data for use by the filter
Definition: avfilter.h:353
planar YUV 4:2:2, 20bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian
Definition: pixfmt.h:161
int ff_framesync_activate(FFFrameSync *fs)
Examine the frames in the filter's input and try to produce output.
Definition: framesync.c:344
int(* on_event)(struct FFFrameSync *fs)
Callback called when a frame event is ready.
Definition: framesync.h:172
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:70
static const AVOption libvmaf_options[]
Definition: vf_libvmaf.c:74
pthread_cond_t cond
Definition: vf_libvmaf.c:49
#define th
Definition: regdef.h:75
#define OFFSET(x)
Definition: vf_libvmaf.c:71
static av_cold int init(AVFilterContext *ctx)
Definition: vf_libvmaf.c:231
static int read_frame(AVFilterContext *ctx, FPSContext *s, AVFilterLink *inlink, AVFilterLink *outlink)
Definition: vf_fps.c:180
#define read_frame_fn(type, bits)
Definition: vf_libvmaf.c:92
AVFormatContext * ctx
Definition: movenc.c:48
int enable_conf_interval
Definition: vf_libvmaf.c:67
pthread_mutex_t lock
Definition: vf_libvmaf.c:48
static av_always_inline int pthread_join(pthread_t thread, void **value_ptr)
Definition: os2threads.h:90
static av_always_inline int pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr)
Definition: os2threads.h:100
#define s(width, name)
Definition: cbs_vp9.c:257
static int activate(AVFilterContext *ctx)
Definition: vf_libvmaf.c:313
#define pthread_mutex_unlock(a)
Definition: ffprobe.c:65
char * log_path
Definition: vf_libvmaf.c:55
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
AVFrame * gref
Definition: vf_libvmaf.c:52
static av_cold void uninit(AVFilterContext *ctx)
Definition: vf_libvmaf.c:319
static av_always_inline int pthread_create(pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg)
Definition: os2threads.h:76
#define AV_LOG_INFO
Standard information.
Definition: log.h:187
misc drawing utilities
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:81
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several inputs
Describe the class of an AVClass context structure.
Definition: log.h:67
Filter definition.
Definition: avfilter.h:144
#define FLAGS
Definition: vf_libvmaf.c:72
static const AVFilterPad libvmaf_inputs[]
Definition: vf_libvmaf.c:343
const char * name
Filter name.
Definition: avfilter.h:148
AVFrame * gmain
Definition: vf_libvmaf.c:51
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:350
static enum AVPixelFormat pix_fmts[]
Definition: libkvazaar.c:275
FFFrameSync fs
Definition: vf_libvmaf.c:41
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
int
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:66
static void compute_vmaf_score(LIBVMAFContext *s)
Definition: vf_libvmaf.c:157
_fmutex pthread_mutex_t
Definition: os2threads.h:49
static int ref[MAX_W *MAX_W]
Definition: jpeg2000dwt.c:107
static void * call_vmaf(void *ctx)
Definition: vf_libvmaf.c:179
static av_always_inline int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr)
Definition: os2threads.h:129
pthread_t vmaf_thread
Definition: vf_libvmaf.c:47
FRAMESYNC_DEFINE_CLASS(libvmaf, LIBVMAFContext, fs)
A list of supported formats for one end of a filter link.
Definition: formats.h:64
int enable_transform
Definition: vf_libvmaf.c:59
An instance of a filter.
Definition: avfilter.h:338
static int config_output(AVFilterLink *outlink)
Definition: vf_libvmaf.c:292
char * model_path
Definition: vf_libvmaf.c:54
double vmaf_score
Definition: vf_libvmaf.c:45
#define stride
internal API functions
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later.That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another.Frame references ownership and permissions
int depth
Number of bits in the component.
Definition: pixdesc.h:58
static int config_input_ref(AVFilterLink *inlink)
Definition: vf_libvmaf.c:262
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
char * log_fmt
Definition: vf_libvmaf.c:56