[FFmpeg-devel] [PATCH v2] avfilter/vf_libvmaf: Add metadata propagation support
Kyle Swanson
k at ylo.ph
Fri Aug 30 07:44:41 EEST 2024
Hi,
On Mon, Aug 26, 2024 at 10:51 AM Yigithan Yigit
<yigithanyigitdevel at gmail.com> wrote:
>
> ---
> libavfilter/vf_libvmaf.c | 328 ++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 326 insertions(+), 2 deletions(-)
>
> diff --git a/libavfilter/vf_libvmaf.c b/libavfilter/vf_libvmaf.c
> index f655092b20..e6707aff53 100644
> --- a/libavfilter/vf_libvmaf.c
> +++ b/libavfilter/vf_libvmaf.c
> @@ -27,8 +27,11 @@
> #include "config_components.h"
>
> #include <libvmaf.h>
> +#include <libvmaf/version.h>
>
> #include "libavutil/avstring.h"
> +#include "libavutil/dict.h"
> +#include "libavutil/frame.h"
> #include "libavutil/mem.h"
> #include "libavutil/opt.h"
> #include "libavutil/pixdesc.h"
> @@ -46,6 +49,31 @@
> #include "libavutil/hwcontext_cuda_internal.h"
> #endif
>
> +#define VMAF_VERSION_INT_VER(major, minor, patch) \
> + ((major) * 10000 + (minor) * 100 + (patch))
> +
> +#if VMAF_VERSION_INT_VER(VMAF_API_VERSION_MAJOR, VMAF_API_VERSION_MINOR, VMAF_API_VERSION_PATCH) > VMAF_VERSION_INT_VER(3, 0, 0)
> +#define CONFIG_LIBVMAF_METADATA_ENABLED 1
> +#else
> +#define CONFIG_LIBVMAF_METADATA_ENABLED 0
> +#endif
You should be able to check this via pkg-config and set the
CONFIG_LIBVMAF_METADATA_ENABLED define from the configure script.
> +
> +#if CONFIG_LIBVMAF_METADATA_ENABLED
> +#include <stdatomic.h>
> +
> +typedef struct FrameList {
> + AVFrame *frame;
> + unsigned frame_number;
> + unsigned propagated_handlers_cnt;
> + struct FrameList *next;
> +} FrameList;
> +
> +typedef struct CallbackStruct {
> + struct LIBVMAFContext *s;
> + FrameList *frame_list;
> +} CallbackStruct;
> +#endif
> +
> typedef struct LIBVMAFContext {
> const AVClass *class;
> FFFrameSync fs;
> @@ -56,8 +84,19 @@ typedef struct LIBVMAFContext {
> int n_subsample;
> char *model_cfg;
> char *feature_cfg;
> +#if CONFIG_LIBVMAF_METADATA_ENABLED
> + char *metadata_feature_cfg;
> + struct {
> + VmafMetadataConfiguration *metadata_cfgs;
> + unsigned metadata_cfg_cnt;
> + } metadata_cfg_list;
> + CallbackStruct *cb;
> + atomic_uint outlink_eof;
> + atomic_uint eof_frame;
> +#endif
> VmafContext *vmaf;
> VmafModel **model;
> + int flushed;
> unsigned model_cnt;
> unsigned frame_cnt;
> unsigned bpc;
> @@ -77,6 +116,9 @@ static const AVOption libvmaf_options[] =3D {
> {"n_subsample", "Set interval for frame subsampling used when comput=
ing vmaf.", OFFSET(n_subsample), AV_OPT_TYPE_INT, {.i64=3D1}, 1, UINT_M=
AX, FLAGS},
> {"model", "Set the model to be used for computing vmaf.", =
OFFSET(model_cfg), AV_OPT_TYPE_STRING, {.str=3D"version=3Dv=
maf_v0.6.1"}, 0, 1, FLAGS},
> {"feature", "Set the feature to be used for computing vmaf.", =
OFFSET(feature_cfg), AV_OPT_TYPE_STRING, {.str=3DNULL}, 0, =
1, FLAGS},
> +#if CONFIG_LIBVMAF_METADATA_ENABLED
> + {"metadata_handler", "Set the feature to be propagated as metadata.=
", OFFSET(metadata_feature_cfg), AV_OPT_TYPE_STRING, {.str=3D"=
name=3Dvmaf"}, 0, 1, FLAGS},
Would be better to make this option a bool. When true, propagate all
registered features and models. You can read the names during init,
they should be available inside `parse_models()` and
`parse_features()`.
> +#endif
> { NULL }
> };
>
> @@ -105,6 +147,123 @@ static enum VmafPixelFormat pix_fmt_map(enum AVPixe=
lFormat av_pix_fmt)
> }
> }
>
> +#if CONFIG_LIBVMAF_METADATA_ENABLED
> +static int add_to_frame_list(FrameList **head, AVFrame *frame, unsigned =
frame_number)
> +{
> + FrameList *new_frame =3D av_malloc(sizeof(FrameList));
> + if (!new_frame)
> + return AVERROR(ENOMEM);
> +
> + new_frame->frame =3D frame;
> + new_frame->frame_number =3D frame_number;
> + new_frame->propagated_handlers_cnt =3D 0;
> + new_frame->next =3D NULL;
> +
> + if (*head =3D=3D NULL) {
> + *head =3D new_frame;
> + } else {
> + FrameList *current =3D *head;
> + while (current->next !=3D NULL) {
> + current =3D current->next;
> + }
> + current->next =3D new_frame;
> + }
> +
> + return 0;
> +}
> +
> +static int remove_from_frame_list(FrameList **frame_list, unsigned frame=
_number)
> +{
> + FrameList *cur =3D *frame_list;
> + FrameList *prev =3D NULL;
> +
> + while (cur) {
> + if (cur->frame_number =3D=3D frame_number) {
> + if (prev)
> + prev->next =3D cur->next;
> + else
> + *frame_list =3D cur->next;
> + av_free(cur);
> + return 0;
> + }
> + prev =3D cur;
> + cur =3D cur->next;
> + }
> +
> + return AVERROR(EINVAL);
> +}
> +
> +static int free_frame_list(FrameList **frame_list)
> +{
> + FrameList *cur =3D *frame_list;
> + while (cur) {
> + FrameList *next =3D cur->next;
> + av_frame_free(&cur->frame);
> + av_free(cur);
> + cur =3D next;
> + }
> + *frame_list =3D NULL;
> + return 0;
> +}
> +
> +static FrameList* get_frame_from_frame_list(FrameList *frame_list,
> + unsigned frame_number)
> +{
> + FrameList *cur =3D frame_list;
> + while (cur) {
> + if (cur->frame_number =3D=3D frame_number)
> + return cur;
> + cur =3D cur->next;
> + }
> + return NULL;
> +}
> +
Would be great if we didn't need to invent a data structure here. I
guess av_fifo is no good here because metadata callbacks are not
guaranteed to come in order?
> +static void set_meta(void *data, VmafMetadata *metadata)
> +{
> + int err =3D 0;
> + FrameList *current_frame =3D NULL;
> + CallbackStruct *cb =3D data;
> + char value[128], key[128];
> + snprintf(value, sizeof(value), "%0.2f", metadata->score);
> + snprintf(key, sizeof(key), "%s.%d", metadata->feature_name, metadata=
->picture_index);
> +
> + current_frame =3D get_frame_from_frame_list(cb->frame_list, metadata=
->picture_index);
> + if (!current_frame) {
> + av_log(NULL, AV_LOG_ERROR, "could not find frame with index: %d\=
n",
> + metadata->picture_index);
> + return;
> + }
> +
> + err =3D av_dict_set(&current_frame->frame->metadata, key, value, 0);
> + if (err < 0)
> + av_log(NULL, AV_LOG_ERROR, "could not set metadata: %s\n", key);
> +
> + current_frame->propagated_handlers_cnt++;
> +
> + if (current_frame->propagated_handlers_cnt =3D=3D cb->s->metadata_cf=
g_list.metadata_cfg_cnt) {
> + FrameList *cur =3D cb->frame_list;
> + // This code block allows to send frames monotonically
> + while(cur && cur->frame_number <=3D metadata->picture_index) {
> + if (cur->propagated_handlers_cnt =3D=3D cb->s->metadata_cfg_=
list.metadata_cfg_cnt) {
> + FrameList *next;
> + // Check outlink is closed
> + if (!cb->s->outlink_eof) {
> + av_log(cb->s->fs.parent, AV_LOG_DEBUG, "VMAF feature=
: %d, score: %f\n", cur->frame_number, metadata->score);
> + cb->s->eof_frame =3D cur->frame_number;
> + if(ff_filter_frame(cb->s->fs.parent->outputs[0], cur=
->frame))
> + return;
> + }
> + next =3D cur->next;
> + remove_from_frame_list(&cb->frame_list, cur->frame_numbe=
r);
> + cur =3D next;
> + }
> + else
> + break;
> + }
> + }
> +}
> +#endif
> +
> static int copy_picture_data(AVFrame *src, VmafPicture *dst, unsigned bp=
c)
> {
> const int bytes_per_value =3D bpc > 8 ? 2 : 1;
> @@ -160,13 +319,28 @@ static int do_vmaf(FFFrameSync *fs)
> return AVERROR(ENOMEM);
> }
>
> +#if CONFIG_LIBVMAF_METADATA_ENABLED
> + err =3D add_to_frame_list(&s->cb->frame_list, dist, s->frame_cnt);
> + if (err) {
> + av_log(s, AV_LOG_ERROR, "problem during add_to_frame_list.\n");
> + return AVERROR(ENOMEM);
> + }
> +#endif
> +
> err =3D vmaf_read_pictures(s->vmaf, &pic_ref, &pic_dist, s->frame_cn=
t++);
> if (err) {
> av_log(s, AV_LOG_ERROR, "problem during vmaf_read_pictures.\n");
> return AVERROR(EINVAL);
> }
>
> +#if CONFIG_LIBVMAF_METADATA_ENABLED
> + if (s->metadata_cfg_list.metadata_cfg_cnt)
> + return 0;
> + else
> + return ff_filter_frame(ctx->outputs[0], dist);
> +#else
> return ff_filter_frame(ctx->outputs[0], dist);
> +#endif
> }
>
> static AVDictionary **delimited_dict_parse(char *str, unsigned *cnt)
> @@ -408,6 +582,83 @@ exit:
> return err;
> }
>
> +#if CONFIG_LIBVMAF_METADATA_ENABLED
> +static int parse_metadata_handlers(AVFilterContext *ctx)
> +{
> + LIBVMAFContext *s =3D ctx->priv;
> + AVDictionary **dict;
> + unsigned dict_cnt;
> + int err =3D 0;
> +
> + if (!s->metadata_feature_cfg)
> + return 0;
> +
> + dict_cnt =3D 0;
> + dict =3D delimited_dict_parse(s->metadata_feature_cfg, &dict_cnt);
> + if (!dict) {
> + av_log(ctx, AV_LOG_ERROR,
> + "could not parse metadata feature config: %s\n",
> + s->metadata_feature_cfg);
> + return AVERROR(EINVAL);
> + }
> +
> + for (unsigned i =3D 0; i < dict_cnt; i++) {
> + VmafMetadataConfiguration *metadata_cfg =3D av_calloc(1, sizeof(=
*metadata_cfg));
> + const AVDictionaryEntry *e =3D NULL;
> + char *feature_name =3D NULL;
> +
> + while (e =3D av_dict_iterate(dict[i], e)) {
> + if (!strcmp(e->key, "name")) {
> + metadata_cfg->feature_name =3D av_strdup(e->value);
> + continue;
> + }
> + }
> +
> + metadata_cfg->data =3D s->cb;
> + metadata_cfg->callback =3D &set_meta;
> +
> + err =3D vmaf_register_metadata_handler(s->vmaf, *metadata_cfg);
> + if (err) {
> + av_log(ctx, AV_LOG_ERROR,
> + "problem during vmaf_register_metadata_handler: %s\n"=
,
> + feature_name);
> + goto exit;
> + }
> +
> + s->metadata_cfg_list.metadata_cfgs =3D av_realloc(s->metadata_cf=
g_list.metadata_cfgs,
> + (s->metadata_cfg_list.metad=
ata_cfg_cnt + 1) *
> + sizeof(*s->metadata_cfg_lis=
t.metadata_cfgs));
> + if (!s->metadata_cfg_list.metadata_cfgs) {
> + err =3D AVERROR(ENOMEM);
> + goto exit;
> + }
> +
> + s->metadata_cfg_list.metadata_cfgs[s->metadata_cfg_list.metadata=
_cfg_cnt++] =3D *metadata_cfg;
> + }
> +
> +exit:
> + for (unsigned i =3D 0; i < dict_cnt; i++) {
> + if (dict[i])
> + av_dict_free(&dict[i]);
> + }
> + av_free(dict);
> + return err;
> +}
> +
> +static int init_metadata(AVFilterContext *ctx)
> +{
> + LIBVMAFContext *s =3D ctx->priv;
> +
> + s->cb =3D av_calloc(1, sizeof(CallbackStruct));
> + if (!s->cb)
> + return AVERROR(ENOMEM);
> +
> + s->cb->s =3D s;
> +
> + return 0;
> +}
> +#endif
> +
> static enum VmafLogLevel log_level_map(int log_level)
> {
> switch (log_level) {
> @@ -441,6 +692,16 @@ static av_cold int init(AVFilterContext *ctx)
> if (err)
> return AVERROR(EINVAL);
>
> +#if CONFIG_LIBVMAF_METADATA_ENABLED
> + err =3D init_metadata(ctx);
> + if (err)
> + return err;
> +
> + err =3D parse_metadata_handlers(ctx);
> + if (err)
> + return err;
> +#endif
> +
> err =3D parse_models(ctx);
> if (err)
> return err;
> @@ -518,6 +779,38 @@ static int config_output(AVFilterLink *outlink)
> static int activate(AVFilterContext *ctx)
> {
> LIBVMAFContext *s =3D ctx->priv;
> +#if CONFIG_LIBVMAF_METADATA_ENABLED
> + // There are 2 cases for metadata propagation:
> + // 1. The case where the outlink closes
> + // 2. The case where the inlink closes
> + // Case 1:
> + // In this case we need to check the outlink status in every iteration.
> + // If the outlink does not want frames anymore, we need to proceed with uninit, setting the inlink.
> + // But due to the nature of multithreading, setting eof inside the activate call can cause sync issues
> + // and can lead to extra propagated frames. Atomic variables are used to avoid this.
> + // Case 2:
> + // This case is relatively easy to handle. Because calculating the vmaf score takes time,
> + // `do_vmaf` buffers many frames before sending them to the outlink, which causes
> + // a premature close of the outlink.
> + // Checking the inlink status is enough: if inlink == eof, flushing vmaf is enough for this.
> + int64_t pts;
> + int status, ret =3D 0;
> +
> + if (ff_outlink_get_status(ctx->outputs[0]))
> + s->outlink_eof =3D 1;
> +
> + if (ff_inlink_acknowledge_status(ctx->inputs[0], &status, &pts) &&
> + ff_inlink_acknowledge_status(ctx->inputs[1], &status, &pts)) {
> + if (!s->flushed) {
> + ret =3D vmaf_read_pictures(s->vmaf, NULL, NULL, 0);
> + if (ret)
> + av_log(ctx, AV_LOG_ERROR,
> + "problem flushing libvmaf context.\n");
> + else
> + s->flushed =3D 1;
> + }
> + }
> +#endif
> return ff_framesync_activate(&s->fs);
> }
>
> @@ -556,21 +849,52 @@ static av_cold void uninit(AVFilterContext *ctx)
> LIBVMAFContext *s =3D ctx->priv;
> int err =3D 0;
>
> +#if CONFIG_LIBVMAF_METADATA_ENABLED
> + if (!s->outlink_eof)
> + s->outlink_eof =3D 1;
> +#endif
> +
> ff_framesync_uninit(&s->fs);
>
> if (!s->frame_cnt)
> goto clean_up;
>
> - err =3D vmaf_read_pictures(s->vmaf, NULL, NULL, 0);
> + if (!s->flushed) {
> + err =3D vmaf_read_pictures(s->vmaf, NULL, NULL, 0);
> + if (err) {
> + av_log(ctx, AV_LOG_ERROR,
> + "problem flushing libvmaf context.\n");
> + } else
> + s->flushed =3D 1;
> + }
> +
> +#if CONFIG_LIBVMAF_METADATA_ENABLED
> + if (s->metadata_cfg_list.metadata_cfgs) {
> + for (unsigned i =3D 0; i < s->metadata_cfg_list.metadata_cfg_cnt=
; i++) {
> + av_free(s->metadata_cfg_list.metadata_cfgs[i].feature_name);
> + }
> + av_free(s->metadata_cfg_list.metadata_cfgs);
> + }
> +
> + err =3D free_frame_list(&s->cb->frame_list);
> if (err) {
> av_log(ctx, AV_LOG_ERROR,
> - "problem flushing libvmaf context.\n");
> + "problem freeing frame list.\n");
> }
> +#endif
>
> for (unsigned i =3D 0; i < s->model_cnt; i++) {
> double vmaf_score;
> +
> +#if CONFIG_LIBVMAF_METADATA_ENABLED
> + err =3D vmaf_score_pooled(s->vmaf, s->model[i], pool_method_map(=
s->pool),
> + &vmaf_score, 0, s->eof_frame);
> + av_log(ctx, AV_LOG_DEBUG, "frame: %d frame_cnt %d\n", s->eof_fra=
me, s->frame_cnt - 1);
> +#else
> err =3D vmaf_score_pooled(s->vmaf, s->model[i], pool_method_map(=
s->pool),
> &vmaf_score, 0, s->frame_cnt - 1);
> +#endif
> +
> if (err) {
> av_log(ctx, AV_LOG_ERROR,
> "problem getting pooled vmaf score.\n");
> --
> 2.45.2
>
Thanks,
Kyle
More information about the ffmpeg-devel
mailing list