[FFmpeg-devel] [PATCH v2] avfilter/vf_libvmaf: Add metadata propagation support

Kyle Swanson k at ylo.ph
Fri Aug 30 07:44:41 EEST 2024


Hi,


On Mon, Aug 26, 2024 at 10:51 AM Yigithan Yigit
<yigithanyigitdevel at gmail.com> wrote:
>
> ---
>  libavfilter/vf_libvmaf.c | 328 ++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 326 insertions(+), 2 deletions(-)
>
> diff --git a/libavfilter/vf_libvmaf.c b/libavfilter/vf_libvmaf.c
> index f655092b20..e6707aff53 100644
> --- a/libavfilter/vf_libvmaf.c
> +++ b/libavfilter/vf_libvmaf.c
> @@ -27,8 +27,11 @@
>  #include "config_components.h"
>
>  #include <libvmaf.h>
> +#include <libvmaf/version.h>
>
>  #include "libavutil/avstring.h"
> +#include "libavutil/dict.h"
> +#include "libavutil/frame.h"
>  #include "libavutil/mem.h"
>  #include "libavutil/opt.h"
>  #include "libavutil/pixdesc.h"
> @@ -46,6 +49,31 @@
>  #include "libavutil/hwcontext_cuda_internal.h"
>  #endif
>
> +#define VMAF_VERSION_INT_VER(major, minor, patch) \
> +    ((major) * 10000 + (minor) * 100 + (patch))
> +
> +#if VMAF_VERSION_INT_VER(VMAF_API_VERSION_MAJOR, VMAF_API_VERSION_MINOR,=
 VMAF_API_VERSION_PATCH) > VMAF_VERSION_INT_VER(3, 0, 0)
> +#define CONFIG_LIBVMAF_METADATA_ENABLED 1
> +#else
> +#define CONFIG_LIBVMAF_METADATA_ENABLED 0
> +#endif

You should be able to check this via pkg-config and set the
CONFIG_LIBVMAF_METADATA_ENABLED define from the configure script.

> +
> +#if CONFIG_LIBVMAF_METADATA_ENABLED
> +#include <stdatomic.h>
> +
> +typedef struct FrameList {
> +    AVFrame *frame;
> +    unsigned frame_number;
> +    unsigned propagated_handlers_cnt;
> +    struct FrameList *next;
> +} FrameList;
> +
> +typedef struct CallbackStruct {
> +    struct LIBVMAFContext *s;
> +    FrameList *frame_list;
> +} CallbackStruct;
> +#endif
> +
>  typedef struct LIBVMAFContext {
>      const AVClass *class;
>      FFFrameSync fs;
> @@ -56,8 +84,19 @@ typedef struct LIBVMAFContext {
>      int n_subsample;
>      char *model_cfg;
>      char *feature_cfg;
> +#if CONFIG_LIBVMAF_METADATA_ENABLED
> +    char *metadata_feature_cfg;
> +    struct {
> +        VmafMetadataConfiguration *metadata_cfgs;
> +        unsigned metadata_cfg_cnt;
> +    } metadata_cfg_list;
> +    CallbackStruct *cb;
> +    atomic_uint outlink_eof;
> +    atomic_uint eof_frame;
> +#endif
>      VmafContext *vmaf;
>      VmafModel **model;
> +    int flushed;
>      unsigned model_cnt;
>      unsigned frame_cnt;
>      unsigned bpc;
> @@ -77,6 +116,9 @@ static const AVOption libvmaf_options[] =3D {
>      {"n_subsample", "Set interval for frame subsampling used when comput=
ing vmaf.",     OFFSET(n_subsample), AV_OPT_TYPE_INT, {.i64=3D1}, 1, UINT_M=
AX, FLAGS},
>      {"model",  "Set the model to be used for computing vmaf.",          =
                OFFSET(model_cfg), AV_OPT_TYPE_STRING, {.str=3D"version=3Dv=
maf_v0.6.1"}, 0, 1, FLAGS},
>      {"feature",  "Set the feature to be used for computing vmaf.",      =
                OFFSET(feature_cfg), AV_OPT_TYPE_STRING, {.str=3DNULL}, 0, =
1, FLAGS},
> +#if CONFIG_LIBVMAF_METADATA_ENABLED
> +    {"metadata_handler",  "Set the feature to be propagated as metadata.=
",              OFFSET(metadata_feature_cfg), AV_OPT_TYPE_STRING, {.str=3D"=
name=3Dvmaf"}, 0, 1, FLAGS},

It would be better to make this option a bool. When true, propagate all
registered features and models. You can read the names during init;
they should be available inside `parse_models()` and
`parse_features()`.

> +#endif
>      { NULL }
>  };
>
> @@ -105,6 +147,123 @@ static enum VmafPixelFormat pix_fmt_map(enum AVPixe=
lFormat av_pix_fmt)
>      }
>  }
>
> +#if CONFIG_LIBVMAF_METADATA_ENABLED
> +static int add_to_frame_list(FrameList **head, AVFrame *frame, unsigned =
frame_number)
> +{
> +    FrameList *new_frame =3D av_malloc(sizeof(FrameList));
> +    if (!new_frame)
> +        return AVERROR(ENOMEM);
> +
> +    new_frame->frame =3D frame;
> +    new_frame->frame_number =3D frame_number;
> +    new_frame->propagated_handlers_cnt =3D 0;
> +    new_frame->next =3D NULL;
> +
> +    if (*head =3D=3D NULL) {
> +        *head =3D new_frame;
> +    } else {
> +        FrameList *current =3D *head;
> +        while (current->next !=3D NULL) {
> +            current =3D current->next;
> +        }
> +        current->next =3D new_frame;
> +    }
> +
> +    return 0;
> +}
> +
> +static int remove_from_frame_list(FrameList **frame_list, unsigned frame=
_number)
> +{
> +    FrameList *cur =3D *frame_list;
> +    FrameList *prev =3D NULL;
> +
> +    while (cur) {
> +        if (cur->frame_number =3D=3D frame_number) {
> +            if (prev)
> +                prev->next =3D cur->next;
> +            else
> +                *frame_list =3D cur->next;
> +            av_free(cur);
> +            return 0;
> +        }
> +        prev =3D cur;
> +        cur =3D cur->next;
> +    }
> +
> +    return AVERROR(EINVAL);
> +}
> +
> +static int free_frame_list(FrameList **frame_list)
> +{
> +    FrameList *cur =3D *frame_list;
> +    while (cur) {
> +        FrameList *next =3D cur->next;
> +        av_frame_free(&cur->frame);
> +        av_free(cur);
> +        cur =3D next;
> +    }
> +    *frame_list =3D NULL;
> +    return 0;
> +}
> +
> +static FrameList* get_frame_from_frame_list(FrameList *frame_list,
> +                                          unsigned frame_number)
> +{
> +    FrameList *cur =3D frame_list;
> +    while (cur) {
> +        if (cur->frame_number =3D=3D frame_number)
> +            return cur;
> +        cur =3D cur->next;
> +    }
> +    return NULL;
> +}
> +

It would be great if we didn't need to invent a data structure here. I
guess av_fifo is no good because metadata callbacks are not
guaranteed to come in order?

> +static void set_meta(void *data, VmafMetadata *metadata)
> +{
> +    int err =3D 0;
> +    FrameList *current_frame =3D NULL;
> +    CallbackStruct *cb =3D data;
> +    char value[128], key[128];
> +    snprintf(value, sizeof(value), "%0.2f", metadata->score);
> +    snprintf(key, sizeof(key), "%s.%d", metadata->feature_name, metadata=
->picture_index);
> +
> +    current_frame =3D get_frame_from_frame_list(cb->frame_list, metadata=
->picture_index);
> +    if (!current_frame) {
> +        av_log(NULL, AV_LOG_ERROR, "could not find frame with index: %d\=
n",
> +               metadata->picture_index);
> +        return;
> +    }
> +
> +    err =3D av_dict_set(&current_frame->frame->metadata, key, value, 0);
> +    if (err < 0)
> +        av_log(NULL, AV_LOG_ERROR, "could not set metadata: %s\n", key);
> +
> +    current_frame->propagated_handlers_cnt++;
> +
> +    if (current_frame->propagated_handlers_cnt =3D=3D cb->s->metadata_cf=
g_list.metadata_cfg_cnt) {
> +        FrameList *cur =3D cb->frame_list;
> +        // This code block allows to send frames monotonically
> +        while(cur && cur->frame_number <=3D metadata->picture_index) {
> +            if (cur->propagated_handlers_cnt =3D=3D cb->s->metadata_cfg_=
list.metadata_cfg_cnt) {
> +                FrameList *next;
> +                // Check outlink is closed
> +                if (!cb->s->outlink_eof) {
> +                    av_log(cb->s->fs.parent, AV_LOG_DEBUG, "VMAF feature=
: %d, score: %f\n", cur->frame_number, metadata->score);
> +                    cb->s->eof_frame =3D cur->frame_number;
> +                    if(ff_filter_frame(cb->s->fs.parent->outputs[0], cur=
->frame))
> +                        return;
> +                }
> +                next =3D cur->next;
> +                remove_from_frame_list(&cb->frame_list, cur->frame_numbe=
r);
> +                cur =3D next;
> +            }
> +            else
> +                break;
> +        }
> +    }
> +}
> +#endif
> +
>  static int copy_picture_data(AVFrame *src, VmafPicture *dst, unsigned bp=
c)
>  {
>      const int bytes_per_value =3D bpc > 8 ? 2 : 1;
> @@ -160,13 +319,28 @@ static int do_vmaf(FFFrameSync *fs)
>          return AVERROR(ENOMEM);
>      }
>
> +#if CONFIG_LIBVMAF_METADATA_ENABLED
> +    err =3D add_to_frame_list(&s->cb->frame_list, dist, s->frame_cnt);
> +    if (err) {
> +        av_log(s, AV_LOG_ERROR, "problem during add_to_frame_list.\n");
> +        return AVERROR(ENOMEM);
> +    }
> +#endif
> +
>      err =3D vmaf_read_pictures(s->vmaf, &pic_ref, &pic_dist, s->frame_cn=
t++);
>      if (err) {
>          av_log(s, AV_LOG_ERROR, "problem during vmaf_read_pictures.\n");
>          return AVERROR(EINVAL);
>      }
>
> +#if CONFIG_LIBVMAF_METADATA_ENABLED
> +    if (s->metadata_cfg_list.metadata_cfg_cnt)
> +        return 0;
> +    else
> +        return ff_filter_frame(ctx->outputs[0], dist);
> +#else
>      return ff_filter_frame(ctx->outputs[0], dist);
> +#endif
>  }
>
>  static AVDictionary **delimited_dict_parse(char *str, unsigned *cnt)
> @@ -408,6 +582,83 @@ exit:
>      return err;
>  }
>
> +#if CONFIG_LIBVMAF_METADATA_ENABLED
> +static int parse_metadata_handlers(AVFilterContext *ctx)
> +{
> +    LIBVMAFContext *s =3D ctx->priv;
> +    AVDictionary **dict;
> +    unsigned dict_cnt;
> +    int err =3D 0;
> +
> +    if (!s->metadata_feature_cfg)
> +        return 0;
> +
> +    dict_cnt =3D 0;
> +    dict =3D delimited_dict_parse(s->metadata_feature_cfg, &dict_cnt);
> +    if (!dict) {
> +        av_log(ctx, AV_LOG_ERROR,
> +               "could not parse metadata feature config: %s\n",
> +               s->metadata_feature_cfg);
> +        return AVERROR(EINVAL);
> +    }
> +
> +    for (unsigned i =3D 0; i < dict_cnt; i++) {
> +        VmafMetadataConfiguration *metadata_cfg =3D av_calloc(1, sizeof(=
*metadata_cfg));
> +        const AVDictionaryEntry *e =3D NULL;
> +        char *feature_name =3D NULL;
> +
> +        while (e =3D av_dict_iterate(dict[i], e)) {
> +            if (!strcmp(e->key, "name")) {
> +                metadata_cfg->feature_name =3D av_strdup(e->value);
> +                continue;
> +            }
> +        }
> +
> +        metadata_cfg->data =3D s->cb;
> +        metadata_cfg->callback =3D &set_meta;
> +
> +        err =3D vmaf_register_metadata_handler(s->vmaf, *metadata_cfg);
> +        if (err) {
> +            av_log(ctx, AV_LOG_ERROR,
> +                   "problem during vmaf_register_metadata_handler: %s\n"=
,
> +                   feature_name);
> +            goto exit;
> +        }
> +
> +        s->metadata_cfg_list.metadata_cfgs =3D av_realloc(s->metadata_cf=
g_list.metadata_cfgs,
> +                                             (s->metadata_cfg_list.metad=
ata_cfg_cnt + 1) *
> +                                             sizeof(*s->metadata_cfg_lis=
t.metadata_cfgs));
> +        if (!s->metadata_cfg_list.metadata_cfgs) {
> +            err =3D AVERROR(ENOMEM);
> +            goto exit;
> +        }
> +
> +        s->metadata_cfg_list.metadata_cfgs[s->metadata_cfg_list.metadata=
_cfg_cnt++] =3D *metadata_cfg;
> +    }
> +
> +exit:
> +    for (unsigned i =3D 0; i < dict_cnt; i++) {
> +        if (dict[i])
> +            av_dict_free(&dict[i]);
> +    }
> +    av_free(dict);
> +    return err;
> +}
> +
> +static int init_metadata(AVFilterContext *ctx)
> +{
> +    LIBVMAFContext *s =3D ctx->priv;
> +
> +    s->cb =3D av_calloc(1, sizeof(CallbackStruct));
> +    if (!s->cb)
> +        return AVERROR(ENOMEM);
> +
> +    s->cb->s =3D s;
> +
> +    return 0;
> +}
> +#endif
> +
>  static enum VmafLogLevel log_level_map(int log_level)
>  {
>      switch (log_level) {
> @@ -441,6 +692,16 @@ static av_cold int init(AVFilterContext *ctx)
>      if (err)
>          return AVERROR(EINVAL);
>
> +#if CONFIG_LIBVMAF_METADATA_ENABLED
> +    err =3D init_metadata(ctx);
> +    if (err)
> +        return err;
> +
> +    err =3D parse_metadata_handlers(ctx);
> +    if (err)
> +        return err;
> +#endif
> +
>      err =3D parse_models(ctx);
>      if (err)
>          return err;
> @@ -518,6 +779,38 @@ static int config_output(AVFilterLink *outlink)
>  static int activate(AVFilterContext *ctx)
>  {
>      LIBVMAFContext *s =3D ctx->priv;
> +#if CONFIG_LIBVMAF_METADATA_ENABLED
> +    // There are 2 cases for metadata propagation:
> +    // 1. Where the case that outlink closes
> +    // 2. Where inlink closes
> +    // Case 1:
> +    //   In this case we need check outlink somehow for the status in ev=
ery iteration.
> +    //   If outlink is not wanting frame anymore, we need to proceed wit=
h uninit with setting inlink.
> +    //   But nature of multithreading settting eof inside the activate c=
all can make sync issues and
> +    //   can lead to extra propagated frames. Atomic variables are used =
to avoid this.
> +    // Case 2:
> +    //   This case relatively easy to handle. Because of calculation of =
vmaf score takes time
> +    //   So `do_vmaf` buffers many of frames before sending to outlink t=
hat causes
> +    //   premature close of outlink.
> +    //   Checking inlink status is enough and if inlink =3D=3D eof flush=
ing vmaf is enough for this.
> +    int64_t pts;
> +    int status, ret =3D 0;
> +
> +    if (ff_outlink_get_status(ctx->outputs[0]))
> +        s->outlink_eof =3D 1;
> +
> +    if (ff_inlink_acknowledge_status(ctx->inputs[0], &status, &pts) &&
> +        ff_inlink_acknowledge_status(ctx->inputs[1], &status, &pts)) {
> +        if (!s->flushed) {
> +            ret =3D vmaf_read_pictures(s->vmaf, NULL, NULL, 0);
> +            if (ret)
> +                av_log(ctx, AV_LOG_ERROR,
> +                       "problem flushing libvmaf context.\n");
> +            else
> +                s->flushed =3D 1;
> +        }
> +    }
> +#endif
>      return ff_framesync_activate(&s->fs);
>  }
>
> @@ -556,21 +849,52 @@ static av_cold void uninit(AVFilterContext *ctx)
>      LIBVMAFContext *s =3D ctx->priv;
>      int err =3D 0;
>
> +#if CONFIG_LIBVMAF_METADATA_ENABLED
> +    if (!s->outlink_eof)
> +        s->outlink_eof =3D 1;
> +#endif
> +
>      ff_framesync_uninit(&s->fs);
>
>      if (!s->frame_cnt)
>          goto clean_up;
>
> -    err =3D vmaf_read_pictures(s->vmaf, NULL, NULL, 0);
> +    if (!s->flushed) {
> +        err =3D vmaf_read_pictures(s->vmaf, NULL, NULL, 0);
> +        if (err) {
> +            av_log(ctx, AV_LOG_ERROR,
> +                   "problem flushing libvmaf context.\n");
> +        } else
> +            s->flushed =3D 1;
> +    }
> +
> +#if CONFIG_LIBVMAF_METADATA_ENABLED
> +    if (s->metadata_cfg_list.metadata_cfgs) {
> +        for (unsigned i =3D 0; i < s->metadata_cfg_list.metadata_cfg_cnt=
; i++) {
> +            av_free(s->metadata_cfg_list.metadata_cfgs[i].feature_name);
> +        }
> +        av_free(s->metadata_cfg_list.metadata_cfgs);
> +    }
> +
> +    err =3D free_frame_list(&s->cb->frame_list);
>      if (err) {
>          av_log(ctx, AV_LOG_ERROR,
> -               "problem flushing libvmaf context.\n");
> +               "problem freeing frame list.\n");
>      }
> +#endif
>
>      for (unsigned i =3D 0; i < s->model_cnt; i++) {
>          double vmaf_score;
> +
> +#if CONFIG_LIBVMAF_METADATA_ENABLED
> +        err =3D vmaf_score_pooled(s->vmaf, s->model[i], pool_method_map(=
s->pool),
> +                                &vmaf_score, 0, s->eof_frame);
> +        av_log(ctx, AV_LOG_DEBUG, "frame: %d frame_cnt %d\n", s->eof_fra=
me, s->frame_cnt - 1);
> +#else
>          err =3D vmaf_score_pooled(s->vmaf, s->model[i], pool_method_map(=
s->pool),
>                                  &vmaf_score, 0, s->frame_cnt - 1);
> +#endif
> +
>          if (err) {
>              av_log(ctx, AV_LOG_ERROR,
>                     "problem getting pooled vmaf score.\n");
> --
> 2.45.2
>

Thanks,
Kyle


More information about the ffmpeg-devel mailing list