[FFmpeg-devel] [PATCH] avformat/matroska: Handle TargetType and nested SimpleTags in metadata

Andreas Rheinhardt andreas.rheinhardt at gmail.com
Tue Dec 8 21:58:19 EET 2020


zsugabubus at national.shitposting.agency:
> Metadata names now recognized in the following format:
> 
>   [ [TargetTypeValue][TargetType] "/" ] TagName [ "/" TagName ]... [ "@" [ "-" ] [TagLanguage] ]
> 
> Language designators separated with "@" instead of "-" to avoid future
> ambiguity about IETF language tags, like "en-US" and similar.
> "-" designates TagDefault := 0.
> 
> Signed-off-by: zsugabubus <zsugabubus at national.shitposting.agency>
> ---
>  libavformat/matroska.c    |  56 ++++++-
>  libavformat/matroska.h    |   9 ++
>  libavformat/matroskadec.c | 286 +++++++++++++++++++++--------------
>  libavformat/matroskaenc.c | 308 ++++++++++++++++++++++++++------------
>  4 files changed, 449 insertions(+), 210 deletions(-)
> 
> diff --git a/libavformat/matroska.c b/libavformat/matroska.c
> index 7c56aba..4fcade2 100644
> --- a/libavformat/matroska.c
> +++ b/libavformat/matroska.c
> @@ -120,11 +120,63 @@ const CodecTags ff_webm_codec_tags[] = {
>  };
>  
>  const AVMetadataConv ff_mkv_metadata_conv[] = {
> -    { "LEAD_PERFORMER", "performer" },
> -    { "PART_NUMBER"   , "track"  },
> +    { "TRACK/ARTIST",        "artist"       },
> +    { "ALBUM/ARTIST",        "album_artist" },
> +    { "ALBUM/TITLE",         "album"        },
> +    { "ALBUM/DATE_RELEASED", "date"         },
> +    { "TRACK/TITLE",         "title"        },
> +    { "ALBUM/PART_NUMBER",   "track"        },
> +    { "ALBUM/TOTAL_PARTS",   "track_total"  },
> +    { "VOLUME/PART_NUMBER",  "disc"         },
> +    { "VOLUME/TOTAL_PARTS",  "disc_total"   },
> +    { "TRACK/GENRE",         "genre"        },
> +
> +    { "50/COMMENT",          "comment"      },
> +    { "ALBUM/COMMENT",       "comment"      },
> +    { "EPISODE/COMMENT",     "comment"      },
>      { 0 }
>  };
>  
> +#define A (1 << AVMEDIA_TYPE_AUDIO)
> +#define V (1 << AVMEDIA_TYPE_VIDEO)
> +#define AV A | V
> +
> +/* Note: When converting from typevalues -> strings first matching entry is used. */
> +const TypeValueConv ff_mkv_typevalue_conv[] = {
> +    { "COLLECTION", 70, AV },
> +
> +    { "SEASON",     60, V  },
> +    { "VOLUME",     60, AV },
> +    { "EDITION",    60, A  },
> +    { "ISSUE",      60, A  },
> +    { "OPUS",       60, A  },
> +    { "SEQUEL",     60, V  },
> +
> +    { "ALBUM",      50, A  },
> +    { "EPISODE",    50, V  },
> +    { "OPERA",      50, A  },
> +    { "MOVIE",      50, V  },
> +    { "CONCERT",    50, AV },
> +
> +    { "PART",       40, AV },
> +    { "SESSION",    40, AV },
> +
> +    { "TRACK",      30, A  },
> +    { "CHAPTER",    30, V  },
> +    { "SONG",       30, A  },
> +
> +    { "SUBTRACK",   20, A  },
> +    { "MOVEMENT",   20, A  },
> +    { "SCENE",      20, V  },
> +
> +    { "SHOT",       10, V  },
> +    { "",            0, -1 }
> +};
> +
> +#undef A
> +#undef V
> +#undef AV
> +
>  const char * const ff_matroska_video_stereo_mode[MATROSKA_VIDEO_STEREOMODE_TYPE_NB] = {
>      "mono",
>      "left_right",
> diff --git a/libavformat/matroska.h b/libavformat/matroska.h
> index 6f198f0..6dc9582 100644
> --- a/libavformat/matroska.h
> +++ b/libavformat/matroska.h
> @@ -355,14 +355,23 @@ typedef struct CodecTags{
>      enum AVCodecID id;
>  }CodecTags;
>  
> +typedef struct {
> +    char str[sizeof "COLLECTION"];
> +    uint8_t typevalue;
> +    uint8_t codec_types; /* (1 << AVMEDIA_TYPE_*)... */
> +} TypeValueConv;
> +
>  /* max. depth in the EBML tree structure */
>  #define EBML_MAX_DEPTH 16
>  
>  #define MATROSKA_VIDEO_STEREO_PLANE_COUNT  3
>  
> +enum { MATROSKA_DEFAULT_TAGTARGETS_TYPEVALUE = 50 };
> +
>  extern const CodecTags ff_mkv_codec_tags[];
>  extern const CodecTags ff_webm_codec_tags[];
>  extern const AVMetadataConv ff_mkv_metadata_conv[];
> +extern const TypeValueConv ff_mkv_typevalue_conv[];
>  extern const char * const ff_matroska_video_stereo_mode[MATROSKA_VIDEO_STEREOMODE_TYPE_NB];
>  extern const char * const ff_matroska_video_stereo_plane[MATROSKA_VIDEO_STEREO_PLANE_COUNT];
>  
> diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
> index 5455594..1c0478d 100644
> --- a/libavformat/matroskadec.c
> +++ b/libavformat/matroskadec.c
> @@ -98,8 +98,8 @@ typedef enum {
>  typedef const struct EbmlSyntax {
>      uint32_t id;
>      EbmlType type;
> -    size_t list_elem_size;
> -    size_t data_offset;
> +    unsigned list_elem_size;
> +    unsigned data_offset;
>      union {
>          int64_t     i;
>          uint64_t    u;
> @@ -228,8 +228,8 @@ typedef struct MatroskaTrackOperation {
>  } MatroskaTrackOperation;
>  
>  typedef struct MatroskaTrack {
> +    uint64_t uid; /* Must be the first field. */
>      uint64_t num;
> -    uint64_t uid;
>      uint64_t type;
>      char    *name;
>      char    *codec_id;
> @@ -258,7 +258,7 @@ typedef struct MatroskaTrack {
>  } MatroskaTrack;
>  
>  typedef struct MatroskaAttachment {
> -    uint64_t uid;
> +    uint64_t uid; /* Must be the first field. */
>      char *filename;
>      char *description;
>      char *mime;
> @@ -268,9 +268,9 @@ typedef struct MatroskaAttachment {
>  } MatroskaAttachment;
>  
>  typedef struct MatroskaChapter {
> +    uint64_t uid; /* Must be the first field. */
>      uint64_t start;
>      uint64_t end;
> -    uint64_t uid;
>      char    *title;
>  
>      AVChapter *chapter;
> @@ -286,26 +286,26 @@ typedef struct MatroskaIndex {
>      EbmlList pos;
>  } MatroskaIndex;
>  
> -typedef struct MatroskaTag {
> +typedef struct MatroskaSimpleTag {
>      char *name;
>      char *string;
>      char *lang;
>      uint64_t def;
>      EbmlList sub;
> -} MatroskaTag;
> +} MatroskaSimpleTag;
>  
>  typedef struct MatroskaTagTarget {
>      char    *type;
>      uint64_t typevalue;
> -    uint64_t trackuid;
> -    uint64_t chapteruid;
> -    uint64_t attachuid;
> +    EbmlList trackuids;
> +    EbmlList chapteruids;
> +    EbmlList attachuids;
>  } MatroskaTagTarget;
>  
> -typedef struct MatroskaTags {
> +typedef struct MatroskaTag {
>      MatroskaTagTarget target;
> -    EbmlList tag;
> -} MatroskaTags;
> +    EbmlList simpletags;
> +} MatroskaTag;
>  
>  typedef struct MatroskaSeekhead {
>      uint64_t id;
> @@ -652,32 +652,32 @@ static EbmlSyntax matroska_index[] = {
>  };
>  
>  static EbmlSyntax matroska_simpletag[] = {
> -    { MATROSKA_ID_TAGNAME,        EBML_UTF8, 0,                   offsetof(MatroskaTag, name) },
> -    { MATROSKA_ID_TAGSTRING,      EBML_UTF8, 0,                   offsetof(MatroskaTag, string) },
> -    { MATROSKA_ID_TAGLANG,        EBML_STR,  0,                   offsetof(MatroskaTag, lang), { .s = "und" } },
> -    { MATROSKA_ID_TAGDEFAULT,     EBML_UINT, 0,                   offsetof(MatroskaTag, def) },
> -    { MATROSKA_ID_TAGDEFAULT_BUG, EBML_UINT, 0,                   offsetof(MatroskaTag, def) },
> -    { MATROSKA_ID_SIMPLETAG,      EBML_NEST, sizeof(MatroskaTag), offsetof(MatroskaTag, sub),  { .n = matroska_simpletag } },
> +    { MATROSKA_ID_TAGNAME,        EBML_UTF8, 0,                         offsetof(MatroskaSimpleTag, name) },
> +    { MATROSKA_ID_TAGSTRING,      EBML_UTF8, 0,                         offsetof(MatroskaSimpleTag, string) },
> +    { MATROSKA_ID_TAGLANG,        EBML_STR,  0,                         offsetof(MatroskaSimpleTag, lang), { .s = "und" } },
> +    { MATROSKA_ID_TAGDEFAULT,     EBML_UINT, 0,                         offsetof(MatroskaSimpleTag, def), { .u = 1 } },
> +    { MATROSKA_ID_TAGDEFAULT_BUG, EBML_UINT, 0,                         offsetof(MatroskaSimpleTag, def), { .u = 1 } },
> +    { MATROSKA_ID_SIMPLETAG,      EBML_NEST, sizeof(MatroskaSimpleTag), offsetof(MatroskaSimpleTag, sub), { .n = matroska_simpletag } },
>      CHILD_OF(matroska_tag)
>  };
>  
>  static EbmlSyntax matroska_tagtargets[] = {
> -    { MATROSKA_ID_TAGTARGETS_TYPE,       EBML_STR,  0, offsetof(MatroskaTagTarget, type) },
> -    { MATROSKA_ID_TAGTARGETS_TYPEVALUE,  EBML_UINT, 0, offsetof(MatroskaTagTarget, typevalue), { .u = 50 } },
> -    { MATROSKA_ID_TAGTARGETS_TRACKUID,   EBML_UINT, 0, offsetof(MatroskaTagTarget, trackuid) },
> -    { MATROSKA_ID_TAGTARGETS_CHAPTERUID, EBML_UINT, 0, offsetof(MatroskaTagTarget, chapteruid) },
> -    { MATROSKA_ID_TAGTARGETS_ATTACHUID,  EBML_UINT, 0, offsetof(MatroskaTagTarget, attachuid) },
> +    { MATROSKA_ID_TAGTARGETS_TYPE,       EBML_STR,  0,                offsetof(MatroskaTagTarget, type) },
> +    { MATROSKA_ID_TAGTARGETS_TYPEVALUE,  EBML_UINT, 0,                offsetof(MatroskaTagTarget, typevalue), { .u = MATROSKA_DEFAULT_TAGTARGETS_TYPEVALUE } },
> +    { MATROSKA_ID_TAGTARGETS_TRACKUID,   EBML_UINT, sizeof(uint64_t), offsetof(MatroskaTagTarget, trackuids) },
> +    { MATROSKA_ID_TAGTARGETS_CHAPTERUID, EBML_UINT, sizeof(uint64_t), offsetof(MatroskaTagTarget, chapteruids) },
> +    { MATROSKA_ID_TAGTARGETS_ATTACHUID,  EBML_UINT, sizeof(uint64_t), offsetof(MatroskaTagTarget, attachuids) },
>      CHILD_OF(matroska_tag)
>  };
>  
>  static EbmlSyntax matroska_tag[] = {
> -    { MATROSKA_ID_SIMPLETAG,  EBML_NEST, sizeof(MatroskaTag), offsetof(MatroskaTags, tag),    { .n = matroska_simpletag } },
> -    { MATROSKA_ID_TAGTARGETS, EBML_NEST, 0,                   offsetof(MatroskaTags, target), { .n = matroska_tagtargets } },
> +    { MATROSKA_ID_SIMPLETAG,  EBML_NEST, sizeof(MatroskaSimpleTag), offsetof(MatroskaTag, simpletags), { .n = matroska_simpletag } },
> +    { MATROSKA_ID_TAGTARGETS, EBML_NEST, 0,                         offsetof(MatroskaTag, target),     { .n = matroska_tagtargets } },
>      CHILD_OF(matroska_tags)
>  };
>  
>  static EbmlSyntax matroska_tags[] = {
> -    { MATROSKA_ID_TAG, EBML_NEST, sizeof(MatroskaTags), offsetof(MatroskaDemuxContext, tags), { .n = matroska_tag } },
> +    { MATROSKA_ID_TAG, EBML_NEST, sizeof(MatroskaTag), offsetof(MatroskaDemuxContext, tags), { .n = matroska_tag } },
>      CHILD_OF(matroska_segment)
>  };
>  
> @@ -1719,103 +1719,169 @@ failed:
>      return result;
>  }
>  
> -static void matroska_convert_tag(AVFormatContext *s, EbmlList *list,
> -                                 AVDictionary **metadata, char *prefix)
> +static void matroska_get_targettype_string(char *key, unsigned *key_ptr, unsigned key_size,
> +                                           MatroskaTag const *tag, enum AVMediaType codec_type)
>  {
> -    MatroskaTag *tags = list->elem;
> -    char key[1024];
> -    int i;
> +    for (const TypeValueConv *conv = ff_mkv_typevalue_conv;
> +         tag->target.typevalue <= (uint64_t)conv->typevalue;
> +         ++conv)
> +        if (tag->target.typevalue == (uint64_t)conv->typevalue &&
> +            (codec_type == AVMEDIA_TYPE_UNKNOWN || (conv->codec_types & (1 << codec_type))) &&
> +            (!tag->target.type || !av_strcasecmp(tag->target.type, conv->str)))
> +        {
> +            *key_ptr = strlen(conv->str);
> +            memcpy(key, conv->str, sizeof conv->str);
> +            return;
> +        }
>  
> -    for (i = 0; i < list->nb_elem; i++) {
> -        const char *lang = tags[i].lang &&
> -                           strcmp(tags[i].lang, "und") ? tags[i].lang : NULL;
> +    *key_ptr = snprintf(key, key_size, "%"PRIu64"%s",
> +                        tag->target.typevalue, tag->target.type);
> +    if (key_size < *key_ptr)
> +        *key_ptr = key_size;
> +}
>  
> -        if (!tags[i].name) {
> -            av_log(s, AV_LOG_WARNING, "Skipping invalid tag with no TagName.\n");
> +static void matroska_convert_simpletags(AVFormatContext *s, const EbmlList *simpletag_list,
> +                                        AVDictionary **metadata,
> +                                        char *key, unsigned base_key_ptr, unsigned key_size)
> +{
> +    MatroskaSimpleTag *simpletags = simpletag_list->elem;
> +
> +    for (int i = 0; i < simpletag_list->nb_elem; i++) {
> +        MatroskaSimpleTag *simpletag = &simpletags[i];
> +        const char *lang = simpletag->lang &&
> +                           strcmp(simpletag->lang, "und") ? simpletag->lang : NULL;
> +        unsigned key_ptr;
> +
> +        if (!simpletag->name) {
> +            av_log(s, AV_LOG_WARNING,
> +                   "Skipping invalid Tag with no TagName.\n");
>              continue;
>          }
> -        if (prefix)
> -            snprintf(key, sizeof(key), "%s/%s", prefix, tags[i].name);
> -        else
> -            av_strlcpy(key, tags[i].name, sizeof(key));
> -        if (tags[i].def || !lang) {
> -            av_dict_set(metadata, key, tags[i].string, 0);
> -            if (tags[i].sub.nb_elem)
> -                matroska_convert_tag(s, &tags[i].sub, metadata, key);
> -        }
> -        if (lang) {
> -            av_strlcat(key, "-", sizeof(key));
> -            av_strlcat(key, lang, sizeof(key));
> -            av_dict_set(metadata, key, tags[i].string, 0);
> -            if (tags[i].sub.nb_elem)
> -                matroska_convert_tag(s, &tags[i].sub, metadata, key);
> +
> +        key_ptr = base_key_ptr +
> +            snprintf(key + base_key_ptr, key_size - base_key_ptr, "%s%s",
> +                     base_key_ptr ? "/" : "", simpletag->name);
> +        if (key_size <= key_ptr) {
> +            av_log(s, AV_LOG_WARNING,
> +                   "Skipping SimpleTag with path larger than %u bytes.\n",
> +                   key_size);
> +            return;
>          }
> +
> +        if (lang)
> +            snprintf(key + key_ptr, key_size - key_ptr, "@%s%s",
> +                     !simpletag->def ? "-" : "", lang);
> +        av_dict_set(metadata, key, simpletag->string, AV_DICT_MULTIKEY);
> +
> +        matroska_convert_simpletags(s, &simpletag->sub, metadata,
> +                                    key, key_ptr, key_size);
>      }
> +}
> +
> +static void matroska_convert_tag(AVFormatContext *s, const MatroskaTag *tag,
> +                                 AVDictionary **metadata, enum AVMediaType codec_type)
> +{
> +    char key[1024];
> +    unsigned key_ptr;
> +
> +    matroska_get_targettype_string(key, &key_ptr, sizeof(key),
> +                                   tag, codec_type);
> +    matroska_convert_simpletags(s, &tag->simpletags, metadata,
> +                                key, key_ptr, sizeof(key));
> +
>      ff_metadata_conv(metadata, NULL, ff_mkv_metadata_conv);
>  }
>  
> -static void matroska_convert_tags(AVFormatContext *s)
> +static void matroska_convert_tags_for_attachment_cb(void *data,
> +                                                    AVDictionary ***metadata,
> +                                                    enum AVMediaType *codec_type)
>  {
> -    MatroskaDemuxContext *matroska = s->priv_data;
> -    MatroskaTags *tags = matroska->tags.elem;
> -    int i, j;
> +    MatroskaAttachment *attachment = data;
> +    if (attachment->stream) {
> +        *metadata = &attachment->stream->metadata;
> +        *codec_type = attachment->stream->codecpar->codec_type;
> +    }
> +}
>  
> -    for (i = 0; i < matroska->tags.nb_elem; i++) {
> -        if (tags[i].target.attachuid) {
> -            MatroskaAttachment *attachment = matroska->attachments.elem;
> -            int found = 0;
> -            for (j = 0; j < matroska->attachments.nb_elem; j++) {
> -                if (attachment[j].uid == tags[i].target.attachuid &&
> -                    attachment[j].stream) {
> -                    matroska_convert_tag(s, &tags[i].tag,
> -                                         &attachment[j].stream->metadata, NULL);
> -                    found = 1;
> -                }
> -            }
> -            if (!found) {
> -                av_log(s, AV_LOG_WARNING,
> -                       "The tags at index %d refer to a "
> -                       "non-existent attachment %"PRId64".\n",
> -                       i, tags[i].target.attachuid);
> -            }
> -        } else if (tags[i].target.chapteruid) {
> -            MatroskaChapter *chapter = matroska->chapters.elem;
> -            int found = 0;
> -            for (j = 0; j < matroska->chapters.nb_elem; j++) {
> -                if (chapter[j].uid == tags[i].target.chapteruid &&
> -                    chapter[j].chapter) {
> -                    matroska_convert_tag(s, &tags[i].tag,
> -                                         &chapter[j].chapter->metadata, NULL);
> -                    found = 1;
> -                }
> -            }
> -            if (!found) {
> -                av_log(s, AV_LOG_WARNING,
> -                       "The tags at index %d refer to a non-existent chapter "
> -                       "%"PRId64".\n",
> -                       i, tags[i].target.chapteruid);
> -            }
> -        } else if (tags[i].target.trackuid) {
> -            MatroskaTrack *track = matroska->tracks.elem;
> -            int found = 0;
> -            for (j = 0; j < matroska->tracks.nb_elem; j++) {
> -                if (track[j].uid == tags[i].target.trackuid &&
> -                    track[j].stream) {
> -                    matroska_convert_tag(s, &tags[i].tag,
> -                                         &track[j].stream->metadata, NULL);
> -                    found = 1;
> -               }
> -            }
> -            if (!found) {
> -                av_log(s, AV_LOG_WARNING,
> -                       "The tags at index %d refer to a non-existent track "
> -                       "%"PRId64".\n",
> -                       i, tags[i].target.trackuid);
> +static void matroska_convert_tags_for_chapter_cb(void *data,
> +                                                 AVDictionary ***metadata,
> +                                                 enum AVMediaType *codec_type)
> +{
> +    MatroskaChapter *chapter = data;
> +    if (chapter->chapter)
> +        *metadata = &chapter->chapter->metadata;
> +}
> +
> +static void matroska_convert_tags_for_track_cb(void *data,
> +                                               AVDictionary ***metadata,
> +                                               enum AVMediaType *codec_type)
> +{
> +    MatroskaTrack *track = data;
> +    if (track->stream) {
> +        *metadata = &track->stream->metadata;
> +        *codec_type = track->stream->codecpar->codec_type;
> +    }
> +}
> +
> +/* First field of elements in elem_list is expected to contain the uid. */
> +static int matroska_convert_tag_for_uids(AVFormatContext *s, const MatroskaTag *tag,
> +                                         EbmlList *elem_list, unsigned list_elem_size,
> +                                         EbmlList *uid_list, const char *elem_name,
> +                                         void(*elem_cb)(void *data, AVDictionary ***, enum AVMediaType *))
> +{
> +    uint64_t *uids = uid_list->elem;
> +
> +    for (int i = 0; i < uid_list->nb_elem; ++i) {
> +        uint64_t uid = uids[i];
> +        int found = 0;
> +
> +        void *data = elem_list->elem;
> +        for (int j = 0; j < elem_list->nb_elem; ++j, data = (char *)data + list_elem_size) {
> +            uint64_t elem_uid = *(uint64_t *)data;
> +
> +            if (uid == 0 /* Matches all. */ || uid == elem_uid) {
> +                AVDictionary **metadata = NULL;
> +                enum AVMediaType codec_type = AVMEDIA_TYPE_UNKNOWN;
> +
> +                found = 1;
> +                elem_cb(data, &metadata, &codec_type);
> +
> +                if (metadata)
> +                    matroska_convert_tag(s, tag, metadata, codec_type);
>              }
> -        } else {
> -            matroska_convert_tag(s, &tags[i].tag, &s->metadata,
> -                                 tags[i].target.type);
>          }
> +
> +        if (!found)
> +            av_log(s, AV_LOG_WARNING,
> +                   "Tags element refer to a non-existent %s %"PRIu64".\n",
> +                   elem_name, uid);
> +    }
> +
> +    return uid_list->nb_elem;
> +}
> +
> +static void matroska_convert_tags(AVFormatContext *s)
> +{
> +    MatroskaDemuxContext *matroska = s->priv_data;
> +    MatroskaTag *tags = matroska->tags.elem;
> +
> +    for (int i = 0; i < matroska->tags.nb_elem; i++) {
> +        MatroskaTag *tag = &tags[i];
> +        int any = 0;
> +        any |= matroska_convert_tag_for_uids(s, tag,
> +                                             &matroska->attachments, sizeof(MatroskaAttachment),
> +                                             &tag->target.attachuids, "attachment",
> +                                             matroska_convert_tags_for_attachment_cb);
> +        any |= matroska_convert_tag_for_uids(s, tag,
> +                                             &matroska->chapters, sizeof(MatroskaChapter),
> +                                             &tag->target.chapteruids, "chapter",
> +                                             matroska_convert_tags_for_chapter_cb);
> +        any |= matroska_convert_tag_for_uids(s, tag,
> +                                             &matroska->tracks, sizeof(MatroskaTrack),
> +                                             &tag->target.trackuids, "track",
> +                                             matroska_convert_tags_for_track_cb);
> +        if (!any)
> +            matroska_convert_tag(s, tag, &s->metadata, AVMEDIA_TYPE_UNKNOWN);
>      }
>  }
>  
> diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
> index 233c472..2a098c1 100644
> --- a/libavformat/matroskaenc.c
> +++ b/libavformat/matroskaenc.c
> @@ -61,6 +61,10 @@
>  #define IS_SEEKABLE(pb, mkv) (((pb)->seekable & AVIO_SEEKABLE_NORMAL) && \
>                                !(mkv)->is_live)
>  
> +#define TAG_DURATION_PLACEHOLDER "00:00:00.000000000"
> +
> +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
> +
>  enum {
>      DEFAULT_MODE_INFER,
>      DEFAULT_MODE_INFER_NO_SUBS,
> @@ -1433,58 +1437,185 @@ static int mkv_write_tracks(AVFormatContext *s)
>                                               MATROSKA_ID_TRACKS);
>  }
>  
> -static int mkv_write_simpletag(AVIOContext *pb, const AVDictionaryEntry *t)
> +typedef struct {
> +    uint64_t typevalue;
> +    uint64_t uid;
> +    uint32_t uid_elementid;
> +    const char *lang;
> +    unsigned def;
> +    char *data;
> +    /* (TargetValue, Tag), [ (TagName, SimpleTag)... ] */
> +    unsigned depth;
> +    struct {
> +        char *name;
> +        ebml_master tag;
> +    } levels[EBML_MAX_DEPTH - 1 /* Tags */ - 1 /* Segment */];
> +} TagWriterContext;
> +
> +static int mkv_start_tag(MatroskaMuxContext *mkv, AVIOContext **pb,
> +                         TagWriterContext *tw)
> +{
> +    ebml_master targets;
> +    int ret;
> +    const char *type = tw->levels[0].name;
> +
> +    if (!*pb) {
> +        ret = start_ebml_master_crc32(pb, mkv);
> +        if (ret < 0)
> +            return ret;
> +    }
> +
> +    tw->levels[0].tag = start_ebml_master(*pb, MATROSKA_ID_TAG,        0);
> +    targets = start_ebml_master(*pb, MATROSKA_ID_TAGTARGETS, 0);
> +
> +    if (*type || tw->typevalue != MATROSKA_DEFAULT_TAGTARGETS_TYPEVALUE)
> +        put_ebml_uint(*pb, MATROSKA_ID_TAGTARGETS_TYPEVALUE, tw->typevalue);
> +    if (*type)
> +        put_ebml_string(*pb, MATROSKA_ID_TAGTARGETS_TYPE, type);
> +    if (tw->uid_elementid)
> +        put_ebml_uid(*pb, tw->uid_elementid, tw->uid);
> +
> +    end_ebml_master(*pb, targets);
> +    return 0;
> +}
> +
> +static uint64_t mkv_type2typevalue(const char *type)
>  {
> +    char type_buf[sizeof(((TypeValueConv *)0)->str)];
> +
> +    strncpy(type_buf, type, sizeof type_buf);
> +
> +    for (const TypeValueConv *conv = ff_mkv_typevalue_conv;
> +         conv->typevalue;
> +         ++conv)
> +        if (!memcmp(type_buf, conv->str, sizeof(type_buf)))
> +            return conv->typevalue;
> +
> +    return MATROSKA_DEFAULT_TAGTARGETS_TYPEVALUE;
> +}
> +
> +static int mkv_tagwriter_init(TagWriterContext *tw, const AVDictionaryEntry *t)
> +{
> +    enum { MAX_DEPTH = ARRAY_SIZE(tw->levels) };
> +
>      uint8_t *key = av_strdup(t->key);
> -    uint8_t *p   = key;
> -    const uint8_t *lang = NULL;
> -    ebml_master tag;
> +    char *p = key;
> +    unsigned depth;
>  
>      if (!key)
>          return AVERROR(ENOMEM);
>  
> -    if ((p = strrchr(p, '-')) &&
> -        (lang = ff_convert_lang_to(p + 1, AV_LANG_ISO639_2_BIBL)))
> -        *p = 0;
> +    /* TAG at -lang */
> +    if ((p = strrchr(p, '@'))) {
> +        tw->def = p[1] != '-';
> +        tw->lang = ff_convert_lang_to(p + 1 + !tw->def, AV_LANG_ISO639_2_BIBL);
> +        *p = '\0';
> +    } else {
> +        tw->def = 1;
> +        tw->lang = NULL;
> +    }
>  
> -    p = key;
> -    while (*p) {
> +    /* "hello world" -> "HELLO_WORLD" */
> +    for (p = key; *p; ++p)
>          if (*p == ' ')
>              *p = '_';
> -        else if (*p >= 'a' && *p <= 'z')
> +        else if ('a' <= *p && *p <= 'z')
>              *p -= 'a' - 'A';
> -        p++;
> -    }
>  
> -    tag = start_ebml_master(pb, MATROSKA_ID_SIMPLETAG, 0);
> -    put_ebml_string(pb, MATROSKA_ID_TAGNAME, key);
> -    if (lang)
> -        put_ebml_string(pb, MATROSKA_ID_TAGLANG, lang);
> -    put_ebml_string(pb, MATROSKA_ID_TAGSTRING, t->value);
> -    end_ebml_master(pb, tag);
> +    tw->data = key;
> +
> +    depth = 0;
> +    p = key;
> +    for (;;) {
> +        tw->levels[depth++].name = p;
> +        if (!(p = strchr(p, '/')) || MAX_DEPTH <= depth)
> +            break;
> +        *p++ = '\0';
> +    }
> +    /* Use default TagTarget for simple tag names (no '/'). */
> +    if (1 == depth) {
> +        tw->levels[1].name = tw->levels[0].name;
> +        tw->levels[0].name = (char *)"";
> +        depth++;
> +        tw->typevalue = MATROSKA_DEFAULT_TAGTARGETS_TYPEVALUE;
> +    } else {
> +        /* Try parse "123TARGETTYPE". */
> +        p = tw->levels[0].name;
> +        if ('0' <= *p && *p <= '9') {
> +            tw->typevalue = 0;
> +            do
> +                tw->typevalue = tw->typevalue * 10 + (*p++ - '0');
> +            while ('0' <= *p && *p <= '9');
> +            tw->levels[0].name = *p ? p : (char *)"";
> +        } else {
> +            tw->typevalue = mkv_type2typevalue(p);
> +        }
> +    }
> +    tw->depth = depth;
>  
> -    av_freep(&key);
>      return 0;
>  }
>  
> -static int mkv_write_tag_targets(MatroskaMuxContext *mkv, AVIOContext **pb,
> -                                 ebml_master *tag, uint32_t elementid, uint64_t uid)
> +static int mkv_write_simpletag(MatroskaMuxContext *mkv, AVIOContext **pb,
> +                               TagWriterContext *ptw, const AVDictionaryEntry *t,
> +                               uint32_t uid_elementid, uint64_t uid)
>  {
> -    ebml_master targets;
> +    TagWriterContext tw;
> +    unsigned i = 0;
>      int ret;
>  
> -    if (!*pb) {
> -        ret = start_ebml_master_crc32(pb, mkv);
> +    if (t) {
> +        ret = mkv_tagwriter_init(&tw, t);
>          if (ret < 0)
> -            return ret;
> +            goto out;
> +
> +        tw.uid = uid;
> +        tw.uid_elementid = uid_elementid;
> +    }
> +
> +    /* 1. Find common levels. */
> +    if (ptw) {
> +        if (t &&
> +            ptw->uid == tw.uid &&
> +            ptw->uid_elementid == tw.uid_elementid)
> +            for (;
> +                 i < tw.depth && i < ptw->depth &&
> +                 !strcmp(tw.levels[i].name, ptw->levels[i].name);
> +                 ++i)
> +                tw.levels[i].tag = ptw->levels[i].tag;
> +
> +        /* 2. Close unneded tags. */
> +        for (unsigned j = ptw->depth; i < j;)
> +            end_ebml_master(*pb, ptw->levels[--j].tag);
> +    }
> +
> +    /* 3. Open new tags. */
> +    if (t) {
> +        for (; i < tw.depth; ++i) {
> +            if (!i) {
> +                ret = mkv_start_tag(mkv, pb, &tw);
> +                if (ret < 0)
> +                    return ret;
> +            } else {
> +                tw.levels[i].tag = start_ebml_master(*pb, MATROSKA_ID_SIMPLETAG, 0);
> +                put_ebml_string(*pb, MATROSKA_ID_TAGNAME, tw.levels[i].name);
> +            }
> +        }
> +
> +        /* 4. Write contents to the last one (intermediate tags will have name only). */
> +        if (tw.lang)
> +            put_ebml_string(*pb, MATROSKA_ID_TAGLANG, tw.lang);
> +        if (!tw.def)
> +            put_ebml_uint(*pb, MATROSKA_ID_TAGDEFAULT, tw.def);
> +        put_ebml_string(*pb, MATROSKA_ID_TAGSTRING, t->value);
>      }
>  
> -    *tag    = start_ebml_master(*pb, MATROSKA_ID_TAG,        0);
> -    targets = start_ebml_master(*pb, MATROSKA_ID_TAGTARGETS, 4 + 1 + 8);
> -    if (elementid)
> -        put_ebml_uid(*pb, elementid, uid);
> -    end_ebml_master(*pb, targets);
> -    return 0;
> +out:
> +    if (ptw)
> +        av_free(ptw->data);
> +    /* Save new state. */
> +    memcpy(ptw, &tw, sizeof(tw));
> +    return ret;
>  }
>  
>  static int mkv_check_tag_name(const char *name, uint32_t elementid)
> @@ -1501,58 +1632,36 @@ static int mkv_check_tag_name(const char *name, uint32_t elementid)
>               av_strcasecmp(name, "mimetype")));
>  }
>  
> -static int mkv_write_tag(MatroskaMuxContext *mkv, const AVDictionary *m,
> -                         AVIOContext **pb, ebml_master *tag,
> +static int mkv_write_tag(MatroskaMuxContext *mkv, const AVDictionary *metadata,
> +                         AVIOContext **pb, TagWriterContext *tw,
>                           uint32_t elementid, uint64_t uid)
>  {
>      const AVDictionaryEntry *t = NULL;
> -    ebml_master tag2;
>      int ret;
>  
> -    ret = mkv_write_tag_targets(mkv, pb, tag ? tag : &tag2, elementid, uid);
> -    if (ret < 0)
> -        return ret;
> -
> -    while ((t = av_dict_get(m, "", t, AV_DICT_IGNORE_SUFFIX))) {
> +    while ((t = av_dict_get(metadata, "", t, AV_DICT_IGNORE_SUFFIX))) {
>          if (mkv_check_tag_name(t->key, elementid)) {
> -            ret = mkv_write_simpletag(*pb, t);
> +            ret = mkv_write_simpletag(mkv, pb, tw, t, elementid, uid);
>              if (ret < 0)
>                  return ret;
>          }
>      }
> -
> -    if (!tag)
> -        end_ebml_master(*pb, tag2);
> -
>      return 0;
>  }
>  
> -static int mkv_check_tag(const AVDictionary *m, uint32_t elementid)
> -{
> -    const AVDictionaryEntry *t = NULL;
> -
> -    while ((t = av_dict_get(m, "", t, AV_DICT_IGNORE_SUFFIX)))
> -        if (mkv_check_tag_name(t->key, elementid))
> -            return 1;
> -
> -    return 0;
> -}
> -
> -static int mkv_write_tags(AVFormatContext *s)
> +static int mkv_write_tags(AVFormatContext *s, TagWriterContext *tw)
>  {
>      MatroskaMuxContext *mkv = s->priv_data;
> -    ebml_master tag, *tagp = IS_SEEKABLE(s->pb, mkv) ? &tag : NULL;
>      int i, ret;
> +    AVIOContext **pb = &mkv->tags.bc;
>  
>      mkv->wrote_tags = 1;
>  
>      ff_metadata_conv_ctx(s, ff_mkv_metadata_conv, NULL);
>  
> -    if (mkv_check_tag(s->metadata, 0)) {
> -        ret = mkv_write_tag(mkv, s->metadata, &mkv->tags.bc, NULL, 0, 0);
> -        if (ret < 0)
> -            return ret;
> -    }
> +    ret = mkv_write_tag(mkv, s->metadata, pb, tw, 0, 0);
> +    if (ret < 0)
> +        return ret;
>  
>      for (i = 0; i < s->nb_streams; i++) {
>          const AVStream *st = s->streams[i];
> @@ -1561,28 +1670,24 @@ static int mkv_write_tags(AVFormatContext *s)
>          if (st->codecpar->codec_type == AVMEDIA_TYPE_ATTACHMENT)
>              continue;
>  
> -        if (!tagp && !mkv_check_tag(st->metadata, MATROSKA_ID_TAGTARGETS_TRACKUID))
> -            continue;
> -
> -        ret = mkv_write_tag(mkv, st->metadata, &mkv->tags.bc, tagp,
> +        ret = mkv_write_tag(mkv, st->metadata, pb, tw,
>                              MATROSKA_ID_TAGTARGETS_TRACKUID, track->uid);
>          if (ret < 0)
>              return ret;
>  
> -        if (tagp) {
> -            AVIOContext *pb = mkv->tags.bc;
> -            ebml_master simpletag;
> +        if (IS_SEEKABLE(s->pb, mkv)) {
> +            static const AVDictionaryEntry duration_tag = {
> +                .key = (char *)"DURATION",
> +                .value = (char *)TAG_DURATION_PLACEHOLDER,
> +            };
>  
> -            simpletag = start_ebml_master(pb, MATROSKA_ID_SIMPLETAG,
> -                                          2 + 1 + 8 + 23);
> -            put_ebml_string(pb, MATROSKA_ID_TAGNAME, "DURATION");
> -            track->duration_offset = avio_tell(pb);
> +            ret = mkv_write_simpletag(mkv, pb, tw, &duration_tag,
> +                                      MATROSKA_ID_TAGTARGETS_TRACKUID, track->uid);
> +            if (ret < 0)
> +                return ret;
>  
> -            // Reserve space to write duration as a 20-byte string.
> -            // 2 (ebml id) + 1 (data size) + 20 (data)
> -            put_ebml_void(pb, 23);
> -            end_ebml_master(pb, simpletag);
> -            end_ebml_master(pb, tag);
> +            /* Save position at the start of tag value. */
> +            track->duration_offset = avio_tell(*pb) - (sizeof(TAG_DURATION_PLACEHOLDER) - 1);
>          }
>      }
>  
> @@ -1594,24 +1699,24 @@ static int mkv_write_tags(AVFormatContext *s)
>              if (st->codecpar->codec_type != AVMEDIA_TYPE_ATTACHMENT)
>                  continue;
>  
> -            if (!mkv_check_tag(st->metadata, MATROSKA_ID_TAGTARGETS_ATTACHUID))
> -                continue;
> -
> -            ret = mkv_write_tag(mkv, st->metadata, &mkv->tags.bc, NULL,
> +            ret = mkv_write_tag(mkv, st->metadata, pb, tw,
>                                  MATROSKA_ID_TAGTARGETS_ATTACHUID, track->uid);
>              if (ret < 0)
>                  return ret;
>          }
>      }
>  
> -    if (mkv->tags.bc) {
> +    ret = mkv_write_simpletag(mkv, pb, tw, NULL, 0, 0);
> +    if (ret < 0)
> +        return ret;
> +
> +    if (mkv->tags.bc)
>          return end_ebml_master_crc32_tentatively(s->pb, &mkv->tags, mkv,
>                                                   MATROSKA_ID_TAGS);
> -    }
>      return 0;
>  }
>  
> -static int mkv_write_chapters(AVFormatContext *s)
> +static int mkv_write_chapters(AVFormatContext *s, TagWriterContext *tw)
>  {
>      MatroskaMuxContext *mkv = s->priv_data;
>      AVIOContext *dyn_cp = NULL, *dyn_tags = NULL, **tags, *pb = s->pb;
> @@ -1669,23 +1774,29 @@ static int mkv_write_chapters(AVFormatContext *s)
>          }
>          end_ebml_master(dyn_cp, chapteratom);
>  
> -        if (tags && mkv_check_tag(c->metadata, MATROSKA_ID_TAGTARGETS_CHAPTERUID)) {
> -            ret = mkv_write_tag(mkv, c->metadata, tags, NULL,
> +        if (tags) {
> +            ret = mkv_write_tag(mkv, c->metadata, tags, tw,
>                                  MATROSKA_ID_TAGTARGETS_CHAPTERUID,
>                                  (uint32_t)c->id + chapter_id_offset);
>              if (ret < 0)
>                  goto fail;
>          }
>      }
> +
>      end_ebml_master(dyn_cp, editionentry);
>      mkv->wrote_chapters = 1;
>  
>      ret = end_ebml_master_crc32(pb, &dyn_cp, mkv, MATROSKA_ID_CHAPTERS, 0, 0, 1);
>      if (ret < 0)
>          goto fail;
> -    if (dyn_tags)
> +    if (dyn_tags) {
> +        ret = mkv_write_simpletag(mkv, &dyn_tags, tw, NULL, 0, 0);
> +        if (ret < 0)
> +            return ret;
> +
>          return end_ebml_master_crc32(pb, &dyn_tags, mkv,
>                                       MATROSKA_ID_TAGS, 0, 0, 1);
> +    }
>      return 0;
>  
>  fail:
> @@ -1791,6 +1902,7 @@ static int mkv_write_header(AVFormatContext *s)
>      const AVDictionaryEntry *tag;
>      int ret, i, version = 2;
>      int64_t creation_time;
> +    TagWriterContext tw = { 0 };
>  
>      if (mkv->mode != MODE_WEBM ||
>          av_dict_get(s->metadata, "stereo_mode", NULL, 0) ||
> @@ -1881,7 +1993,7 @@ static int mkv_write_header(AVFormatContext *s)
>      if (ret < 0)
>          return ret;
>  
> -    ret = mkv_write_chapters(s);
> +    ret = mkv_write_chapters(s, &tw);
>      if (ret < 0)
>          return ret;
>  
> @@ -1893,7 +2005,7 @@ static int mkv_write_header(AVFormatContext *s)
>  
>      /* Must come after mkv_write_chapters() to write chapter tags
>       * into the same Tags element as the other tags. */
> -    ret = mkv_write_tags(s);
> +    ret = mkv_write_tags(s, &tw);
>      if (ret < 0)
>          return ret;
>  
> @@ -2458,6 +2570,7 @@ static int mkv_write_trailer(AVFormatContext *s)
>      AVIOContext *pb = s->pb;
>      int64_t endpos, ret64;
>      int ret, ret2 = 0;
> +    TagWriterContext tw = { 0 };
>  
>      // check if we have an audio packet cached
>      if (mkv->cur_audio_pkt.size > 0) {
> @@ -2476,7 +2589,7 @@ static int mkv_write_trailer(AVFormatContext *s)
>              return ret;
>      }
>  
> -    ret = mkv_write_chapters(s);
> +    ret = mkv_write_chapters(s, &tw);
>      if (ret < 0)
>          return ret;
>  
> @@ -2579,20 +2692,19 @@ after_cues:
>              const AVStream     *st = s->streams[i];
>              const mkv_track *track = &mkv->tracks[i];
>  
> -            if (track->duration_offset > 0) {
> +            if (track->duration_offset) {
>                  double duration_sec = track->duration * av_q2d(st->time_base);
> -                char duration_string[20] = "";
> +                char duration_string[sizeof TAG_DURATION_PLACEHOLDER];
>  
>                  av_log(s, AV_LOG_DEBUG, "stream %d end duration = %" PRIu64 "\n", i,
>                         track->duration);
>  
> -                avio_seek(mkv->tags.bc, track->duration_offset, SEEK_SET);
> -
> -                snprintf(duration_string, 20, "%02d:%02d:%012.9f",
> -                         (int) duration_sec / 3600, ((int) duration_sec / 60) % 60,
> +                snprintf(duration_string, sizeof duration_string, "%02d:%02d:%012.9f",
> +                         (int)duration_sec / 3600, ((int)duration_sec / 60) % 60,
>                           fmod(duration_sec, 60));
>  
> -                put_ebml_binary(mkv->tags.bc, MATROSKA_ID_TAGSTRING, duration_string, 20);
> +                avio_seek(mkv->tags.bc, track->duration_offset, SEEK_SET);
> +                avio_write(mkv->tags.bc, duration_string, sizeof TAG_DURATION_PLACEHOLDER - 1);
>              }
>          }
>  
> 

Your patch requires changes to FATE, our regression-testsuite. This is
not bad in itself and to be expected from a patch like this, but the
changes need to be part of the very same patch.

- Andreas


More information about the ffmpeg-devel mailing list