[FFmpeg-devel] [PATCH] avformat/matroska: Handle TargetType and nested SimpleTags in metadata

zsugabubus at national.shitposting.agency zsugabubus at national.shitposting.agency
Tue Dec 8 21:35:16 EET 2020


Metadata names now recognized in the following format:

  [ [TargetTypeValue][TargetType] "/" ] TagName [ "/" TagName ]... [ "@" [ "-" ] [TagLanguage] ]

Language designators separated with "@" instead of "-" to avoid future
ambiguity about IETF language tags, like "en-US" and similar.
"-" designates TagDefault := 0.

Signed-off-by: zsugabubus <zsugabubus at national.shitposting.agency>
---
 libavformat/matroska.c    |  56 ++++++-
 libavformat/matroska.h    |   9 ++
 libavformat/matroskadec.c | 286 +++++++++++++++++++++--------------
 libavformat/matroskaenc.c | 308 ++++++++++++++++++++++++++------------
 4 files changed, 449 insertions(+), 210 deletions(-)

diff --git a/libavformat/matroska.c b/libavformat/matroska.c
index 7c56aba..4fcade2 100644
--- a/libavformat/matroska.c
+++ b/libavformat/matroska.c
@@ -120,11 +120,63 @@ const CodecTags ff_webm_codec_tags[] = {
 };
 
 const AVMetadataConv ff_mkv_metadata_conv[] = {
-    { "LEAD_PERFORMER", "performer" },
-    { "PART_NUMBER"   , "track"  },
+    { "TRACK/ARTIST",        "artist"       },
+    { "ALBUM/ARTIST",        "album_artist" },
+    { "ALBUM/TITLE",         "album"        },
+    { "ALBUM/DATE_RELEASED", "date"         },
+    { "TRACK/TITLE",         "title"        },
+    { "ALBUM/PART_NUMBER",   "track"        },
+    { "ALBUM/TOTAL_PARTS",   "track_total"  },
+    { "VOLUME/PART_NUMBER",  "disc"         },
+    { "VOLUME/TOTAL_PARTS",  "disc_total"   },
+    { "TRACK/GENRE",         "genre"        },
+
+    { "50/COMMENT",          "comment"      },
+    { "ALBUM/COMMENT",       "comment"      },
+    { "EPISODE/COMMENT",     "comment"      },
     { 0 }
 };
 
+#define A (1 << AVMEDIA_TYPE_AUDIO)
+#define V (1 << AVMEDIA_TYPE_VIDEO)
+#define AV A | V
+
+/* Note: When converting from typevalues -> strings first matching entry is used. */
+const TypeValueConv ff_mkv_typevalue_conv[] = {
+    { "COLLECTION", 70, AV },
+
+    { "SEASON",     60, V  },
+    { "VOLUME",     60, AV },
+    { "EDITION",    60, A  },
+    { "ISSUE",      60, A  },
+    { "OPUS",       60, A  },
+    { "SEQUEL",     60, V  },
+
+    { "ALBUM",      50, A  },
+    { "EPISODE",    50, V  },
+    { "OPERA",      50, A  },
+    { "MOVIE",      50, V  },
+    { "CONCERT",    50, AV },
+
+    { "PART",       40, AV },
+    { "SESSION",    40, AV },
+
+    { "TRACK",      30, A  },
+    { "CHAPTER",    30, V  },
+    { "SONG",       30, A  },
+
+    { "SUBTRACK",   20, A  },
+    { "MOVEMENT",   20, A  },
+    { "SCENE",      20, V  },
+
+    { "SHOT",       10, V  },
+    { "",            0, -1 }
+};
+
+#undef A
+#undef V
+#undef AV
+
 const char * const ff_matroska_video_stereo_mode[MATROSKA_VIDEO_STEREOMODE_TYPE_NB] = {
     "mono",
     "left_right",
diff --git a/libavformat/matroska.h b/libavformat/matroska.h
index 6f198f0..6dc9582 100644
--- a/libavformat/matroska.h
+++ b/libavformat/matroska.h
@@ -355,14 +355,23 @@ typedef struct CodecTags{
     enum AVCodecID id;
 }CodecTags;
 
+typedef struct {
+    char str[sizeof "COLLECTION"];
+    uint8_t typevalue;
+    uint8_t codec_types; /* (1 << AVMEDIA_TYPE_*)... */
+} TypeValueConv;
+
 /* max. depth in the EBML tree structure */
 #define EBML_MAX_DEPTH 16
 
 #define MATROSKA_VIDEO_STEREO_PLANE_COUNT  3
 
+enum { MATROSKA_DEFAULT_TAGTARGETS_TYPEVALUE = 50 };
+
 extern const CodecTags ff_mkv_codec_tags[];
 extern const CodecTags ff_webm_codec_tags[];
 extern const AVMetadataConv ff_mkv_metadata_conv[];
+extern const TypeValueConv ff_mkv_typevalue_conv[];
 extern const char * const ff_matroska_video_stereo_mode[MATROSKA_VIDEO_STEREOMODE_TYPE_NB];
 extern const char * const ff_matroska_video_stereo_plane[MATROSKA_VIDEO_STEREO_PLANE_COUNT];
 
diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index 5455594..1c0478d 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -98,8 +98,8 @@ typedef enum {
 typedef const struct EbmlSyntax {
     uint32_t id;
     EbmlType type;
-    size_t list_elem_size;
-    size_t data_offset;
+    unsigned list_elem_size;
+    unsigned data_offset;
     union {
         int64_t     i;
         uint64_t    u;
@@ -228,8 +228,8 @@ typedef struct MatroskaTrackOperation {
 } MatroskaTrackOperation;
 
 typedef struct MatroskaTrack {
+    uint64_t uid; /* Must be the first field. */
     uint64_t num;
-    uint64_t uid;
     uint64_t type;
     char    *name;
     char    *codec_id;
@@ -258,7 +258,7 @@ typedef struct MatroskaTrack {
 } MatroskaTrack;
 
 typedef struct MatroskaAttachment {
-    uint64_t uid;
+    uint64_t uid; /* Must be the first field. */
     char *filename;
     char *description;
     char *mime;
@@ -268,9 +268,9 @@ typedef struct MatroskaAttachment {
 } MatroskaAttachment;
 
 typedef struct MatroskaChapter {
+    uint64_t uid; /* Must be the first field. */
     uint64_t start;
     uint64_t end;
-    uint64_t uid;
     char    *title;
 
     AVChapter *chapter;
@@ -286,26 +286,26 @@ typedef struct MatroskaIndex {
     EbmlList pos;
 } MatroskaIndex;
 
-typedef struct MatroskaTag {
+typedef struct MatroskaSimpleTag {
     char *name;
     char *string;
     char *lang;
     uint64_t def;
     EbmlList sub;
-} MatroskaTag;
+} MatroskaSimpleTag;
 
 typedef struct MatroskaTagTarget {
     char    *type;
     uint64_t typevalue;
-    uint64_t trackuid;
-    uint64_t chapteruid;
-    uint64_t attachuid;
+    EbmlList trackuids;
+    EbmlList chapteruids;
+    EbmlList attachuids;
 } MatroskaTagTarget;
 
-typedef struct MatroskaTags {
+typedef struct MatroskaTag {
     MatroskaTagTarget target;
-    EbmlList tag;
-} MatroskaTags;
+    EbmlList simpletags;
+} MatroskaTag;
 
 typedef struct MatroskaSeekhead {
     uint64_t id;
@@ -652,32 +652,32 @@ static EbmlSyntax matroska_index[] = {
 };
 
 static EbmlSyntax matroska_simpletag[] = {
-    { MATROSKA_ID_TAGNAME,        EBML_UTF8, 0,                   offsetof(MatroskaTag, name) },
-    { MATROSKA_ID_TAGSTRING,      EBML_UTF8, 0,                   offsetof(MatroskaTag, string) },
-    { MATROSKA_ID_TAGLANG,        EBML_STR,  0,                   offsetof(MatroskaTag, lang), { .s = "und" } },
-    { MATROSKA_ID_TAGDEFAULT,     EBML_UINT, 0,                   offsetof(MatroskaTag, def) },
-    { MATROSKA_ID_TAGDEFAULT_BUG, EBML_UINT, 0,                   offsetof(MatroskaTag, def) },
-    { MATROSKA_ID_SIMPLETAG,      EBML_NEST, sizeof(MatroskaTag), offsetof(MatroskaTag, sub),  { .n = matroska_simpletag } },
+    { MATROSKA_ID_TAGNAME,        EBML_UTF8, 0,                         offsetof(MatroskaSimpleTag, name) },
+    { MATROSKA_ID_TAGSTRING,      EBML_UTF8, 0,                         offsetof(MatroskaSimpleTag, string) },
+    { MATROSKA_ID_TAGLANG,        EBML_STR,  0,                         offsetof(MatroskaSimpleTag, lang), { .s = "und" } },
+    { MATROSKA_ID_TAGDEFAULT,     EBML_UINT, 0,                         offsetof(MatroskaSimpleTag, def), { .u = 1 } },
+    { MATROSKA_ID_TAGDEFAULT_BUG, EBML_UINT, 0,                         offsetof(MatroskaSimpleTag, def), { .u = 1 } },
+    { MATROSKA_ID_SIMPLETAG,      EBML_NEST, sizeof(MatroskaSimpleTag), offsetof(MatroskaSimpleTag, sub), { .n = matroska_simpletag } },
     CHILD_OF(matroska_tag)
 };
 
 static EbmlSyntax matroska_tagtargets[] = {
-    { MATROSKA_ID_TAGTARGETS_TYPE,       EBML_STR,  0, offsetof(MatroskaTagTarget, type) },
-    { MATROSKA_ID_TAGTARGETS_TYPEVALUE,  EBML_UINT, 0, offsetof(MatroskaTagTarget, typevalue), { .u = 50 } },
-    { MATROSKA_ID_TAGTARGETS_TRACKUID,   EBML_UINT, 0, offsetof(MatroskaTagTarget, trackuid) },
-    { MATROSKA_ID_TAGTARGETS_CHAPTERUID, EBML_UINT, 0, offsetof(MatroskaTagTarget, chapteruid) },
-    { MATROSKA_ID_TAGTARGETS_ATTACHUID,  EBML_UINT, 0, offsetof(MatroskaTagTarget, attachuid) },
+    { MATROSKA_ID_TAGTARGETS_TYPE,       EBML_STR,  0,                offsetof(MatroskaTagTarget, type) },
+    { MATROSKA_ID_TAGTARGETS_TYPEVALUE,  EBML_UINT, 0,                offsetof(MatroskaTagTarget, typevalue), { .u = MATROSKA_DEFAULT_TAGTARGETS_TYPEVALUE } },
+    { MATROSKA_ID_TAGTARGETS_TRACKUID,   EBML_UINT, sizeof(uint64_t), offsetof(MatroskaTagTarget, trackuids) },
+    { MATROSKA_ID_TAGTARGETS_CHAPTERUID, EBML_UINT, sizeof(uint64_t), offsetof(MatroskaTagTarget, chapteruids) },
+    { MATROSKA_ID_TAGTARGETS_ATTACHUID,  EBML_UINT, sizeof(uint64_t), offsetof(MatroskaTagTarget, attachuids) },
     CHILD_OF(matroska_tag)
 };
 
 static EbmlSyntax matroska_tag[] = {
-    { MATROSKA_ID_SIMPLETAG,  EBML_NEST, sizeof(MatroskaTag), offsetof(MatroskaTags, tag),    { .n = matroska_simpletag } },
-    { MATROSKA_ID_TAGTARGETS, EBML_NEST, 0,                   offsetof(MatroskaTags, target), { .n = matroska_tagtargets } },
+    { MATROSKA_ID_SIMPLETAG,  EBML_NEST, sizeof(MatroskaSimpleTag), offsetof(MatroskaTag, simpletags), { .n = matroska_simpletag } },
+    { MATROSKA_ID_TAGTARGETS, EBML_NEST, 0,                         offsetof(MatroskaTag, target),     { .n = matroska_tagtargets } },
     CHILD_OF(matroska_tags)
 };
 
 static EbmlSyntax matroska_tags[] = {
-    { MATROSKA_ID_TAG, EBML_NEST, sizeof(MatroskaTags), offsetof(MatroskaDemuxContext, tags), { .n = matroska_tag } },
+    { MATROSKA_ID_TAG, EBML_NEST, sizeof(MatroskaTag), offsetof(MatroskaDemuxContext, tags), { .n = matroska_tag } },
     CHILD_OF(matroska_segment)
 };
 
@@ -1719,103 +1719,169 @@ failed:
     return result;
 }
 
-static void matroska_convert_tag(AVFormatContext *s, EbmlList *list,
-                                 AVDictionary **metadata, char *prefix)
+static void matroska_get_targettype_string(char *key, unsigned *key_ptr, unsigned key_size,
+                                           MatroskaTag const *tag, enum AVMediaType codec_type)
 {
-    MatroskaTag *tags = list->elem;
-    char key[1024];
-    int i;
+    for (const TypeValueConv *conv = ff_mkv_typevalue_conv;
+         tag->target.typevalue <= (uint64_t)conv->typevalue;
+         ++conv)
+        if (tag->target.typevalue == (uint64_t)conv->typevalue &&
+            (codec_type == AVMEDIA_TYPE_UNKNOWN || (conv->codec_types & (1 << codec_type))) &&
+            (!tag->target.type || !av_strcasecmp(tag->target.type, conv->str)))
+        {
+            *key_ptr = strlen(conv->str);
+            memcpy(key, conv->str, sizeof conv->str);
+            return;
+        }
 
-    for (i = 0; i < list->nb_elem; i++) {
-        const char *lang = tags[i].lang &&
-                           strcmp(tags[i].lang, "und") ? tags[i].lang : NULL;
+    *key_ptr = snprintf(key, key_size, "%"PRIu64"%s",
+                        tag->target.typevalue, tag->target.type);
+    if (key_size < *key_ptr)
+        *key_ptr = key_size;
+}
 
-        if (!tags[i].name) {
-            av_log(s, AV_LOG_WARNING, "Skipping invalid tag with no TagName.\n");
+static void matroska_convert_simpletags(AVFormatContext *s, const EbmlList *simpletag_list,
+                                        AVDictionary **metadata,
+                                        char *key, unsigned base_key_ptr, unsigned key_size)
+{
+    MatroskaSimpleTag *simpletags = simpletag_list->elem;
+
+    for (int i = 0; i < simpletag_list->nb_elem; i++) {
+        MatroskaSimpleTag *simpletag = &simpletags[i];
+        const char *lang = simpletag->lang &&
+                           strcmp(simpletag->lang, "und") ? simpletag->lang : NULL;
+        unsigned key_ptr;
+
+        if (!simpletag->name) {
+            av_log(s, AV_LOG_WARNING,
+                   "Skipping invalid Tag with no TagName.\n");
             continue;
         }
-        if (prefix)
-            snprintf(key, sizeof(key), "%s/%s", prefix, tags[i].name);
-        else
-            av_strlcpy(key, tags[i].name, sizeof(key));
-        if (tags[i].def || !lang) {
-            av_dict_set(metadata, key, tags[i].string, 0);
-            if (tags[i].sub.nb_elem)
-                matroska_convert_tag(s, &tags[i].sub, metadata, key);
-        }
-        if (lang) {
-            av_strlcat(key, "-", sizeof(key));
-            av_strlcat(key, lang, sizeof(key));
-            av_dict_set(metadata, key, tags[i].string, 0);
-            if (tags[i].sub.nb_elem)
-                matroska_convert_tag(s, &tags[i].sub, metadata, key);
+
+        key_ptr = base_key_ptr +
+            snprintf(key + base_key_ptr, key_size - base_key_ptr, "%s%s",
+                     base_key_ptr ? "/" : "", simpletag->name);
+        if (key_size <= key_ptr) {
+            av_log(s, AV_LOG_WARNING,
+                   "Skipping SimpleTag with path larger than %u bytes.\n",
+                   key_size);
+            return;
         }
+
+        if (lang)
+            snprintf(key + key_ptr, key_size - key_ptr, "@%s%s",
+                     !simpletag->def ? "-" : "", lang);
+        av_dict_set(metadata, key, simpletag->string, AV_DICT_MULTIKEY);
+
+        matroska_convert_simpletags(s, &simpletag->sub, metadata,
+                                    key, key_ptr, key_size);
     }
+}
+
+static void matroska_convert_tag(AVFormatContext *s, const MatroskaTag *tag,
+                                 AVDictionary **metadata, enum AVMediaType codec_type)
+{
+    char key[1024];
+    unsigned key_ptr;
+
+    matroska_get_targettype_string(key, &key_ptr, sizeof(key),
+                                   tag, codec_type);
+    matroska_convert_simpletags(s, &tag->simpletags, metadata,
+                                key, key_ptr, sizeof(key));
+
     ff_metadata_conv(metadata, NULL, ff_mkv_metadata_conv);
 }
 
-static void matroska_convert_tags(AVFormatContext *s)
+static void matroska_convert_tags_for_attachment_cb(void *data,
+                                                    AVDictionary ***metadata,
+                                                    enum AVMediaType *codec_type)
 {
-    MatroskaDemuxContext *matroska = s->priv_data;
-    MatroskaTags *tags = matroska->tags.elem;
-    int i, j;
+    MatroskaAttachment *attachment = data;
+    if (attachment->stream) {
+        *metadata = &attachment->stream->metadata;
+        *codec_type = attachment->stream->codecpar->codec_type;
+    }
+}
 
-    for (i = 0; i < matroska->tags.nb_elem; i++) {
-        if (tags[i].target.attachuid) {
-            MatroskaAttachment *attachment = matroska->attachments.elem;
-            int found = 0;
-            for (j = 0; j < matroska->attachments.nb_elem; j++) {
-                if (attachment[j].uid == tags[i].target.attachuid &&
-                    attachment[j].stream) {
-                    matroska_convert_tag(s, &tags[i].tag,
-                                         &attachment[j].stream->metadata, NULL);
-                    found = 1;
-                }
-            }
-            if (!found) {
-                av_log(s, AV_LOG_WARNING,
-                       "The tags at index %d refer to a "
-                       "non-existent attachment %"PRId64".\n",
-                       i, tags[i].target.attachuid);
-            }
-        } else if (tags[i].target.chapteruid) {
-            MatroskaChapter *chapter = matroska->chapters.elem;
-            int found = 0;
-            for (j = 0; j < matroska->chapters.nb_elem; j++) {
-                if (chapter[j].uid == tags[i].target.chapteruid &&
-                    chapter[j].chapter) {
-                    matroska_convert_tag(s, &tags[i].tag,
-                                         &chapter[j].chapter->metadata, NULL);
-                    found = 1;
-                }
-            }
-            if (!found) {
-                av_log(s, AV_LOG_WARNING,
-                       "The tags at index %d refer to a non-existent chapter "
-                       "%"PRId64".\n",
-                       i, tags[i].target.chapteruid);
-            }
-        } else if (tags[i].target.trackuid) {
-            MatroskaTrack *track = matroska->tracks.elem;
-            int found = 0;
-            for (j = 0; j < matroska->tracks.nb_elem; j++) {
-                if (track[j].uid == tags[i].target.trackuid &&
-                    track[j].stream) {
-                    matroska_convert_tag(s, &tags[i].tag,
-                                         &track[j].stream->metadata, NULL);
-                    found = 1;
-               }
-            }
-            if (!found) {
-                av_log(s, AV_LOG_WARNING,
-                       "The tags at index %d refer to a non-existent track "
-                       "%"PRId64".\n",
-                       i, tags[i].target.trackuid);
+static void matroska_convert_tags_for_chapter_cb(void *data,
+                                                 AVDictionary ***metadata,
+                                                 enum AVMediaType *codec_type)
+{
+    MatroskaChapter *chapter = data;
+    if (chapter->chapter)
+        *metadata = &chapter->chapter->metadata;
+}
+
+static void matroska_convert_tags_for_track_cb(void *data,
+                                               AVDictionary ***metadata,
+                                               enum AVMediaType *codec_type)
+{
+    MatroskaTrack *track = data;
+    if (track->stream) {
+        *metadata = &track->stream->metadata;
+        *codec_type = track->stream->codecpar->codec_type;
+    }
+}
+
+/* First field of elements in elem_list is expected to contain the uid. */
+static int matroska_convert_tag_for_uids(AVFormatContext *s, const MatroskaTag *tag,
+                                         EbmlList *elem_list, unsigned list_elem_size,
+                                         EbmlList *uid_list, const char *elem_name,
+                                         void(*elem_cb)(void *data, AVDictionary ***, enum AVMediaType *))
+{
+    uint64_t *uids = uid_list->elem;
+
+    for (int i = 0; i < uid_list->nb_elem; ++i) {
+        uint64_t uid = uids[i];
+        int found = 0;
+
+        void *data = elem_list->elem;
+        for (int j = 0; j < elem_list->nb_elem; ++j, data = (char *)data + list_elem_size) {
+            uint64_t elem_uid = *(uint64_t *)data;
+
+            if (uid == 0 /* Matches all. */ || uid == elem_uid) {
+                AVDictionary **metadata = NULL;
+                enum AVMediaType codec_type = AVMEDIA_TYPE_UNKNOWN;
+
+                found = 1;
+                elem_cb(data, &metadata, &codec_type);
+
+                if (metadata)
+                    matroska_convert_tag(s, tag, metadata, codec_type);
             }
-        } else {
-            matroska_convert_tag(s, &tags[i].tag, &s->metadata,
-                                 tags[i].target.type);
         }
+
+        if (!found)
+            av_log(s, AV_LOG_WARNING,
+                   "Tags element refer to a non-existent %s %"PRIu64".\n",
+                   elem_name, uid);
+    }
+
+    return uid_list->nb_elem;
+}
+
+static void matroska_convert_tags(AVFormatContext *s)
+{
+    MatroskaDemuxContext *matroska = s->priv_data;
+    MatroskaTag *tags = matroska->tags.elem;
+
+    for (int i = 0; i < matroska->tags.nb_elem; i++) {
+        MatroskaTag *tag = &tags[i];
+        int any = 0;
+        any |= matroska_convert_tag_for_uids(s, tag,
+                                             &matroska->attachments, sizeof(MatroskaAttachment),
+                                             &tag->target.attachuids, "attachment",
+                                             matroska_convert_tags_for_attachment_cb);
+        any |= matroska_convert_tag_for_uids(s, tag,
+                                             &matroska->chapters, sizeof(MatroskaChapter),
+                                             &tag->target.chapteruids, "chapter",
+                                             matroska_convert_tags_for_chapter_cb);
+        any |= matroska_convert_tag_for_uids(s, tag,
+                                             &matroska->tracks, sizeof(MatroskaTrack),
+                                             &tag->target.trackuids, "track",
+                                             matroska_convert_tags_for_track_cb);
+        if (!any)
+            matroska_convert_tag(s, tag, &s->metadata, AVMEDIA_TYPE_UNKNOWN);
     }
 }
 
diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
index 233c472..2a098c1 100644
--- a/libavformat/matroskaenc.c
+++ b/libavformat/matroskaenc.c
@@ -61,6 +61,10 @@
 #define IS_SEEKABLE(pb, mkv) (((pb)->seekable & AVIO_SEEKABLE_NORMAL) && \
                               !(mkv)->is_live)
 
+#define TAG_DURATION_PLACEHOLDER "00:00:00.000000000"
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
+
 enum {
     DEFAULT_MODE_INFER,
     DEFAULT_MODE_INFER_NO_SUBS,
@@ -1433,58 +1437,185 @@ static int mkv_write_tracks(AVFormatContext *s)
                                              MATROSKA_ID_TRACKS);
 }
 
-static int mkv_write_simpletag(AVIOContext *pb, const AVDictionaryEntry *t)
+typedef struct {
+    uint64_t typevalue;
+    uint64_t uid;
+    uint32_t uid_elementid;
+    const char *lang;
+    unsigned def;
+    char *data;
+    /* (TargetValue, Tag), [ (TagName, SimpleTag)... ] */
+    unsigned depth;
+    struct {
+        char *name;
+        ebml_master tag;
+    } levels[EBML_MAX_DEPTH - 1 /* Tags */ - 1 /* Segment */];
+} TagWriterContext;
+
+static int mkv_start_tag(MatroskaMuxContext *mkv, AVIOContext **pb,
+                         TagWriterContext *tw)
+{
+    ebml_master targets;
+    int ret;
+    const char *type = tw->levels[0].name;
+
+    if (!*pb) {
+        ret = start_ebml_master_crc32(pb, mkv);
+        if (ret < 0)
+            return ret;
+    }
+
+    tw->levels[0].tag = start_ebml_master(*pb, MATROSKA_ID_TAG,        0);
+    targets = start_ebml_master(*pb, MATROSKA_ID_TAGTARGETS, 0);
+
+    if (*type || tw->typevalue != MATROSKA_DEFAULT_TAGTARGETS_TYPEVALUE)
+        put_ebml_uint(*pb, MATROSKA_ID_TAGTARGETS_TYPEVALUE, tw->typevalue);
+    if (*type)
+        put_ebml_string(*pb, MATROSKA_ID_TAGTARGETS_TYPE, type);
+    if (tw->uid_elementid)
+        put_ebml_uid(*pb, tw->uid_elementid, tw->uid);
+
+    end_ebml_master(*pb, targets);
+    return 0;
+}
+
+static uint64_t mkv_type2typevalue(const char *type)
 {
+    char type_buf[sizeof(((TypeValueConv *)0)->str)];
+
+    strncpy(type_buf, type, sizeof type_buf);
+
+    for (const TypeValueConv *conv = ff_mkv_typevalue_conv;
+         conv->typevalue;
+         ++conv)
+        if (!memcmp(type_buf, conv->str, sizeof(type_buf)))
+            return conv->typevalue;
+
+    return MATROSKA_DEFAULT_TAGTARGETS_TYPEVALUE;
+}
+
+static int mkv_tagwriter_init(TagWriterContext *tw, const AVDictionaryEntry *t)
+{
+    enum { MAX_DEPTH = ARRAY_SIZE(tw->levels) };
+
     uint8_t *key = av_strdup(t->key);
-    uint8_t *p   = key;
-    const uint8_t *lang = NULL;
-    ebml_master tag;
+    char *p = key;
+    unsigned depth;
 
     if (!key)
         return AVERROR(ENOMEM);
 
-    if ((p = strrchr(p, '-')) &&
-        (lang = ff_convert_lang_to(p + 1, AV_LANG_ISO639_2_BIBL)))
-        *p = 0;
+    /* TAG at -lang */
+    if ((p = strrchr(p, '@'))) {
+        tw->def = p[1] != '-';
+        tw->lang = ff_convert_lang_to(p + 1 + !tw->def, AV_LANG_ISO639_2_BIBL);
+        *p = '\0';
+    } else {
+        tw->def = 1;
+        tw->lang = NULL;
+    }
 
-    p = key;
-    while (*p) {
+    /* "hello world" -> "HELLO_WORLD" */
+    for (p = key; *p; ++p)
         if (*p == ' ')
             *p = '_';
-        else if (*p >= 'a' && *p <= 'z')
+        else if ('a' <= *p && *p <= 'z')
             *p -= 'a' - 'A';
-        p++;
-    }
 
-    tag = start_ebml_master(pb, MATROSKA_ID_SIMPLETAG, 0);
-    put_ebml_string(pb, MATROSKA_ID_TAGNAME, key);
-    if (lang)
-        put_ebml_string(pb, MATROSKA_ID_TAGLANG, lang);
-    put_ebml_string(pb, MATROSKA_ID_TAGSTRING, t->value);
-    end_ebml_master(pb, tag);
+    tw->data = key;
+
+    depth = 0;
+    p = key;
+    for (;;) {
+        tw->levels[depth++].name = p;
+        if (!(p = strchr(p, '/')) || MAX_DEPTH <= depth)
+            break;
+        *p++ = '\0';
+    }
+    /* Use default TagTarget for simple tag names (no '/'). */
+    if (1 == depth) {
+        tw->levels[1].name = tw->levels[0].name;
+        tw->levels[0].name = (char *)"";
+        depth++;
+        tw->typevalue = MATROSKA_DEFAULT_TAGTARGETS_TYPEVALUE;
+    } else {
+        /* Try parse "123TARGETTYPE". */
+        p = tw->levels[0].name;
+        if ('0' <= *p && *p <= '9') {
+            tw->typevalue = 0;
+            do
+                tw->typevalue = tw->typevalue * 10 + (*p++ - '0');
+            while ('0' <= *p && *p <= '9');
+            tw->levels[0].name = *p ? p : (char *)"";
+        } else {
+            tw->typevalue = mkv_type2typevalue(p);
+        }
+    }
+    tw->depth = depth;
 
-    av_freep(&key);
     return 0;
 }
 
-static int mkv_write_tag_targets(MatroskaMuxContext *mkv, AVIOContext **pb,
-                                 ebml_master *tag, uint32_t elementid, uint64_t uid)
+static int mkv_write_simpletag(MatroskaMuxContext *mkv, AVIOContext **pb,
+                               TagWriterContext *ptw, const AVDictionaryEntry *t,
+                               uint32_t uid_elementid, uint64_t uid)
 {
-    ebml_master targets;
+    TagWriterContext tw;
+    unsigned i = 0;
     int ret;
 
-    if (!*pb) {
-        ret = start_ebml_master_crc32(pb, mkv);
+    if (t) {
+        ret = mkv_tagwriter_init(&tw, t);
         if (ret < 0)
-            return ret;
+            goto out;
+
+        tw.uid = uid;
+        tw.uid_elementid = uid_elementid;
+    }
+
+    /* 1. Find common levels. */
+    if (ptw) {
+        if (t &&
+            ptw->uid == tw.uid &&
+            ptw->uid_elementid == tw.uid_elementid)
+            for (;
+                 i < tw.depth && i < ptw->depth &&
+                 !strcmp(tw.levels[i].name, ptw->levels[i].name);
+                 ++i)
+                tw.levels[i].tag = ptw->levels[i].tag;
+
+        /* 2. Close unneded tags. */
+        for (unsigned j = ptw->depth; i < j;)
+            end_ebml_master(*pb, ptw->levels[--j].tag);
+    }
+
+    /* 3. Open new tags. */
+    if (t) {
+        for (; i < tw.depth; ++i) {
+            if (!i) {
+                ret = mkv_start_tag(mkv, pb, &tw);
+                if (ret < 0)
+                    return ret;
+            } else {
+                tw.levels[i].tag = start_ebml_master(*pb, MATROSKA_ID_SIMPLETAG, 0);
+                put_ebml_string(*pb, MATROSKA_ID_TAGNAME, tw.levels[i].name);
+            }
+        }
+
+        /* 4. Write contents to the last one (intermediate tags will have name only). */
+        if (tw.lang)
+            put_ebml_string(*pb, MATROSKA_ID_TAGLANG, tw.lang);
+        if (!tw.def)
+            put_ebml_uint(*pb, MATROSKA_ID_TAGDEFAULT, tw.def);
+        put_ebml_string(*pb, MATROSKA_ID_TAGSTRING, t->value);
     }
 
-    *tag    = start_ebml_master(*pb, MATROSKA_ID_TAG,        0);
-    targets = start_ebml_master(*pb, MATROSKA_ID_TAGTARGETS, 4 + 1 + 8);
-    if (elementid)
-        put_ebml_uid(*pb, elementid, uid);
-    end_ebml_master(*pb, targets);
-    return 0;
+out:
+    if (ptw)
+        av_free(ptw->data);
+    /* Save new state. */
+    memcpy(ptw, &tw, sizeof(tw));
+    return ret;
 }
 
 static int mkv_check_tag_name(const char *name, uint32_t elementid)
@@ -1501,58 +1632,36 @@ static int mkv_check_tag_name(const char *name, uint32_t elementid)
              av_strcasecmp(name, "mimetype")));
 }
 
-static int mkv_write_tag(MatroskaMuxContext *mkv, const AVDictionary *m,
-                         AVIOContext **pb, ebml_master *tag,
+static int mkv_write_tag(MatroskaMuxContext *mkv, const AVDictionary *metadata,
+                         AVIOContext **pb, TagWriterContext *tw,
                          uint32_t elementid, uint64_t uid)
 {
     const AVDictionaryEntry *t = NULL;
-    ebml_master tag2;
     int ret;
 
-    ret = mkv_write_tag_targets(mkv, pb, tag ? tag : &tag2, elementid, uid);
-    if (ret < 0)
-        return ret;
-
-    while ((t = av_dict_get(m, "", t, AV_DICT_IGNORE_SUFFIX))) {
+    while ((t = av_dict_get(metadata, "", t, AV_DICT_IGNORE_SUFFIX))) {
         if (mkv_check_tag_name(t->key, elementid)) {
-            ret = mkv_write_simpletag(*pb, t);
+            ret = mkv_write_simpletag(mkv, pb, tw, t, elementid, uid);
             if (ret < 0)
                 return ret;
         }
     }
-
-    if (!tag)
-        end_ebml_master(*pb, tag2);
-
     return 0;
 }
 
-static int mkv_check_tag(const AVDictionary *m, uint32_t elementid)
-{
-    const AVDictionaryEntry *t = NULL;
-
-    while ((t = av_dict_get(m, "", t, AV_DICT_IGNORE_SUFFIX)))
-        if (mkv_check_tag_name(t->key, elementid))
-            return 1;
-
-    return 0;
-}
-
-static int mkv_write_tags(AVFormatContext *s)
+static int mkv_write_tags(AVFormatContext *s, TagWriterContext *tw)
 {
     MatroskaMuxContext *mkv = s->priv_data;
-    ebml_master tag, *tagp = IS_SEEKABLE(s->pb, mkv) ? &tag : NULL;
     int i, ret;
+    AVIOContext **pb = &mkv->tags.bc;
 
     mkv->wrote_tags = 1;
 
     ff_metadata_conv_ctx(s, ff_mkv_metadata_conv, NULL);
 
-    if (mkv_check_tag(s->metadata, 0)) {
-        ret = mkv_write_tag(mkv, s->metadata, &mkv->tags.bc, NULL, 0, 0);
-        if (ret < 0)
-            return ret;
-    }
+    ret = mkv_write_tag(mkv, s->metadata, pb, tw, 0, 0);
+    if (ret < 0)
+        return ret;
 
     for (i = 0; i < s->nb_streams; i++) {
         const AVStream *st = s->streams[i];
@@ -1561,28 +1670,24 @@ static int mkv_write_tags(AVFormatContext *s)
         if (st->codecpar->codec_type == AVMEDIA_TYPE_ATTACHMENT)
             continue;
 
-        if (!tagp && !mkv_check_tag(st->metadata, MATROSKA_ID_TAGTARGETS_TRACKUID))
-            continue;
-
-        ret = mkv_write_tag(mkv, st->metadata, &mkv->tags.bc, tagp,
+        ret = mkv_write_tag(mkv, st->metadata, pb, tw,
                             MATROSKA_ID_TAGTARGETS_TRACKUID, track->uid);
         if (ret < 0)
             return ret;
 
-        if (tagp) {
-            AVIOContext *pb = mkv->tags.bc;
-            ebml_master simpletag;
+        if (IS_SEEKABLE(s->pb, mkv)) {
+            static const AVDictionaryEntry duration_tag = {
+                .key = (char *)"DURATION",
+                .value = (char *)TAG_DURATION_PLACEHOLDER,
+            };
 
-            simpletag = start_ebml_master(pb, MATROSKA_ID_SIMPLETAG,
-                                          2 + 1 + 8 + 23);
-            put_ebml_string(pb, MATROSKA_ID_TAGNAME, "DURATION");
-            track->duration_offset = avio_tell(pb);
+            ret = mkv_write_simpletag(mkv, pb, tw, &duration_tag,
+                                      MATROSKA_ID_TAGTARGETS_TRACKUID, track->uid);
+            if (ret < 0)
+                return ret;
 
-            // Reserve space to write duration as a 20-byte string.
-            // 2 (ebml id) + 1 (data size) + 20 (data)
-            put_ebml_void(pb, 23);
-            end_ebml_master(pb, simpletag);
-            end_ebml_master(pb, tag);
+            /* Save position at the start of tag value. */
+            track->duration_offset = avio_tell(*pb) - (sizeof(TAG_DURATION_PLACEHOLDER) - 1);
         }
     }
 
@@ -1594,24 +1699,24 @@ static int mkv_write_tags(AVFormatContext *s)
             if (st->codecpar->codec_type != AVMEDIA_TYPE_ATTACHMENT)
                 continue;
 
-            if (!mkv_check_tag(st->metadata, MATROSKA_ID_TAGTARGETS_ATTACHUID))
-                continue;
-
-            ret = mkv_write_tag(mkv, st->metadata, &mkv->tags.bc, NULL,
+            ret = mkv_write_tag(mkv, st->metadata, pb, tw,
                                 MATROSKA_ID_TAGTARGETS_ATTACHUID, track->uid);
             if (ret < 0)
                 return ret;
         }
     }
 
-    if (mkv->tags.bc) {
+    ret = mkv_write_simpletag(mkv, pb, tw, NULL, 0, 0);
+    if (ret < 0)
+        return ret;
+
+    if (mkv->tags.bc)
         return end_ebml_master_crc32_tentatively(s->pb, &mkv->tags, mkv,
                                                  MATROSKA_ID_TAGS);
-    }
     return 0;
 }
 
-static int mkv_write_chapters(AVFormatContext *s)
+static int mkv_write_chapters(AVFormatContext *s, TagWriterContext *tw)
 {
     MatroskaMuxContext *mkv = s->priv_data;
     AVIOContext *dyn_cp = NULL, *dyn_tags = NULL, **tags, *pb = s->pb;
@@ -1669,23 +1774,29 @@ static int mkv_write_chapters(AVFormatContext *s)
         }
         end_ebml_master(dyn_cp, chapteratom);
 
-        if (tags && mkv_check_tag(c->metadata, MATROSKA_ID_TAGTARGETS_CHAPTERUID)) {
-            ret = mkv_write_tag(mkv, c->metadata, tags, NULL,
+        if (tags) {
+            ret = mkv_write_tag(mkv, c->metadata, tags, tw,
                                 MATROSKA_ID_TAGTARGETS_CHAPTERUID,
                                 (uint32_t)c->id + chapter_id_offset);
             if (ret < 0)
                 goto fail;
         }
     }
+
     end_ebml_master(dyn_cp, editionentry);
     mkv->wrote_chapters = 1;
 
     ret = end_ebml_master_crc32(pb, &dyn_cp, mkv, MATROSKA_ID_CHAPTERS, 0, 0, 1);
     if (ret < 0)
         goto fail;
-    if (dyn_tags)
+    if (dyn_tags) {
+        ret = mkv_write_simpletag(mkv, &dyn_tags, tw, NULL, 0, 0);
+        if (ret < 0)
+            return ret;
+
         return end_ebml_master_crc32(pb, &dyn_tags, mkv,
                                      MATROSKA_ID_TAGS, 0, 0, 1);
+    }
     return 0;
 
 fail:
@@ -1791,6 +1902,7 @@ static int mkv_write_header(AVFormatContext *s)
     const AVDictionaryEntry *tag;
     int ret, i, version = 2;
     int64_t creation_time;
+    TagWriterContext tw = { 0 };
 
     if (mkv->mode != MODE_WEBM ||
         av_dict_get(s->metadata, "stereo_mode", NULL, 0) ||
@@ -1881,7 +1993,7 @@ static int mkv_write_header(AVFormatContext *s)
     if (ret < 0)
         return ret;
 
-    ret = mkv_write_chapters(s);
+    ret = mkv_write_chapters(s, &tw);
     if (ret < 0)
         return ret;
 
@@ -1893,7 +2005,7 @@ static int mkv_write_header(AVFormatContext *s)
 
     /* Must come after mkv_write_chapters() to write chapter tags
      * into the same Tags element as the other tags. */
-    ret = mkv_write_tags(s);
+    ret = mkv_write_tags(s, &tw);
     if (ret < 0)
         return ret;
 
@@ -2458,6 +2570,7 @@ static int mkv_write_trailer(AVFormatContext *s)
     AVIOContext *pb = s->pb;
     int64_t endpos, ret64;
     int ret, ret2 = 0;
+    TagWriterContext tw = { 0 };
 
     // check if we have an audio packet cached
     if (mkv->cur_audio_pkt.size > 0) {
@@ -2476,7 +2589,7 @@ static int mkv_write_trailer(AVFormatContext *s)
             return ret;
     }
 
-    ret = mkv_write_chapters(s);
+    ret = mkv_write_chapters(s, &tw);
     if (ret < 0)
         return ret;
 
@@ -2579,20 +2692,19 @@ after_cues:
             const AVStream     *st = s->streams[i];
             const mkv_track *track = &mkv->tracks[i];
 
-            if (track->duration_offset > 0) {
+            if (track->duration_offset) {
                 double duration_sec = track->duration * av_q2d(st->time_base);
-                char duration_string[20] = "";
+                char duration_string[sizeof TAG_DURATION_PLACEHOLDER];
 
                 av_log(s, AV_LOG_DEBUG, "stream %d end duration = %" PRIu64 "\n", i,
                        track->duration);
 
-                avio_seek(mkv->tags.bc, track->duration_offset, SEEK_SET);
-
-                snprintf(duration_string, 20, "%02d:%02d:%012.9f",
-                         (int) duration_sec / 3600, ((int) duration_sec / 60) % 60,
+                snprintf(duration_string, sizeof duration_string, "%02d:%02d:%012.9f",
+                         (int)duration_sec / 3600, ((int)duration_sec / 60) % 60,
                          fmod(duration_sec, 60));
 
-                put_ebml_binary(mkv->tags.bc, MATROSKA_ID_TAGSTRING, duration_string, 20);
+                avio_seek(mkv->tags.bc, track->duration_offset, SEEK_SET);
+                avio_write(mkv->tags.bc, duration_string, sizeof TAG_DURATION_PLACEHOLDER - 1);
             }
         }
 
-- 
2.29.2



More information about the ffmpeg-devel mailing list