[FFmpeg-devel] [PATCH 2/2] lavf: Add WebM DASH Manifest Muxer

Thu Jul 10 21:58:03 CEST 2014

On Mon, Jul 07, 2014 at 01:41:34PM -0700, Vignesh Venkatasubramanian wrote:
> This patch adds the ability to generate WebM DASH manifest XML using
> ffmpeg. A sample command line would be as follows:
> 
> ffmpeg \
>   -f webm_dash_manifest -i video1.webm \
>   -f webm_dash_manifest -i video2.webm \
>   -f webm_dash_manifest -i audio1.webm \
>   -f webm_dash_manifest -i audio2.webm \
>   -map 0 -map 1 -map 2 -map 3 \
>   -c copy \
>   -f webm_dash_manifest \
>   -adaptation_sets "id=0,streams=0,1 id=1,streams=2,3" \
>   manifest.xml
> 
> It works by exporting necessary fields as metadata tags in matroskadec
> and use those values to write the appropriate XML fields as per the WebM
> DASH Specification [1]. Some ideas are adopted from webm-tools project
> [2].
> 
> [1]
> https://sites.google.com/a/webmproject.org/wiki/adaptive-streaming/webm-dash-specification
> [2]
> https://chromium.googlesource.com/webm/webm-tools/+/master/webm_dash_manifest/
> 
> Signed-off-by: Vignesh Venkatasubramanian <vigneshv at google.com>

What if the input is not WebM but, let's say, MPEG-TS?

Also, a FATE test is needed. Maintaining code without any way to test
it is probably more work for you, though it's your time, so I don't
really mind.

[...]

> +static double get_duration(AVFormatContext *s)
> +{
> +    int i = 0;
> +    double max = 0.0;
> +    for (i = 0; i < s->nb_streams; i++) {
> +        AVDictionaryEntry *duration = av_dict_get(s->streams[i]->metadata,
> +                                                  DURATION, NULL, 0);
> +        if (atof(duration->value) > max) max = atof(duration->value);

This lacks a check that the metadata entry exists (av_dict_get() can
return NULL), and it lacks checks that the value is valid.

Also, floating point may cause problems with regression tests.

> +    }
> +    return max / 1000;
> +}
> +
> +static void write_header(AVFormatContext *s)
> +{
> +    double min_buffer_time = 1.0;
> +    avio_printf(s->pb, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
> +    avio_printf(s->pb, "<MPD\n");
> +    avio_printf(s->pb, "  xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n");
> +    avio_printf(s->pb, "  xmlns=\"urn:mpeg:DASH:schema:MPD:2011\"\n");
> +    avio_printf(s->pb, "  xsi:schemaLocation=\"urn:mpeg:DASH:schema:MPD:2011\"\n");
> +    avio_printf(s->pb, "  type=\"static\"\n");
> +    avio_printf(s->pb, "  mediaPresentationDuration=\"PT%gS\"\n", get_duration(s));
> +    avio_printf(s->pb, "  minBufferTime=\"PT%gS\"\n", min_buffer_time);
> +    avio_printf(s->pb, "  profiles=\"urn:webm:dash:profile:webm-on-demand:2012\"");
> +    avio_printf(s->pb, ">\n");

I don't care much about formatting, but this is a mess.
Please separate the format strings from the arguments somehow.
It can be with \n, vertical alignment, or anything else you like, but as is
it's pretty hard to read. This also applies to other parts of the patch.

> +}
> +
> +static void write_footer(AVFormatContext *s)
> +{
> +    avio_printf(s->pb, "</MPD>");
> +}
> +

> +static int subsegment_alignment(AVFormatContext *s, AdaptationSet *as) {
> +    int i;
> +    AVDictionaryEntry *gold = av_dict_get(s->streams[as->streams[0]]->metadata,
> +                                          CUE_TIMESTAMPS, NULL, 0);
> +    for (i = 1; i < as->nb_streams; i++) {
> +        AVDictionaryEntry *ts = av_dict_get(s->streams[as->streams[i]]->metadata,
> +                                            CUE_TIMESTAMPS, NULL, 0);
> +        if (strncmp(gold->value, ts->value, strlen(gold->value))) return 0;

Checks are lacking here too, similar to the duration case.

> +    }
> +    return 1;
> +}
> +
> +static int bitstream_switching(AVFormatContext *s, AdaptationSet *as) {
> +    int i;
> +    AVDictionaryEntry *gold_track_num = av_dict_get(s->streams[as->streams[0]]->metadata,
> +                                                    TRACK_NUMBER, NULL, 0);
> +    AVCodecContext *gold_codec = s->streams[as->streams[0]]->codec;
> +    for (i = 1; i < as->nb_streams; i++) {
> +        AVDictionaryEntry *track_num = av_dict_get(s->streams[as->streams[i]]->metadata,
> +                                                   TRACK_NUMBER, NULL, 0);
> +        AVCodecContext *codec = s->streams[as->streams[i]]->codec;
> +        if (strncmp(gold_track_num->value, track_num->value, strlen(gold_track_num->value)) ||
> +            gold_codec->codec_id != codec->codec_id ||
> +            gold_codec->extradata_size != codec->extradata_size ||
> +            memcmp(gold_codec->extradata, codec->extradata, codec->extradata_size)) {
> +            return 0;
> +        }
> +    }
> +    return 1;
> +}
> +
> +static void write_adaptation_set(AVFormatContext *s, int as_index)
> +{
> +    WebMDashMuxContext *w = s->priv_data;
> +    AdaptationSet *as = &w->as[as_index];
> +    AVCodecContext *codec = s->streams[as->streams[0]]->codec;
> +    int i;

> +    char boolean[2][6] = { "false", "true" };

static const char

> +    int subsegmentStartsWithSAP = 1;
> +    AVDictionaryEntry *lang;
> +    avio_printf(s->pb, "<AdaptationSet id=\"%s\"", as->id);
> +    avio_printf(s->pb, " mimeType=\"%s/webm\"",
> +                codec->codec_type == AVMEDIA_TYPE_VIDEO ? "video" : "audio");
> +    avio_printf(s->pb, " codecs=\"%s\"", get_codec_name(codec->codec_id));
> +
> +    lang = av_dict_get(s->streams[as->streams[0]]->metadata, "language", NULL, 0);
> +    if (lang != NULL) avio_printf(s->pb, " lang=\"%s\"", lang->value);
> +
> +    if (codec->codec_type == AVMEDIA_TYPE_VIDEO) {
> +        avio_printf(s->pb, " width=\"%d\"", codec->width);
> +        avio_printf(s->pb, " height=\"%d\"", codec->height);
> +    } else {
> +        avio_printf(s->pb, " audioSamplingRate=\"%d\"", codec->sample_rate);
> +    }
> +
> +    avio_printf(s->pb, " bitstreamSwitching=\"%s\"", boolean[bitstream_switching(s, as)]);
> +    avio_printf(s->pb, " subsegmentAlignment=\"%s\"", boolean[subsegment_alignment(s, as)]);
> +
> +    for (i = 0; i < as->nb_streams; i++) {
> +        AVDictionaryEntry *kf = av_dict_get(s->streams[as->streams[i]]->metadata,
> +                                            CLUSTER_KEYFRAME, NULL, 0);
> +        if (!strncmp(kf->value, "0", 1)) subsegmentStartsWithSAP = 0;
> +    }
> +    avio_printf(s->pb, " subsegmentStartsWithSAP=\"%d\"", subsegmentStartsWithSAP);
> +    avio_printf(s->pb, ">\n");
> +
> +    for (i = 0; i < as->nb_streams; i++) {
> +        AVStream *stream = s->streams[as->streams[i]];
> +        AVDictionaryEntry *irange = av_dict_get(stream->metadata, INITIALIZATION_RANGE, NULL, 0);
> +        AVDictionaryEntry *cues_start = av_dict_get(stream->metadata, CUES_START, NULL, 0);
> +        AVDictionaryEntry *cues_end = av_dict_get(stream->metadata, CUES_END, NULL, 0);
> +        AVDictionaryEntry *filename = av_dict_get(stream->metadata, FILENAME, NULL, 0);
> +        AVDictionaryEntry *bandwidth = av_dict_get(stream->metadata, BANDWIDTH, NULL, 0);
> +        avio_printf(s->pb, "<Representation id=\"%d\"", i);
> +        avio_printf(s->pb, " bandwidth=\"%s\"", bandwidth->value);
> +        avio_printf(s->pb, ">\n");
> +        avio_printf(s->pb, "<BaseURL>%s</BaseURL>\n", filename->value);
> +        avio_printf(s->pb, "<SegmentBase\n");
> +        avio_printf(s->pb, "  indexRange=\"%s-%s\">\n", cues_start->value, cues_end->value);
> +        avio_printf(s->pb, "<Initialization\n");
> +        avio_printf(s->pb, "  range=\"0-%s\" />\n", irange->value);
> +        avio_printf(s->pb, "</SegmentBase>\n");
> +        avio_printf(s->pb, "</Representation>\n");
> +    }
> +    avio_printf(s->pb, "</AdaptationSet>\n");
> +}
> +
> +static int to_integer(char *p, int len)
> +{
> +    int ret;
> +    char *q = (char*)av_malloc(sizeof(char) * len);

useless cast

> +    strncpy(q, p, len);

missing malloc failure check

> +    ret = atoi(q);
> +    av_free(q);
> +    return ret;
> +}
> +
> +static int parse_adaptation_sets(AVFormatContext *s)
> +{
> +    WebMDashMuxContext *w = s->priv_data;
> +    char *p = w->adaptation_sets;
> +    char *q;
> +    enum { new_set, parsed_id, parsing_streams } state;
> +    // syntax id=0,streams=0,1,2 id=1,streams=3,4 and so on
> +    state = new_set;
> +    while (p < w->adaptation_sets + strlen(w->adaptation_sets)) {
> +        if (*p == ' ')
> +            continue;
> +        else if (state == new_set && !strncmp(p, "id=", 3)) {
> +            w->as = av_realloc(w->as, sizeof(*w->as) * ++w->nb_as);
> +            w->as[w->nb_as - 1].nb_streams = 0;

missing realloc failure check

> +            w->as[w->nb_as - 1].streams = NULL;
> +            p += 3; // consume "id="
> +            q = w->as[w->nb_as - 1].id;
> +            while (*p != ',') *q++ = *p++;
> +            *q = 0;
> +            p++;
> +            state = parsed_id;
> +        } else if (state == parsed_id && !strncmp(p, "streams=", 8)) {
> +            p += 8; // consume "streams="
> +            state = parsing_streams;
> +        } else if (state == parsing_streams) {
> +            struct AdaptationSet *as = &w->as[w->nb_as - 1];
> +            q = p;
> +            while (*q != '\0' && *q != ',' && *q != ' ') q++;
> +            as->streams = av_realloc(as->streams, sizeof(*as->streams) * ++as->nb_streams);
> +            as->streams[as->nb_streams - 1] = to_integer(p, q - p);
> +            if (*q == '\0') break;
> +            if (*q == ' ') state = new_set;
> +            p = ++q;
> +        } else {
> +            return -1;
> +        }
> +    }
> +    return 0;
> +}
> +
> +static int webm_dash_manifest_write_header(AVFormatContext *s)
> +{
> +    int i;
> +    double start = 0.0;
> +    WebMDashMuxContext *w = s->priv_data;
> +    parse_adaptation_sets(s);
> +    write_header(s);
> +    avio_printf(s->pb, "<Period id=\"0\"");
> +    avio_printf(s->pb, " start=\"PT%gS\"", start);
> +    avio_printf(s->pb, " duration=\"PT%gS\"", get_duration(s));
> +    avio_printf(s->pb, " >\n");
> +
> +    for (i = 0; i < w->nb_as; i++) {
> +        write_adaptation_set(s, i);
> +    }
> +
> +    avio_printf(s->pb, "</Period>\n");
> +    write_footer(s);
> +    return 0;
> +}
> +

> +static int webm_dash_manifest_write_packet(AVFormatContext *s, AVPacket *pkt)
> +{
> +    return 0;

This is surely not correct.
Either it should fail if no packets are expected, or, if there are
packets, they should be considered or cross-checked against the metadata.

> +}
> +

> +static int webm_dash_manifest_write_trailer(AVFormatContext *s)
> +{
> +    WebMDashMuxContext *w = s->priv_data;
> +    int i;
> +    for (i = 0; i < w->nb_as; i++) {
> +        av_free(w->as[i].streams);

av_freep()

> +    }
> +    av_free(w->as);

av_freep()

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Good people do not need laws to tell them to act responsibly, while bad
people will find a way around the laws. -- Plato
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 181 bytes
Desc: Digital signature
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20140710/ddd02dbb/attachment.asc>