[FFmpeg-devel] [PATCH 1/2] lavf/matroska: Add functions for WebM DASH Manifest

Vignesh Venkatasubramanian vigneshv at google.com
Mon Jul 7 18:27:22 CEST 2014


On Mon, Jun 30, 2014 at 5:18 PM, wm4 <nfxjfg at googlemail.com> wrote:
> On Mon, 30 Jun 2014 15:44:36 -0700
> Vignesh Venkatasubramanian <vigneshv at google.com> wrote:
>
>> Add functions and logic to matroskadec for use by the WebM DASH Manifest
>> XML Muxer. The actual muxer is added in a future patch.
>>
>> Signed-off-by: Vignesh Venkatasubramanian <vigneshv at google.com>
>> ---
>>  libavformat/matroska.h    |  12 ++
>>  libavformat/matroskadec.c | 362 ++++++++++++++++++++++++++++++++++++++++++++++
>>  2 files changed, 374 insertions(+)
>>
>> diff --git a/libavformat/matroska.h b/libavformat/matroska.h
>> index 3bb5aee..e01b9de 100644
>> --- a/libavformat/matroska.h
>> +++ b/libavformat/matroska.h
>> @@ -284,4 +284,16 @@ extern const AVMetadataConv ff_mkv_metadata_conv[];
>>  extern const char * const ff_matroska_video_stereo_mode[MATROSKA_VIDEO_STEREO_MODE_COUNT];
>>  extern const char * const ff_matroska_video_stereo_plane[MATROSKA_VIDEO_STEREO_PLANE_COUNT];
>>
>> +/* AVStream Metadata tag keys for WebM Dash Manifest */
>> +#define INITIALIZATION_RANGE "webm_dash_manifest_initialization_range"
>> +#define CUES_START "webm_dash_manifest_cues_start"
>> +#define CUES_END "webm_dash_manifest_cues_end"
>> +#define FILENAME "webm_dash_manifest_file_name"
>> +#define BANDWIDTH "webm_dash_manifest_bandwidth"
>> +#define DURATION "webm_dash_manifest_duration"
>> +#define CLUSTER_KEYFRAME "webm_dash_manifest_cluster_keyframe"
>> +#define CUE_TIMESTAMPS "webm_dash_manifest_cue_timestamps"
>> +#define TRACK_NUMBER "webm_dash_manifest_track_number"
>> +#define CODEC_PRIVATE_SIZE "webm_dash_manifest_codec_priv_size"
>> +
>>  #endif /* AVFORMAT_MATROSKA_H */
>> diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
>> index 213767c..4e45e26 100644
>> --- a/libavformat/matroskadec.c
>> +++ b/libavformat/matroskadec.c
>> @@ -3068,6 +3068,359 @@ static int matroska_read_close(AVFormatContext *s)
>>      return 0;
>>  }
>>
>> +typedef struct {
>> +    int64_t start_time_ns;
>> +    int64_t end_time_ns;
>> +    int64_t start_offset;
>> +    int64_t end_offset;
>> +} CueDesc;
>> +
>> +/* This function searches all the Cues and returns the CueDesc corresponding to
>> + * the timestamp ts. Returned CueDesc will be such that start_time_ns <= ts <
>> + * end_time_ns. All 4 fields will be set to -1 if ts >= file's duration.
>> + */
>> +static CueDesc get_cue_desc(AVFormatContext *s, int64_t ts, int64_t cues_start) {
>> +    MatroskaDemuxContext *matroska = s->priv_data;
>> +    CueDesc cue_desc;
>> +    int i;
>> +    int nb_index_entries = s->streams[0]->nb_index_entries;
>> +    AVIndexEntry *index_entries = s->streams[0]->index_entries;
>> +    if (ts >= matroska->duration * matroska->time_scale) return (CueDesc) {-1, -1, -1, -1};
>> +    for (i = 1; i < nb_index_entries; i++) {
>> +        if (index_entries[i - 1].timestamp * matroska->time_scale <= ts &&
>> +            index_entries[i].timestamp * matroska->time_scale > ts) {
>> +            break;
>> +        }
>> +    }
>> +    --i;
>> +    cue_desc.start_time_ns = index_entries[i].timestamp * matroska->time_scale;
>> +    cue_desc.start_offset = index_entries[i].pos - matroska->segment_start;
>> +    if (i != nb_index_entries - 1) {
>> +        cue_desc.end_time_ns = index_entries[i + 1].timestamp * matroska->time_scale;
>> +        cue_desc.end_offset = index_entries[i + 1].pos - matroska->segment_start;
>> +    } else {
>> +        cue_desc.end_time_ns = matroska->duration * matroska->time_scale;
>> +        // FIXME: this needs special handling for files where Cues appear
>> +        // before Clusters. the current logic assumes Cues appear after
>> +        // Clusters.
>> +        cue_desc.end_offset = cues_start - matroska->segment_start;
>> +    }
>> +    return cue_desc;
>> +}
>> +
>> +static int webm_clusters_start_with_keyframe(AVFormatContext *s)
>> +{
>> +    MatroskaDemuxContext *matroska = s->priv_data;
>> +    int64_t cluster_pos, before_pos;
>> +    int index, rv = 1;
>> +    if (s->streams[0]->nb_index_entries <= 0) return 0;
>> +    // seek to the first cluster using cues.
>> +    index = av_index_search_timestamp(s->streams[0], 0, 0);
>> +    if (index < 0)  return 0;
>> +    cluster_pos = s->streams[0]->index_entries[index].pos;
>> +    before_pos = avio_tell(s->pb);
>> +    while (1) {
>> +        int64_t cluster_id = 0, cluster_length = 0;
>> +        AVPacket *pkt;
>> +        avio_seek(s->pb, cluster_pos, SEEK_SET);
>> +        // read cluster id and length
>> +        ebml_read_num(matroska, matroska->ctx->pb, 4, &cluster_id);
>> +        ebml_read_length(matroska, matroska->ctx->pb, &cluster_length);
>> +        if (cluster_id != 0xF43B675) { // done with all clusters
>> +            break;
>> +        }
>> +        avio_seek(s->pb, cluster_pos, SEEK_SET);
>> +        matroska->current_id = 0;
>> +        matroska_clear_queue(matroska);
>> +        if (matroska_parse_cluster(matroska) < 0 ||
>> +            matroska->num_packets <= 0) {
>> +            break;
>> +        }
>> +        pkt = matroska->packets[0];
>> +        cluster_pos += cluster_length + 12; // 12 is the offset of the cluster id and length.
>> +        if (!(pkt->flags & AV_PKT_FLAG_KEY)) {
>> +            rv = 0;
>> +            break;
>> +        }
>> +    }
>> +    avio_seek(s->pb, before_pos, SEEK_SET);
>> +    return rv;
>> +}
>> +
>> +static int buffer_size_after_time_downloaded(int64_t time_ns, double search_sec, int64_t bps,
>> +                                             double min_buffer, double* buffer,
>> +                                             double* sec_to_download, AVFormatContext *s,
>> +                                             int64_t cues_start)
>> +{
>> +    double nano_seconds_per_second = 1000000000.0;
>> +    double time_sec = time_ns / nano_seconds_per_second;
>> +    int rv = 0;
>> +    int64_t time_to_search_ns = (int64_t)(search_sec * nano_seconds_per_second);
>> +    int64_t end_time_ns = time_ns + time_to_search_ns;
>> +    double sec_downloaded = 0.0;
>> +    CueDesc desc_curr = get_cue_desc(s, time_ns, cues_start);
>> +    if (desc_curr.start_time_ns == -1)
>> +      return -1;
>> +    *sec_to_download = 0.0;
>> +
>> +    // Check for non cue start time.
>> +    if (time_ns > desc_curr.start_time_ns) {
>> +      int64_t cue_nano = desc_curr.end_time_ns - time_ns;
>> +      double percent = (double)(cue_nano) / (desc_curr.end_time_ns - desc_curr.start_time_ns);
>> +      double cueBytes = (desc_curr.end_offset - desc_curr.start_offset) * percent;
>> +      double timeToDownload = (cueBytes * 8.0) / bps;
>> +
>> +      sec_downloaded += (cue_nano / nano_seconds_per_second) - timeToDownload;
>> +      *sec_to_download += timeToDownload;
>> +
>> +      // Check if the search ends within the first cue.
>> +      if (desc_curr.end_time_ns >= end_time_ns) {
>> +          double desc_end_time_sec = desc_curr.end_time_ns / nano_seconds_per_second;
>> +          double percent_to_sub = search_sec / (desc_end_time_sec - time_sec);
>> +          sec_downloaded = percent_to_sub * sec_downloaded;
>> +          *sec_to_download = percent_to_sub * *sec_to_download;
>> +      }
>> +
>> +      if ((sec_downloaded + *buffer) <= min_buffer) {
>> +          return 1;
>> +      }
>> +
>> +      // Get the next Cue.
>> +      desc_curr = get_cue_desc(s, desc_curr.end_time_ns, cues_start);
>> +    }
>> +
>> +    while (desc_curr.start_time_ns != -1) {
>> +        int64_t desc_bytes = desc_curr.end_offset - desc_curr.start_offset;
>> +        int64_t desc_ns = desc_curr.end_time_ns - desc_curr.start_time_ns;
>> +        double desc_sec = desc_ns / nano_seconds_per_second;
>> +        double bits = (desc_bytes * 8.0);
>> +        double time_to_download = bits / bps;
>> +
>> +        sec_downloaded += desc_sec - time_to_download;
>> +        *sec_to_download += time_to_download;
>> +
>> +        if (desc_curr.end_time_ns >= end_time_ns) {
>> +            double desc_end_time_sec = desc_curr.end_time_ns / nano_seconds_per_second;
>> +            double percent_to_sub = search_sec / (desc_end_time_sec - time_sec);
>> +            sec_downloaded = percent_to_sub * sec_downloaded;
>> +            *sec_to_download = percent_to_sub * *sec_to_download;
>> +
>> +            if ((sec_downloaded + *buffer) <= min_buffer)
>> +                rv = 1;
>> +            break;
>> +        }
>> +
>> +        if ((sec_downloaded + *buffer) <= min_buffer) {
>> +            rv = 1;
>> +            break;
>> +        }
>> +
>> +        desc_curr = get_cue_desc(s, desc_curr.end_time_ns, cues_start);
>> +    }
>> +    *buffer = *buffer + sec_downloaded;
>> +    return rv;
>> +}
>> +
>> +/* This function computes the bandwidth of the WebM file with the help of
>> + * buffer_size_after_time_downloaded() function. Both of these functions are
>> + * adapted from the WebM Tools project to work with FFmpeg's Matroska parsing
>> + * mechanism.
>> + * See:
>> + * https://chromium.googlesource.com/webm/webm-tools/+/master/shared/webm_file.cc
>> + * for details about how this implementation works.
>
> Please no. Nobody wants to read some _other_ source code while trying
> to find out how a specific piece of source code works.
>

Fair enough. I'll update the comment with more documentation.

>> + * Returns -1 on error.
>> + * */
>> +static int64_t webm_dash_manifest_compute_bandwidth(AVFormatContext *s, int64_t cues_start)
>> +{
>> +    MatroskaDemuxContext *matroska = s->priv_data;
>> +    AVStream *st = s->streams[0];
>> +    double bandwidth = 0.0;
>> +    for (int i = 0; i < st->nb_index_entries; i++) {
>> +        int64_t prebuffer_ns = 1000000000;
>> +        int64_t time_ns = st->index_entries[i].timestamp * matroska->time_scale;
>> +        double nano_seconds_per_second = 1000000000.0;
>> +        int64_t prebuffered_ns = time_ns + prebuffer_ns;
>> +        double prebuffer_bytes = 0.0;
>> +        int64_t temp_prebuffer_ns = prebuffer_ns;
>> +        int64_t pre_bytes, pre_ns;
>> +        double pre_sec, prebuffer, bits_per_second;
>> +        CueDesc desc_beg = get_cue_desc(s, time_ns, cues_start);
>> +
>> +        // Start with the first Cue.
>> +        CueDesc desc_end = desc_beg;
>> +
>> +        // Figure out how much data we have downloaded for the prebuffer. This will
>> +        // be used later to adjust the bits per sample to try.
>> +        while (desc_end.start_time_ns != -1 && desc_end.end_time_ns < prebuffered_ns) {
>> +            // Prebuffered the entire Cue.
>> +            prebuffer_bytes += desc_end.end_offset - desc_end.start_offset;
>> +            temp_prebuffer_ns -= desc_end.end_time_ns - desc_end.start_time_ns;
>> +            desc_end = get_cue_desc(s, desc_end.end_time_ns, cues_start);
>> +        }
>> +        if (desc_end.start_time_ns == -1) {
>> +            // The prebuffer is larger than the duration.
>> +            return (matroska->duration * matroska->time_scale >= prebuffered_ns) ? -1 : 0;
>> +        }
>> +
>> +        // The prebuffer ends in the last Cue. Estimate how much data was
>> +        // prebuffered.
>> +        pre_bytes = desc_end.end_offset - desc_end.start_offset;
>> +        pre_ns = desc_end.end_time_ns - desc_end.start_time_ns;
>> +        pre_sec = pre_ns / nano_seconds_per_second;
>> +        prebuffer_bytes +=
>> +            pre_bytes * ((temp_prebuffer_ns / nano_seconds_per_second) / pre_sec);
>> +
>> +        prebuffer = prebuffer_ns / nano_seconds_per_second;
>> +
>> +        // Set this to 0.0 in case our prebuffer buffers the entire video.
>> +        bits_per_second = 0.0;
>> +        do {
>> +            int64_t desc_bytes = desc_end.end_offset - desc_beg.start_offset;
>> +            int64_t desc_ns = desc_end.end_time_ns - desc_beg.start_time_ns;
>> +            double desc_sec = desc_ns / nano_seconds_per_second;
>> +            double calc_bits_per_second = (desc_bytes * 8) / desc_sec;
>> +
>> +            // Drop the bps by the percentage of bytes buffered.
>> +            double percent = (desc_bytes - prebuffer_bytes) / desc_bytes;
>> +            double mod_bits_per_second = calc_bits_per_second * percent;
>> +
>> +            if (prebuffer < desc_sec) {
>> +                double search_sec =
>> +                    (double)(matroska->duration * matroska->time_scale) / nano_seconds_per_second;
>> +
>> +                // Add 1 so the bits per second should be a little bit greater than file
>> +                // datarate.
>> +                int64_t bps = (int64_t)(mod_bits_per_second) + 1;
>> +                const double min_buffer = 0.0;
>> +                double buffer = prebuffer;
>> +                double sec_to_download = 0.0;
>> +
>> +                int rv = buffer_size_after_time_downloaded(prebuffered_ns, search_sec, bps,
>> +                                                           min_buffer, &buffer, &sec_to_download,
>> +                                                           s, cues_start);
>> +                if (rv < 0) {
>> +                    return -1;
>> +                } else if (rv == 0) {
>> +                    bits_per_second = (double)(bps);
>> +                    break;
>> +                }
>> +            }
>> +
>> +            desc_end = get_cue_desc(s, desc_end.end_time_ns, cues_start);
>> +        } while (desc_end.start_time_ns != -1);
>> +        if (bandwidth < bits_per_second) bandwidth = bits_per_second;
>> +    }
>> +    return (int64_t)bandwidth;
>> +}
>> +
>> +static int webm_dash_manifest_cues(AVFormatContext *s)
>> +{
>> +    MatroskaDemuxContext *matroska = s->priv_data;
>> +    EbmlList *seekhead_list = &matroska->seekhead;
>> +    MatroskaSeekhead *seekhead = seekhead_list->elem;
>> +    char *buf;
>> +    int64_t cues_start, cues_end, before_pos, bandwidth;
>> +    int i;
>> +
>> +    // determine cues start and end positions
>> +    for (i = 0; i < seekhead_list->nb_elem; i++)
>> +        if (seekhead[i].id == MATROSKA_ID_CUES)
>> +            break;
>> +
>> +    if (i >= seekhead_list->nb_elem) return -1;
>> +
>> +    before_pos = avio_tell(matroska->ctx->pb);
>> +    cues_start = seekhead[i].pos + matroska->segment_start;
>> +    if (avio_seek(matroska->ctx->pb, cues_start, SEEK_SET) == cues_start) {
>> +        uint64_t cues_length = 0, cues_id = 0;
>> +        ebml_read_num(matroska, matroska->ctx->pb, 4, &cues_id);
>> +        ebml_read_length(matroska, matroska->ctx->pb, &cues_length);
>> +        cues_end = cues_start + cues_length + 11; // 11 is the offset of Cues ID.
>> +    }
>> +    avio_seek(matroska->ctx->pb, before_pos, SEEK_SET);
>> +
>> +    // parse the cues
>> +    matroska_parse_cues(matroska);
>> +
>> +    // cues start
>> +    buf = av_asprintf("%" PRId64, cues_start);
>> +    av_dict_set(&s->streams[0]->metadata, CUES_START, buf, 0);
>> +    av_free(buf);
>
> Unchecked mallocs... everywhere.
>
> All functions that allocate can fail and return NULL, and you should
> check for that, instead of crashing.
>

Yes, I will update the patch to check the results of these allocations.

>> +
>> +    // cues end
>> +    buf = av_asprintf("%" PRId64, cues_end);
>> +    av_dict_set(&s->streams[0]->metadata, CUES_END, buf, 0);
>> +    av_free(buf);
>> +
>> +    // bandwidth
>> +    bandwidth = webm_dash_manifest_compute_bandwidth(s, cues_start);
>> +    if (bandwidth < 0) return -1;
>> +    buf = av_asprintf("%" PRId64, bandwidth);
>> +    av_dict_set(&s->streams[0]->metadata, BANDWIDTH, buf, 0);
>> +    av_free(buf);
>> +
>> +    // check if all clusters start with key frames
>> +    buf = av_asprintf("%d", webm_clusters_start_with_keyframe(s));
>> +    av_dict_set(&s->streams[0]->metadata, CLUSTER_KEYFRAME, buf, 0);
>> +    av_free(buf);
>> +
>> +    // store cue point timestamps as a comma separated list for checking subsegment alignment in
>> +    // the muxer. assumes that each timestamp cannot be more than 20 characters long.
>> +    buf = av_malloc(s->streams[0]->nb_index_entries * 20 * sizeof(char));
>> +    strcpy(buf, "");
>> +    for (i = 0; i < s->streams[0]->nb_index_entries; i++) {
>> +        snprintf(buf, (i + 1) * 20 * sizeof(char),
>> +                 "%s%" PRId64, buf, s->streams[0]->index_entries[i].timestamp);
>> +        if (i != s->streams[0]->nb_index_entries - 1)
>> +            strncat(buf, ",", sizeof(char));
>
> sizeof(char) is always 1.
>
>> +    }
>> +    av_dict_set(&s->streams[0]->metadata, CUE_TIMESTAMPS, buf, 0);
>> +    av_free(buf);
>> +
>> +    return 0;
>> +}
>> +
>> +static int webm_dash_manifest_read_header(AVFormatContext *s)
>> +{
>> +    char *buf;
>> +    int ret = matroska_read_header(s);
>> +    MatroskaTrack *tracks;
>> +    MatroskaDemuxContext *matroska = s->priv_data;
>> +    if (ret) {
>> +        av_log(s, AV_LOG_ERROR, "Failed to read file headers\n");
>> +        return -1;
>> +    }
>> +
>> +    // initialization range
>> +    buf = av_asprintf("%" PRId64, avio_tell(s->pb) - 5); // 5 is the offset of Cluster ID.
>> +    av_dict_set(&s->streams[0]->metadata, INITIALIZATION_RANGE, buf, 0);
>> +    av_free(buf);
>> +
>> +    // basename of the file
>> +    buf = strrchr(s->filename, '/');
>> +    if (buf == NULL) return -1; // TODO return correct value here
>> +    av_dict_set(&s->streams[0]->metadata, FILENAME, ++buf, 0);
>> +
>> +    // duration
>> +    buf = av_asprintf("%g", matroska->duration);
>> +    av_dict_set(&s->streams[0]->metadata, DURATION, buf, 0);
>> +    av_free(buf);
>> +
>> +    // track number
>> +    tracks = matroska->tracks.elem;
>> +    buf = av_asprintf("%" PRId64, tracks[0].num);
>> +    av_dict_set(&s->streams[0]->metadata, TRACK_NUMBER, buf, 0);
>> +    av_free(buf);
>> +
>> +    // parse the cues and populate Cue related fields
>> +    return webm_dash_manifest_cues(s);
>> +}
>> +
>> +static int webm_dash_manifest_read_packet(AVFormatContext *s, AVPacket *pkt)
>> +{
>> +    return AVERROR_EOF;
>> +}
>
> Shouldn't this return packets? Maybe I'm misunderstanding the purpose
> of this code.
>

The point of this demuxer is to read certain metadata and pass it
along to the muxer so that it can generate the manifest XML. For this
purpose, it does not have to read any packets from the file at all;
just the headers will do. So we don't return any packets.

>> +
>>  AVInputFormat ff_matroska_demuxer = {
>>      .name           = "matroska,webm",
>>      .long_name      = NULL_IF_CONFIG_SMALL("Matroska / WebM"),
>> @@ -3078,3 +3431,12 @@ AVInputFormat ff_matroska_demuxer = {
>>      .read_close     = matroska_read_close,
>>      .read_seek      = matroska_read_seek,
>>  };
>> +
>> +AVInputFormat ff_webm_dash_manifest_demuxer = {
>> +    .name           = "webm_dash_manifest",
>> +    .long_name      = NULL_IF_CONFIG_SMALL("WebM DASH Manifest"),
>> +    .priv_data_size = sizeof(MatroskaDemuxContext),
>> +    .read_header    = webm_dash_manifest_read_header,
>> +    .read_packet    = webm_dash_manifest_read_packet,
>> +    .read_close     = matroska_read_close,
>> +};
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel



-- 
Vignesh


More information about the ffmpeg-devel mailing list