[FFmpeg-devel] [PATCH 1/2] avformat/hls demuxer: Add WebVTT subtitle support
Andreas Rheinhardt
andreas.rheinhardt at outlook.com
Fri Feb 21 11:18:02 EET 2025
softworkz:
> From: softworkz <softworkz at hotmail.com>
>
> This adds support for WebVTT subtitles in HLS streams.
> Just like for separate audio streams, it supports all available
> WebVTT streams in all renditions.
> No new options are added; it just works and provides subtitle streams
> like any other demuxer.
> The code prevents downloading subtitle segments which are farther
> in the future than the main segments, to avoid loading hundreds
> of subtitle segments in advance.
>
> Signed-off-by: softworkz <softworkz at hotmail.com>
> ---
> libavformat/hls.c | 218 ++++++++++++++++++++++++++++++++++++++--------
> 1 file changed, 180 insertions(+), 38 deletions(-)
>
> diff --git a/libavformat/hls.c b/libavformat/hls.c
> index 3bdc1bc848..51cf013a66 100644
> --- a/libavformat/hls.c
> +++ b/libavformat/hls.c
> @@ -56,6 +56,8 @@
> #define MPEG_TIME_BASE 90000
> #define MPEG_TIME_BASE_Q (AVRational){1, MPEG_TIME_BASE}
>
> +static char *vtt_sample = "WEBVTT\n";
Missing const. And actually it is simpler to avoid this pointer
altogether and just use "WEBVTT\n" below.
> +
> /*
> * An apple http stream consists of a playlist with media segment files,
> * played sequentially. There may be several playlists with the same
> @@ -173,6 +175,7 @@ struct playlist {
> * playlist, if any. */
> int n_init_sections;
> struct segment **init_sections;
> + int is_subtitle; /* Indicates if it's a subtitle playlist */
> };
>
> /*
> @@ -330,6 +333,7 @@ static struct playlist *new_playlist(HLSContext *c, const char *url,
> return NULL;
> }
> pls->seek_timestamp = AV_NOPTS_VALUE;
> + pls->is_subtitle = 0;
Isn't pls zero-allocated? If it is, this explicit initialization is redundant.
>
> pls->is_id3_timestamped = -1;
> pls->id3_mpegts_timestamp = AV_NOPTS_VALUE;
> @@ -515,13 +519,6 @@ static struct rendition *new_rendition(HLSContext *c, struct rendition_info *inf
> return NULL;
> }
>
> - /* TODO: handle subtitles (each segment has to parsed separately) */
> - if (c->ctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL)
> - if (type == AVMEDIA_TYPE_SUBTITLE) {
> - av_log(c->ctx, AV_LOG_WARNING, "Can't support the subtitle(uri: %s)\n", info->uri);
> - return NULL;
> - }
> -
> rend = av_mallocz(sizeof(struct rendition));
> if (!rend)
> return NULL;
> @@ -536,9 +533,14 @@ static struct rendition *new_rendition(HLSContext *c, struct rendition_info *inf
> /* add the playlist if this is an external rendition */
> if (info->uri[0]) {
> rend->playlist = new_playlist(c, info->uri, url_base);
> - if (rend->playlist)
> + if (rend->playlist) {
> dynarray_add(&rend->playlist->renditions,
> - &rend->playlist->n_renditions, rend);
> + &rend->playlist->n_renditions, rend);
Don't change indentation in this patch.
> + if (type == AVMEDIA_TYPE_SUBTITLE) {
> + rend->playlist->is_subtitle = 1;
> + rend->playlist->is_id3_timestamped = 0;
> + }
Nit: Put this block before dynarray_add().
> + }
> }
>
> if (info->assoc_language[0]) {
> @@ -761,6 +763,9 @@ static int test_segment(AVFormatContext *s, const AVInputFormat *in_fmt, struct
> } else if (!strcmp(in_fmt->name, "mpegts")) {
> matchF = av_match_ext( seg->url, "ts,m2t,m2ts,mts,mpg,m4s,mpeg,mpegts")
> + 2*(ff_match_url_ext(seg->url, "ts,m2t,m2ts,mts,mpg,m4s,mpeg,mpegts") > 0);
> + } else if (!strcmp(in_fmt->name, "webvtt")) {
> + matchF = av_match_ext( seg->url, "vtt,webvtt")
> + + 2*(ff_match_url_ext(seg->url, "vtt,webvtt") > 0);
> }
>
> if (!(matchA & matchF)) {
> @@ -969,7 +974,7 @@ static int parse_playlist(HLSContext *c, const char *url,
> if (ptr)
> seg_offset = strtoll(ptr+1, NULL, 10);
> } else if (av_strstart(line, "#", NULL)) {
> - av_log(c->ctx, AV_LOG_INFO, "Skip ('%s')\n", line);
> + av_log(c->ctx, AV_LOG_VERBOSE, "Skip ('%s')\n", line);
> continue;
> } else if (line[0]) {
> if (is_variant) {
> @@ -1484,7 +1489,7 @@ static int playlist_needed(struct playlist *pls)
> int first_st;
>
> /* If there is no context or streams yet, the playlist is needed */
> - if (!pls->ctx || !pls->n_main_streams)
> + if ((!pls->ctx || !pls->n_main_streams) && !pls->is_subtitle)
> return 1;
>
> /* check if any of the streams in the playlist are needed */
> @@ -1522,17 +1527,13 @@ static int playlist_needed(struct playlist *pls)
> return 0;
> }
>
> -static int read_data(void *opaque, uint8_t *buf, int buf_size)
> +static int reload_playlist(struct playlist *v, HLSContext *c)
> {
> - struct playlist *v = opaque;
> - HLSContext *c = v->parent->priv_data;
> - int ret;
> - int just_opened = 0;
> + int ret = 0;
> int reload_count = 0;
> - int segment_retries = 0;
> - struct segment *seg;
>
> -restart:
> + v->needed = playlist_needed(v);
> +
> if (!v->needed)
> return AVERROR_EOF;
>
> @@ -1588,7 +1589,7 @@ reload:
> av_log(v->parent, AV_LOG_WARNING, "The m3u8 list sequence may have been wrapped.\n");
> }
> if (v->cur_seq_no >= v->start_seq_no + v->n_segments) {
> - if (v->finished)
> + if (v->finished || v->is_subtitle)
> return AVERROR_EOF;
> while (av_gettime_relative() - v->last_load_time < reload_interval) {
> if (ff_check_interrupt(c->interrupt_callback))
> @@ -1599,9 +1600,35 @@ reload:
> goto reload;
> }
>
> - v->input_read_done = 0;
> - seg = current_segment(v);
> + }
> + return ret;
> +}
> +
> +static int read_data_continuous(void *opaque, uint8_t *buf, int buf_size)
> +{
> + struct playlist *v = opaque;
> + HLSContext *c = v->parent->priv_data;
> + int ret;
> + int just_opened = 0;
> + int segment_retries = 0;
> + struct segment *seg;
> +
> + if (c->http_persistent && v->input_read_done) {
> + ret = reload_playlist(v, c);
> + if (ret < 0)
> + return ret;
> + }
> +
> + v->input_read_done = 0;
> +
> +restart:
> + ret = reload_playlist(v, c);
> + if (ret < 0)
> + return ret;
>
> + seg = current_segment(v);
> +
> + if (!v->input || (c->http_persistent && v->input_read_done)) {
> /* load/update Media Initialization Section, if any */
> ret = update_init_section(v, seg);
> if (ret)
> @@ -1630,7 +1657,7 @@ reload:
> } else {
> segment_retries++;
> }
> - goto reload;
> + goto restart;
> }
> segment_retries = 0;
> just_opened = 1;
> @@ -1692,6 +1719,110 @@ reload:
> goto restart;
> }
>
> +static int read_data_subtitle_segment(void *opaque, uint8_t *buf, int buf_size)
> +{
> + struct playlist *v = opaque;
> + HLSContext *c = v->parent->priv_data;
> + int ret;
> + struct segment *seg;
> +
> + if (!v->needed || v->cur_seq_no - v->start_seq_no >= v->n_segments) {
> + return AVERROR_EOF;
> + } else {
> + seg = current_segment(v);
> + }
> +
> + if (!v->input) {
> + ret = open_input(c, v, seg, &v->input);
> + if (ret < 0) {
> + if (ff_check_interrupt(c->interrupt_callback))
> + return AVERROR_EXIT;
> + av_log(v->parent, AV_LOG_WARNING, "Failed to open segment of playlist %d\n",
> + v->index);
> + return ret;
> + }
> + }
> +
> + return read_from_url(v, seg, buf, buf_size);
> +}
> +
> +static int nested_io_open(AVFormatContext *s, AVIOContext **pb, const char *url,
> + int flags, AVDictionary **opts)
> +{
> + av_log(s, AV_LOG_ERROR,
> + "A HLS playlist item '%s' referred to an external file '%s'. "
> + "Opening this file was forbidden for security reasons\n",
> + s->url, url);
> + return AVERROR(EPERM);
> +}
> +
> +static int init_subtitle_context(struct playlist *pls)
> +{
> + HLSContext *c = pls->parent->priv_data;
> + const AVInputFormat *in_fmt;
> + AVDictionary *opts = NULL;
> + int ret;
> +
> + if (!(pls->ctx = avformat_alloc_context()))
> + return AVERROR(ENOMEM);
> +
> + pls->read_buffer = av_malloc(INITIAL_BUFFER_SIZE);
> + if (!pls->read_buffer) {
> + avformat_free_context(pls->ctx);
> + pls->ctx = NULL;
> + return AVERROR(ENOMEM);
> + }
> +
> + ffio_init_context(&pls->pb, pls->read_buffer, INITIAL_BUFFER_SIZE, 0, pls,
> + read_data_subtitle_segment, NULL, NULL);
> + pls->pb.pub.seekable = 0;
> + pls->ctx->pb = &pls->pb.pub;
> + pls->ctx->io_open = nested_io_open;
> +
> + ret = ff_copy_whiteblacklists(pls->ctx, pls->parent);
> + if (ret < 0)
> + return ret;
> +
> + in_fmt = av_find_input_format("webvtt");
> + av_dict_copy(&opts, c->seg_format_opts, 0);
> + ret = avformat_open_input(&pls->ctx, current_segment(pls)->url, in_fmt, &opts);
> + av_dict_free(&opts);
> +
> + return ret;
> +}
> +
> +static int read_subtitle_packet(struct playlist *v, AVPacket *pkt)
> +{
> + HLSContext *c = v->parent->priv_data;
> + int ret;
> +
> +restart:
> + ret = reload_playlist(v, c);
> + if (ret < 0)
> + return ret;
> +
> + if (v->input && !v->ctx)
> + ff_format_io_close(v->parent, &v->input);
> +
> + if (!v->input && !v->ctx) {
> + ret = init_subtitle_context(v);
> + if (ret < 0)
> + return ret;
> + }
> +
> + ret = av_read_frame(v->ctx, v->pkt);
> + if (!ret) {
> + return ret;
> + }
> + ff_format_io_close(v->parent, &v->input);
> + v->cur_seq_no++;
> + c->cur_seq_no = v->cur_seq_no;
> +
> + avformat_close_input(&v->ctx);
> +
> + goto restart;
> +}
> +
> static void add_renditions_to_variant(HLSContext *c, struct variant *var,
> enum AVMediaType type, const char *group_id)
> {
> @@ -1853,16 +1984,6 @@ static int64_t select_cur_seq_no(HLSContext *c, struct playlist *pls)
> return pls->start_seq_no;
> }
>
> -static int nested_io_open(AVFormatContext *s, AVIOContext **pb, const char *url,
> - int flags, AVDictionary **opts)
> -{
> - av_log(s, AV_LOG_ERROR,
> - "A HLS playlist item '%s' referred to an external file '%s'. "
> - "Opening this file was forbidden for security reasons\n",
> - s->url, url);
> - return AVERROR(EPERM);
> -}
> -
> static void add_stream_to_programs(AVFormatContext *s, struct playlist *pls, AVStream *stream)
> {
> HLSContext *c = s->priv_data;
> @@ -2070,6 +2191,8 @@ static int hls_read_header(AVFormatContext *s)
> highest_cur_seq_no = FFMAX(highest_cur_seq_no, pls->cur_seq_no);
> }
>
> + av_dict_set(&c->seg_format_opts, "prefer_hls_mpegts_pts", "1", 0);
> +
> /* Open the demuxer for each playlist */
> for (i = 0; i < c->n_playlists; i++) {
> struct playlist *pls = c->playlists[i];
> @@ -2107,8 +2230,12 @@ static int hls_read_header(AVFormatContext *s)
> return AVERROR(ENOMEM);
> }
>
> - ffio_init_context(&pls->pb, pls->read_buffer, INITIAL_BUFFER_SIZE, 0, pls,
> - read_data, NULL, NULL);
> + if (pls->is_subtitle)
> + ffio_init_context(&pls->pb, (unsigned char*)av_strdup(vtt_sample), (int)strlen(vtt_sample), 0, pls,
> + NULL, NULL, NULL);
> + else
> + ffio_init_context(&pls->pb, pls->read_buffer, INITIAL_BUFFER_SIZE, 0, pls,
> + read_data_continuous, NULL, NULL);
1. Unchecked av_strdup().
2. Is duplicating the string even needed? Can't we simply set the
AVIOContext to NULL before closing the AVFormatContext?
>
> /*
> * If encryption scheme is SAMPLE-AES, try to read ID3 tags of
> @@ -2254,6 +2381,13 @@ static int hls_read_header(AVFormatContext *s)
> if (pls->n_main_streams)
> av_dict_copy(&pls->main_streams[0]->metadata, pls->ctx->metadata, 0);
>
> + if (pls->is_subtitle) {
> + avformat_free_context(pls->ctx);
Doesn't the copy of vtt_sample leak here?
> + pls->ctx = NULL;
> + pls->needed = 0;
> + pls->main_streams[0]->discard = AVDISCARD_ALL;
> + }
> +
> add_metadata_from_renditions(s, pls, AVMEDIA_TYPE_AUDIO);
> add_metadata_from_renditions(s, pls, AVMEDIA_TYPE_VIDEO);
> add_metadata_from_renditions(s, pls, AVMEDIA_TYPE_SUBTITLE);
> @@ -2296,6 +2430,8 @@ static int recheck_discard_flags(AVFormatContext *s, int first)
> pls->input_read_done = 0;
> ff_format_io_close(pls->parent, &pls->input_next);
> pls->input_next_requested = 0;
> + if (pls->is_subtitle)
> + avformat_close_input(&pls->ctx);
> pls->needed = 0;
> changed = 1;
> av_log(s, AV_LOG_INFO, "No longer receiving playlist %d\n", i);
> @@ -2363,7 +2499,10 @@ static int hls_read_packet(AVFormatContext *s, AVPacket *pkt)
> int64_t ts_diff;
> AVRational tb;
> struct segment *seg = NULL;
> - ret = av_read_frame(pls->ctx, pls->pkt);
> + if (pls->is_subtitle)
> + ret = read_subtitle_packet(pls, pls->pkt);
> + else
> + ret = av_read_frame(pls->ctx, pls->pkt);
> if (ret < 0) {
> if (!avio_feof(&pls->pb.pub) && ret != AVERROR_EOF)
> return ret;
> @@ -2559,7 +2698,10 @@ static int hls_read_seek(AVFormatContext *s, int stream_index,
> /* Reset the pos, to let the mpegts/mov demuxer know we've seeked. */
> pb->pos = 0;
> /* Flush the packet queue of the subdemuxer. */
> - ff_read_frame_flush(pls->ctx);
> + if (pls->ctx)
> + ff_read_frame_flush(pls->ctx);
> + if (pls->is_subtitle)
> + avformat_close_input(&pls->ctx);
>
> /* Reset the init segment so it's re-fetched and served appropiately */
> pls->cur_init_section = NULL;
> @@ -2628,7 +2770,7 @@ static const AVOption hls_options[] = {
> OFFSET(prefer_x_start), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS},
> {"allowed_extensions", "List of file extensions that hls is allowed to access",
> OFFSET(allowed_extensions), AV_OPT_TYPE_STRING,
> - {.str = "3gp,aac,avi,ac3,eac3,flac,mkv,m3u8,m4a,m4s,m4v,mpg,mov,mp2,mp3,mp4,mpeg,mpegts,ogg,ogv,oga,ts,vob,wav"},
> + {.str = "3gp,aac,avi,ac3,eac3,flac,mkv,m3u8,m4a,m4s,m4v,mpg,mov,mp2,mp3,mp4,mpeg,mpegts,ogg,ogv,oga,ts,vob,vtt,wav,webvtt"},
> INT_MIN, INT_MAX, FLAGS},
> {"extension_picky", "Be picky with all extensions matching",
> OFFSET(extension_picky), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS},
More information about the ffmpeg-devel mailing list