FFmpeg
webvttdec.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012 Clément Bœsch
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * WebVTT subtitle demuxer
24  * @see http://dev.w3.org/html5/webvtt/
25  */
26 
27 #include "avformat.h"
28 #include "internal.h"
29 #include "subtitles.h"
30 #include "libavutil/bprint.h"
31 #include "libavutil/intreadwrite.h"
32 #include "libavutil/opt.h"
33 
34 typedef struct {
35  const AVClass *class;
37  int kind;
39 
40 static int webvtt_probe(const AVProbeData *p)
41 {
42  const uint8_t *ptr = p->buf;
43 
44  if (AV_RB24(ptr) == 0xEFBBBF)
45  ptr += 3; /* skip UTF-8 BOM */
46  if (!strncmp(ptr, "WEBVTT", 6) &&
47  (!ptr[6] || strchr("\n\r\t ", ptr[6])))
48  return AVPROBE_SCORE_MAX;
49  return 0;
50 }
51 
52 static int64_t read_ts(const char *s)
53 {
54  int hh, mm, ss, ms;
55  if (sscanf(s, "%u:%u:%u.%u", &hh, &mm, &ss, &ms) == 4) return (hh*3600LL + mm*60LL + ss) * 1000LL + ms;
56  if (sscanf(s, "%u:%u.%u", &mm, &ss, &ms) == 3) return ( mm*60LL + ss) * 1000LL + ms;
57  return AV_NOPTS_VALUE;
58 }
59 
61 {
62  WebVTTContext *webvtt = s->priv_data;
63  AVBPrint header, cue;
64  int res = 0;
66 
67  if (!st)
68  return AVERROR(ENOMEM);
69  avpriv_set_pts_info(st, 64, 1, 1000);
72  st->disposition |= webvtt->kind;
73 
76 
77  for (;;) {
78  int i;
79  int64_t pos;
80  AVPacket *sub;
81  const char *p, *identifier, *settings;
82  int identifier_len, settings_len;
83  int64_t ts_start, ts_end;
84 
85  ff_subtitles_read_chunk(s->pb, &cue);
86 
87  if (!cue.len)
88  break;
89 
90  p = identifier = cue.str;
91  pos = avio_tell(s->pb);
92 
93  /* ignore header chunk */
94  if (!strncmp(p, "\xEF\xBB\xBFWEBVTT", 9) ||
95  !strncmp(p, "WEBVTT", 6) ||
96  !strncmp(p, "NOTE", 4))
97  continue;
98 
99  /* optional cue identifier (can be a number like in SRT or some kind of
100  * chaptering id) */
101  for (i = 0; p[i] && p[i] != '\n' && p[i] != '\r'; i++) {
102  if (!strncmp(p + i, "-->", 3)) {
103  identifier = NULL;
104  break;
105  }
106  }
107  if (!identifier)
108  identifier_len = 0;
109  else {
110  identifier_len = strcspn(p, "\r\n");
111  p += identifier_len;
112  if (*p == '\r')
113  p++;
114  if (*p == '\n')
115  p++;
116  }
117 
118  /* cue timestamps */
119  if ((ts_start = read_ts(p)) == AV_NOPTS_VALUE)
120  break;
121  if (!(p = strstr(p, "-->")))
122  break;
123  p += 2;
124  do p++; while (*p == ' ' || *p == '\t');
125  if ((ts_end = read_ts(p)) == AV_NOPTS_VALUE)
126  break;
127 
128  /* optional cue settings */
129  p += strcspn(p, "\n\t ");
130  while (*p == '\t' || *p == ' ')
131  p++;
132  settings = p;
133  settings_len = strcspn(p, "\r\n");
134  p += settings_len;
135  if (*p == '\r')
136  p++;
137  if (*p == '\n')
138  p++;
139 
140  /* create packet */
141  sub = ff_subtitles_queue_insert(&webvtt->q, p, strlen(p), 0);
142  if (!sub) {
143  res = AVERROR(ENOMEM);
144  goto end;
145  }
146  sub->pos = pos;
147  sub->pts = ts_start;
148  sub->duration = ts_end - ts_start;
149 
150 #define SET_SIDE_DATA(name, type) do { \
151  if (name##_len) { \
152  uint8_t *buf = av_packet_new_side_data(sub, type, name##_len); \
153  if (!buf) { \
154  res = AVERROR(ENOMEM); \
155  goto end; \
156  } \
157  memcpy(buf, name, name##_len); \
158  } \
159 } while (0)
160 
163  }
164 
165  ff_subtitles_queue_finalize(s, &webvtt->q);
166 
167 end:
168  av_bprint_finalize(&cue, NULL);
169  av_bprint_finalize(&header, NULL);
170  return res;
171 }
172 
174 {
175  WebVTTContext *webvtt = s->priv_data;
176  return ff_subtitles_queue_read_packet(&webvtt->q, pkt);
177 }
178 
179 static int webvtt_read_seek(AVFormatContext *s, int stream_index,
180  int64_t min_ts, int64_t ts, int64_t max_ts, int flags)
181 {
182  WebVTTContext *webvtt = s->priv_data;
183  return ff_subtitles_queue_seek(&webvtt->q, s, stream_index,
184  min_ts, ts, max_ts, flags);
185 }
186 
188 {
189  WebVTTContext *webvtt = s->priv_data;
190  ff_subtitles_queue_clean(&webvtt->q);
191  return 0;
192 }
193 
194 #define OFFSET(x) offsetof(WebVTTContext, x)
195 #define KIND_FLAGS AV_OPT_FLAG_SUBTITLE_PARAM|AV_OPT_FLAG_DECODING_PARAM
196 
/*
 * Private options of the WebVTT demuxer.
 *
 * "kind" is an integer option stored in WebVTTContext.kind (range 0..INT_MAX,
 * default 0); the header-reading code ORs that value into the stream's
 * disposition field. The remaining entries are AV_OPT_TYPE_CONST named
 * constants that share the "webvtt_kind" unit string with "kind":
 * "subtitles" is 0, the others carry the matching AV_DISPOSITION_* flag.
 * The table ends with a { NULL } sentinel entry.
 */
197 static const AVOption options[] = {
198  { "kind", "Set kind of WebVTT track", OFFSET(kind), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, KIND_FLAGS, "webvtt_kind" },
199  { "subtitles", "WebVTT subtitles kind", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, INT_MIN, INT_MAX, KIND_FLAGS, "webvtt_kind" },
200  { "captions", "WebVTT captions kind", 0, AV_OPT_TYPE_CONST, { .i64 = AV_DISPOSITION_CAPTIONS }, INT_MIN, INT_MAX, KIND_FLAGS, "webvtt_kind" },
201  { "descriptions", "WebVTT descriptions kind", 0, AV_OPT_TYPE_CONST, { .i64 = AV_DISPOSITION_DESCRIPTIONS }, INT_MIN, INT_MAX, KIND_FLAGS, "webvtt_kind" },
202  { "metadata", "WebVTT metadata kind", 0, AV_OPT_TYPE_CONST, { .i64 = AV_DISPOSITION_METADATA }, INT_MIN, INT_MAX, KIND_FLAGS, "webvtt_kind" },
203  { NULL }
204 };
205 
207  .class_name = "WebVTT demuxer",
208  .item_name = av_default_item_name,
209  .option = options,
210  .version = LIBAVUTIL_VERSION_INT,
211 };
212 
214  .name = "webvtt",
215  .long_name = NULL_IF_CONFIG_SMALL("WebVTT subtitle"),
216  .priv_data_size = sizeof(WebVTTContext),
220  .read_seek2 = webvtt_read_seek,
222  .extensions = "vtt",
223  .priv_class = &webvtt_demuxer_class,
224 };
#define AV_DISPOSITION_METADATA
Definition: avformat.h:863
#define NULL
Definition: coverity.c:32
static int webvtt_read_close(AVFormatContext *s)
Definition: webvttdec.c:187
The optional first identifier line of a WebVTT cue.
Definition: avcodec.h:1343
AVOption.
Definition: opt.h:246
#define KIND_FLAGS
Definition: webvttdec.c:195
static int webvtt_read_header(AVFormatContext *s)
Definition: webvttdec.c:60
#define LIBAVUTIL_VERSION_INT
Definition: version.h:85
int64_t pos
byte position in stream, -1 if unknown
Definition: avcodec.h:1500
void avpriv_set_pts_info(AVStream *s, int pts_wrap_bits, unsigned int pts_num, unsigned int pts_den)
Set the time base and wrapping info for a given stream.
Definition: utils.c:4926
void ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf)
Same as ff_subtitles_read_text_chunk(), but read from an AVIOContext.
Definition: subtitles.c:400
static const AVOption options[]
Definition: webvttdec.c:197
enum AVCodecID codec_id
Specific type of the encoded data (the codec used).
Definition: avcodec.h:3968
const char * av_default_item_name(void *ptr)
Return the context name.
Definition: log.c:191
void ff_subtitles_queue_clean(FFDemuxSubtitlesQueue *q)
Remove and destroy all the subtitles packets.
Definition: subtitles.c:300
static AVPacket pkt
static int webvtt_read_seek(AVFormatContext *s, int stream_index, int64_t min_ts, int64_t ts, int64_t max_ts, int flags)
Definition: webvttdec.c:179
int av_bprint_finalize(AVBPrint *buf, char **ret_str)
Finalize a print buffer.
Definition: bprint.c:235
Format I/O context.
Definition: avformat.h:1358
const char * class_name
The name of the class; usually it is the same name as the context structure type to which the AVClass...
Definition: log.h:72
static int webvtt_probe(const AVProbeData *p)
Definition: webvttdec.c:40
uint8_t
AVOptions.
int ff_subtitles_queue_read_packet(FFDemuxSubtitlesQueue *q, AVPacket *pkt)
Generic read_packet() callback for subtitles demuxers using this queue system.
Definition: subtitles.c:211
static av_cold int end(AVCodecContext *avctx)
Definition: avrndec.c:90
int64_t duration
Duration of this packet in AVStream->time_base units, 0 if unknown.
Definition: avcodec.h:1498
AVStream * avformat_new_stream(AVFormatContext *s, const AVCodec *c)
Add a new stream to a media file.
Definition: utils.c:4499
static const AVClass webvtt_demuxer_class
Definition: webvttdec.c:206
static av_cold int read_close(AVFormatContext *ctx)
Definition: libcdio.c:145
static av_always_inline int64_t avio_tell(AVIOContext *s)
ftell() equivalent for AVIOContext.
Definition: avio.h:557
static const uint8_t header[24]
Definition: sdr2.c:67
#define AV_DISPOSITION_CAPTIONS
To specify text track kind (different from subtitles default).
Definition: avformat.h:861
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
#define AV_BPRINT_SIZE_UNLIMITED
AVInputFormat ff_webvtt_demuxer
Definition: webvttdec.c:213
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:186
void av_bprint_init(AVBPrint *buf, unsigned size_init, unsigned size_max)
Definition: bprint.c:69
enum AVMediaType codec_type
General type of the encoded data.
Definition: avcodec.h:3964
unsigned char * buf
Buffer must have AVPROBE_PADDING_SIZE of extra allocated bytes filled with zero.
Definition: avformat.h:448
#define ss(width, name, subs,...)
Definition: cbs_vp9.c:261
#define s(width, name)
Definition: cbs_vp9.c:257
static int read_header(FFV1Context *f)
Definition: ffv1dec.c:530
Stream structure.
Definition: avformat.h:881
static int read_packet(void *opaque, uint8_t *buf, int buf_size)
Definition: avio_reading.c:42
#define SET_SIDE_DATA(name, type)
int ff_subtitles_queue_seek(FFDemuxSubtitlesQueue *q, AVFormatContext *s, int stream_index, int64_t min_ts, int64_t ts, int64_t max_ts, int flags)
Update current_sub_idx to emulate a seek.
Definition: subtitles.c:248
#define AV_DISPOSITION_DESCRIPTIONS
Definition: avformat.h:862
uint64_t_TMPL AV_WL64 unsigned int_TMPL AV_WL32 unsigned int_TMPL AV_WL24 unsigned int_TMPL AV_WL16 uint64_t_TMPL AV_WB64 unsigned int_TMPL AV_WB32 unsigned int_TMPL AV_RB24
Definition: bytestream.h:87
AVIOContext * pb
I/O context.
Definition: avformat.h:1400
Describe the class of an AVClass context structure.
Definition: log.h:67
#define OFFSET(x)
Definition: webvttdec.c:194
This structure contains the data a format has to probe a file.
Definition: avformat.h:446
static int64_t read_ts(const char *s)
Definition: webvttdec.c:52
#define flags(name, subs,...)
Definition: cbs_av1.c:561
static int read_probe(const AVProbeData *pd)
Definition: jvdec.c:55
#define AVPROBE_SCORE_MAX
maximum score
Definition: avformat.h:458
Main libavformat public API header.
int disposition
AV_DISPOSITION_* bit field.
Definition: avformat.h:934
FFDemuxSubtitlesQueue q
Definition: webvttdec.c:36
The optional settings (rendering instructions) that immediately follow the timestamp specifier of a W...
Definition: avcodec.h:1349
void * priv_data
Format private data.
Definition: avformat.h:1386
const char * name
A comma separated list of short names for the format.
Definition: avformat.h:654
AVCodecParameters * codecpar
Codec parameters associated with this stream.
Definition: avformat.h:1028
AVPacket * ff_subtitles_queue_insert(FFDemuxSubtitlesQueue *q, const uint8_t *event, size_t len, int merge)
Insert a new subtitle event.
Definition: subtitles.c:111
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
static int webvtt_read_packet(AVFormatContext *s, AVPacket *pkt)
Definition: webvttdec.c:173
This structure stores compressed data.
Definition: avcodec.h:1457
void ff_subtitles_queue_finalize(void *log_ctx, FFDemuxSubtitlesQueue *q)
Set missing durations, sort subtitles by PTS (and then byte position), and drop duplicated events...
Definition: subtitles.c:193
int64_t pts
Presentation timestamp in AVStream->time_base units; the time at which the decompressed packet will b...
Definition: avcodec.h:1473
#define AV_NOPTS_VALUE
Undefined timestamp value.
Definition: avutil.h:248