[FFmpeg-devel] [PATCH] [WIP] avformat/assdec: UTF-16 support
wm4
nfxjfg at googlemail.com
Fri Mar 28 19:33:25 CET 2014
This attempts to add UTF-16 subtitle support in the most simple way
possible. It does so by replacing avio_r8() with ff_text_r8(), which
converts UTF-16 on the fly to UTF-8. If the source is not UTF-16,
it practically wraps avio_r8() without change.
This uses the BOM to recognize UTF-16 files. In practice, all UTF-16
text files have a BOM. (I planned to use a somewhat more robust method
to detect UTF-16, similar to MPlayer's subreader, but libavformat's
architecture doesn't allow this easily.)
This also takes care of skipping the BOM properly in the UTF-8 case.
Skipping the BOM is somewhat hard, because AVIOContext does not allow
any readahead. Since ff_text_r8() includes its own read buffer (in case
bytes were read that don't belong to a BOM), this becomes trivial.
The functionality added with this patch could be used to extend other
subtitle formats with UTF-16 support.
It might be possible to implement the functionality provided by
FFTextReader as a custom AVIOContext, but I refrained from that
because it's not easily possible to return the correct stream
position with this, and it also seemed too roundabout.
---
libavformat/assdec.c | 21 ++++++++++++------
libavformat/subtitles.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++
libavformat/subtitles.h | 13 +++++++++++
3 files changed, 84 insertions(+), 7 deletions(-)
diff --git a/libavformat/assdec.c b/libavformat/assdec.c
index bb953c7..164dd09 100644
--- a/libavformat/assdec.c
+++ b/libavformat/assdec.c
@@ -33,10 +33,15 @@ typedef struct ASSContext {
static int ass_probe(AVProbeData *p)
{
- const char *header = "[Script Info]";
+ char buf[13];
+ int i;
+ FFTextReader tr;
+ ff_text_init_buf(&tr, p->buf, p->buf_size);
- if (!memcmp(p->buf, header, strlen(header)) ||
- !memcmp(p->buf + 3, header, strlen(header)))
+ for (i = 0; i < 13; i++)
+ buf[i] = ff_text_r8(&tr);
+
+ if (!memcmp(buf, "[Script Info]", 13))
return AVPROBE_SCORE_MAX;
return 0;
@@ -66,13 +71,13 @@ static int read_ts(const uint8_t *p, int64_t *start, int *duration)
return -1;
}
-static int64_t get_line(AVBPrint *buf, AVIOContext *pb)
+static int64_t get_line(AVBPrint *buf, FFTextReader *tr)
{
- int64_t pos = avio_tell(pb);
+ int64_t pos = ff_text_pos(tr);
av_bprint_clear(buf);
for (;;) {
- char c = avio_r8(pb);
+ char c = ff_text_r8(tr);
if (!c)
break;
av_bprint_chars(buf, c, 1);
@@ -88,6 +93,8 @@ static int ass_read_header(AVFormatContext *s)
AVBPrint header, line;
int header_remaining, res = 0;
AVStream *st;
+ FFTextReader tr;
+ ff_text_init_avio(&tr, s->pb);
st = avformat_new_stream(s, NULL);
if (!st)
@@ -102,7 +109,7 @@ static int ass_read_header(AVFormatContext *s)
av_bprint_init(&line, 0, AV_BPRINT_SIZE_UNLIMITED);
for (;;) {
- int64_t pos = get_line(&line, s->pb);
+ int64_t pos = get_line(&line, &tr);
if (!line.str[0]) // EOF
break;
diff --git a/libavformat/subtitles.c b/libavformat/subtitles.c
index fce2bf1..84c7c51 100644
--- a/libavformat/subtitles.c
+++ b/libavformat/subtitles.c
@@ -20,9 +20,66 @@
#include "avformat.h"
#include "subtitles.h"
+#include "avio_internal.h"
#include "libavutil/avassert.h"
#include "libavutil/avstring.h"
+void ff_text_init_avio(FFTextReader *r, AVIOContext *pb)
+{
+ int i;
+ r->pb = pb;
+ r->buf_pos = r->buf_len = 0;
+ r->type = 0;
+ for (i = 0; i < 2; i++)
+ r->buf[r->buf_len++] = avio_r8(r->pb);
+ if (strncmp("\xFF\xFE", r->buf, 2) == 0) {
+ r->type = 1; // UTF16LE
+ r->buf_pos += 2;
+ } else if (strncmp("\xFE\xFF", r->buf, 2) == 0) {
+ r->type = 2; // UTF16BE
+ r->buf_pos += 2;
+ } else {
+ r->buf[r->buf_len++] = avio_r8(r->pb);
+ if (strncmp("\xEF\xBB\xBF", r->buf, 3) == 0) {
+ // UTF8
+ r->buf_pos += 3;
+ }
+ }
+}
+
+void ff_text_init_buf(FFTextReader *r, void *buf, size_t size)
+{
+ memset(&r->buf_pb, 0, sizeof(r->buf_pb));
+ ffio_init_context(&r->buf_pb, buf, size, 0, NULL, NULL, NULL, NULL);
+ ff_text_init_avio(r, &r->buf_pb);
+}
+
+int64_t ff_text_pos(FFTextReader *r)
+{
+ return avio_tell(r->pb) - r->buf_len + r->buf_pos;
+}
+
+int ff_text_r8(FFTextReader *r)
+{
+ uint32_t val;
+ uint8_t tmp;
+ if (r->buf_pos < r->buf_len)
+ return r->buf[r->buf_pos++];
+ if (r->type == 1) {
+ GET_UTF16(val, avio_rl16(r->pb), return 0;)
+ } else if (r->type == 2) {
+ GET_UTF16(val, avio_rb16(r->pb), return 0;)
+ } else {
+ return avio_r8(r->pb);
+ }
+ if (!val)
+ return 0;
+ r->buf_pos = 0;
+ r->buf_len = 0;
+ PUT_UTF8(val, tmp, r->buf[r->buf_len++] = tmp;)
+ return r->buf[r->buf_pos++]; // buf_len is at least 1
+}
+
AVPacket *ff_subtitles_queue_insert(FFDemuxSubtitlesQueue *q,
const uint8_t *event, int len, int merge)
{
diff --git a/libavformat/subtitles.h b/libavformat/subtitles.h
index b5a96ec..f105bcc 100644
--- a/libavformat/subtitles.h
+++ b/libavformat/subtitles.h
@@ -31,6 +31,19 @@ enum sub_sort {
};
typedef struct {
+ int type;
+ AVIOContext *pb;
+ unsigned char buf[8];
+ int buf_pos, buf_len;
+ AVIOContext buf_pb;
+} FFTextReader;
+
+void ff_text_init_avio(FFTextReader *r, AVIOContext *pb);
+void ff_text_init_buf(FFTextReader *r, void *buf, size_t size);
+int64_t ff_text_pos(FFTextReader *r);
+int ff_text_r8(FFTextReader *r);
+
+typedef struct {
AVPacket *subs; ///< array of subtitles packets
int nb_subs; ///< number of subtitles packets
int allocated_size; ///< allocated size for subs
--
1.9.1
More information about the ffmpeg-devel
mailing list