[FFmpeg-devel] [PATCH 2/2] lavc: support subtitles charset conversion.
Clément Bœsch
ubitux at gmail.com
Fri Feb 15 17:43:39 CET 2013
On Fri, Feb 15, 2013 at 05:18:17PM +0100, Nicolas George wrote:
> Le septidi 27 pluviôse, an CCXXI, Clement Boesch a écrit :
> > Ping on this.
> >
> > I can rebase the two branches if needed, but I'd like some comment on
> > whether it sounds OK for a first version or if there are still design
> > issues. I admit I have somewhat lost track of what was expected since last
> > time. Nicolas?
>
> Could you re-send, or just point to, the latest version? With the various
> proposals at cross-purpose, I do not remember what it was. Thanks.
>
Rebased on master. The two branches are now updated on my github. Also
attached the two patches from the -nofilter one.
--
Clément B.
-------------- next part --------------
From eca1faebbf3e21bf4925ae26f4ceb9eef4afb744 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20B=C5=93sch?= <ubitux at gmail.com>
Date: Sat, 5 Jan 2013 11:06:31 +0100
Subject: [PATCH 1/2] lavc: mark bitmap based subtitles codecs as such.
---
libavcodec/avcodec.h | 4 ++++
libavcodec/codec_desc.c | 4 ++++
libavcodec/utils.c | 2 +-
3 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 68eac55..fc7091c 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -540,6 +540,10 @@ typedef struct AVCodecDescriptor {
* Codec supports lossless compression. Audio and video codecs only.
*/
#define AV_CODEC_PROP_LOSSLESS (1 << 2)
+/**
+ * Subtitle codec is bitmap based
+ */
+#define AV_CODEC_PROP_BITMAP_SUB (1 << 16)
#if FF_API_OLD_DECODE_AUDIO
/* in bytes */
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index 0806b5b..440e9d9 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -2371,12 +2371,14 @@ static const AVCodecDescriptor codec_descriptors[] = {
.type = AVMEDIA_TYPE_SUBTITLE,
.name = "dvd_subtitle",
.long_name = NULL_IF_CONFIG_SMALL("DVD subtitles"),
+ .props = AV_CODEC_PROP_BITMAP_SUB,
},
{
.id = AV_CODEC_ID_DVB_SUBTITLE,
.type = AVMEDIA_TYPE_SUBTITLE,
.name = "dvb_subtitle",
.long_name = NULL_IF_CONFIG_SMALL("DVB subtitles"),
+ .props = AV_CODEC_PROP_BITMAP_SUB,
},
{
.id = AV_CODEC_ID_TEXT,
@@ -2389,6 +2391,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
.type = AVMEDIA_TYPE_SUBTITLE,
.name = "xsub",
.long_name = NULL_IF_CONFIG_SMALL("XSUB"),
+ .props = AV_CODEC_PROP_BITMAP_SUB,
},
{
.id = AV_CODEC_ID_SSA,
@@ -2407,6 +2410,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
.type = AVMEDIA_TYPE_SUBTITLE,
.name = "hdmv_pgs_subtitle",
.long_name = NULL_IF_CONFIG_SMALL("HDMV Presentation Graphic Stream subtitles"),
+ .props = AV_CODEC_PROP_BITMAP_SUB,
},
{
.id = AV_CODEC_ID_DVB_TELETEXT,
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 569f2ff..2493798 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -1851,7 +1851,7 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
sub->pts = av_rescale_q(avpkt->pts,
avctx->pkt_timebase, AV_TIME_BASE_Q);
ret = avctx->codec->decode(avctx, sub, got_sub_ptr, &tmp);
- sub->format = sub->num_rects && sub->rects[0]->ass;
+ sub->format = !(avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB);
avctx->pkt = NULL;
if (did_split) {
--
1.8.1.3
-------------- next part --------------
From 1eb424d688127c37b19765549b612977e760655c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20B=C5=93sch?= <ubitux at gmail.com>
Date: Mon, 7 Jan 2013 18:08:56 +0100
Subject: [PATCH 2/2] lavc: support subtitles character encoding conversion.
TODO: bump lavc micro
---
Changelog | 1 +
configure | 2 +
libavcodec/avcodec.h | 18 ++++++++
libavcodec/options_table.h | 1 +
libavcodec/utils.c | 108 +++++++++++++++++++++++++++++++++++++++++++--
5 files changed, 127 insertions(+), 3 deletions(-)
diff --git a/Changelog b/Changelog
index 4a88e5a..24aeeea 100644
--- a/Changelog
+++ b/Changelog
@@ -18,6 +18,7 @@ version <next>:
- il filter ported from libmpcodecs
- support ID3v2 tags in ASF files
- RF64 support in WAV muxer
+- Subtitles character re-encoding
version 1.1:
diff --git a/configure b/configure
index 94aff58..1f6172e 100755
--- a/configure
+++ b/configure
@@ -1390,6 +1390,7 @@ HAVE_LIST="
gnu_as
gsm_h
ibm_asm
+ iconv
inet_aton
io_h
isatty
@@ -3715,6 +3716,7 @@ check_func getopt
check_func getrusage
check_struct "sys/time.h sys/resource.h" "struct rusage" ru_maxrss
check_func gettimeofday
+check_func iconv
check_func inet_aton $network_extralibs
check_func isatty
check_func localtime_r
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index fc7091c..4721087 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -3208,6 +3208,24 @@ typedef struct AVCodecContext {
* - encoding: unused
*/
AVDictionary *metadata;
+
+ /**
+ * Character encoding of the input subtitles file.
+ * - decoding: set by user
+ * - encoding: unused
+ */
+ char *sub_charenc;
+
+ /**
+ * Subtitles character encoding mode.
+ * - decoding: set by libavcodec, not intended to be used by user apps
+ * - encoding: unused
+ */
+ int sub_charenc_mode;
+#define FF_SUB_CHARENC_MODE_DO_NOTHING -1 ///< do nothing (demuxer outputs a stream supposed to be already in UTF-8, or the codec is bitmap for instance)
+#define FF_SUB_CHARENC_MODE_AUTOMATIC 0 ///< libavcodec will select the mode itself
+#define FF_SUB_CHARENC_MODE_DECODER_PRE 1 ///< the AVPacket data needs to be recoded to UTF-8 before being fed to the decoder, requires iconv
+//#define FF_SUB_CHARENC_MODE_DECODER_POST 2 ///< the AVSubtitle data needs to be recoded to UTF-8 after the decoder pass, requires iconv
} AVCodecContext;
AVRational av_codec_get_pkt_timebase (const AVCodecContext *avctx);
diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h
index 33cb4b2..f27f5f0 100644
--- a/libavcodec/options_table.h
+++ b/libavcodec/options_table.h
@@ -406,6 +406,7 @@ static const AVOption options[]={
{"ka", "Karaoke", 0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_KARAOKE }, INT_MIN, INT_MAX, A|E, "audio_service_type"},
{"request_sample_fmt", "sample format audio decoders should prefer", OFFSET(request_sample_fmt), AV_OPT_TYPE_SAMPLE_FMT, {.i64=AV_SAMPLE_FMT_NONE}, -1, AV_SAMPLE_FMT_NB-1, A|D, "request_sample_fmt"},
{"pkt_timebase", NULL, OFFSET(pkt_timebase), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, 0, INT_MAX, 0},
+{"sub_charenc", "set input text subtitles character encoding", OFFSET(sub_charenc), AV_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN, CHAR_MAX, S|D},
{NULL},
};
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 2493798..13bc9ad 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -48,6 +48,9 @@
#include <stdarg.h>
#include <limits.h>
#include <float.h>
+#if HAVE_ICONV
+# include <iconv.h>
+#endif
volatile int ff_avcodec_locked;
static int volatile entangled_thread_counter = 0;
@@ -1068,6 +1071,34 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
ret = AVERROR(EINVAL);
goto free_and_end;
}
+ if (avctx->sub_charenc) {
+ if (avctx->codec_type != AVMEDIA_TYPE_SUBTITLE) {
+ av_log(avctx, AV_LOG_ERROR, "Character encoding is only "
+ "supported with subtitles codecs\n");
+ ret = AVERROR(EINVAL);
+ goto free_and_end;
+ } else if (avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB) {
+ av_log(avctx, AV_LOG_WARNING, "Codec '%s' is bitmap-based, "
+ "subtitles character encoding will be ignored\n",
+ avctx->codec_descriptor->name);
+ avctx->sub_charenc_mode = FF_SUB_CHARENC_MODE_DO_NOTHING;
+ } else {
+ /* input character encoding is set for a text based subtitle
+ * codec at this point */
+ if (avctx->sub_charenc_mode == FF_SUB_CHARENC_MODE_AUTOMATIC)
+ avctx->sub_charenc_mode = FF_SUB_CHARENC_MODE_DECODER_PRE;
+
+ if (!HAVE_ICONV && avctx->sub_charenc_mode == FF_SUB_CHARENC_MODE_DECODER_PRE) {
+ av_log(avctx, AV_LOG_ERROR, "Character encoding subtitles "
+ "conversion needs a libavcodec built with iconv support "
+ "for this codec\n");
+ ret = AVERROR(ENOSYS);
+ goto free_and_end;
+ }
+ }
+ } else {
+ avctx->sub_charenc_mode = FF_SUB_CHARENC_MODE_DO_NOTHING;
+ }
}
end:
ff_unlock_avcodec();
@@ -1826,6 +1857,68 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
return ret;
}
+#define UTF8_MAX_BYTES 4 /* 5 and 6 bytes sequences should not be used */
+
+/**
+ * Recode a text subtitle packet to UTF-8 before it is fed to the decoder.
+ *
+ * Returns 0 without touching outpkt unless avctx->sub_charenc_mode is
+ * FF_SUB_CHARENC_MODE_DECODER_PRE. Otherwise the payload of inpkt is
+ * converted from avctx->sub_charenc to UTF-8 with iconv, and on success
+ * outpkt->data / outpkt->size are pointed at a newly allocated buffer.
+ *
+ * @param avctx  codec context providing sub_charenc and sub_charenc_mode
+ * @param outpkt packet whose data/size are replaced on successful recoding;
+ *               left untouched on failure
+ * @param inpkt  packet holding the text to recode; not modified
+ * @return 0 on success or when there is nothing to do, a negative AVERROR
+ *         code on failure
+ */
+static int recode_subtitle(AVCodecContext *avctx,
+                           AVPacket *outpkt, const AVPacket *inpkt)
+{
+#if HAVE_ICONV
+    iconv_t cd = (iconv_t)-1;
+    int ret = 0;
+    char *inb, *outb;
+    size_t inl, outl;
+    AVPacket tmp;
+#endif
+
+    if (avctx->sub_charenc_mode != FF_SUB_CHARENC_MODE_DECODER_PRE)
+        return 0;
+
+#if HAVE_ICONV
+    cd = iconv_open("UTF-8", avctx->sub_charenc);
+    if (cd == (iconv_t)-1) {
+        /* capture errno before any other call gets a chance to overwrite it */
+        ret = AVERROR(errno);
+        av_log(avctx, AV_LOG_ERROR, "Unable to open iconv context "
+               "with input character encoding \"%s\"\n", avctx->sub_charenc);
+        goto end;
+    }
+
+    inb = (char *)inpkt->data;
+    inl = inpkt->size;
+
+    /* the output buffer is sized for the worst case of UTF8_MAX_BYTES per
+     * input byte; refuse inputs for which that size would not fit an int */
+    if (inl >= INT_MAX / UTF8_MAX_BYTES - FF_INPUT_BUFFER_PADDING_SIZE) {
+        av_log(avctx, AV_LOG_ERROR, "Subtitles packet is too big for recoding\n");
+        ret = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    ret = av_new_packet(&tmp, inl * UTF8_MAX_BYTES);
+    if (ret < 0)
+        goto end;
+    outb = (char *)tmp.data;
+    outl = tmp.size;
+
+    /* reset errno so that the non-iconv failure conditions below (empty
+     * output, unconsumed input) are not reported with a stale value */
+    errno = 0;
+    if (iconv(cd, &inb, &inl, &outb, &outl) == (size_t)-1 ||
+        iconv(cd, NULL, NULL, &outb, &outl) == (size_t)-1 ||
+        outl >= tmp.size || inl != 0) {
+        /* never report success here: fall back to EINVAL when errno is 0 */
+        ret = errno ? AVERROR(errno) : AVERROR(EINVAL);
+        av_log(avctx, AV_LOG_ERROR, "Unable to recode subtitle event \"%s\" "
+               "from %s to UTF-8\n", inpkt->data, avctx->sub_charenc);
+        av_free_packet(&tmp);
+        goto end;
+    }
+
+    /* publish the buffer only once the conversion has succeeded */
+    outpkt->data = tmp.data;
+    outpkt->size = tmp.size - outl;
+    /* outb points just past the last converted byte and outl bytes remain
+     * unused: clear them so the text is NUL-terminated without overwriting
+     * its final character */
+    for (; outl > 0; outl--)
+        *outb++ = '\0';
+
+end:
+    if (cd != (iconv_t)-1)
+        iconv_close(cd);
+    return ret;
+#else
+    av_assert0(!"requesting subtitles recoding without iconv");
+    /* only reached if the assertion above does not abort; gives the
+     * non-void function a defined return value */
+    return AVERROR(ENOSYS);
+#endif
+}
+
+
int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
int *got_sub_ptr,
AVPacket *avpkt)
@@ -1841,19 +1934,28 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
avcodec_get_subtitle_defaults(sub);
if (avpkt->size) {
+ AVPacket pkt_recoded;
AVPacket tmp = *avpkt;
int did_split = av_packet_split_side_data(&tmp);
//apply_param_change(avctx, &tmp);
- avctx->pkt = &tmp;
+ pkt_recoded = tmp;
+ ret = recode_subtitle(avctx, &pkt_recoded, &tmp);
+ if (ret < 0) {
+ *got_sub_ptr = 0;
+ } else {
+ avctx->pkt = &pkt_recoded;
if (avctx->pkt_timebase.den && avpkt->pts != AV_NOPTS_VALUE)
sub->pts = av_rescale_q(avpkt->pts,
avctx->pkt_timebase, AV_TIME_BASE_Q);
- ret = avctx->codec->decode(avctx, sub, got_sub_ptr, &tmp);
+ ret = avctx->codec->decode(avctx, sub, got_sub_ptr, &pkt_recoded);
+ if (tmp.data != pkt_recoded.data)
+ av_free(pkt_recoded.data);
sub->format = !(avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB);
-
avctx->pkt = NULL;
+ }
+
if (did_split) {
ff_packet_free_side_data(&tmp);
if(ret == tmp.size)
--
1.8.1.3
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 490 bytes
Desc: not available
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20130215/8d863533/attachment.asc>
More information about the ffmpeg-devel
mailing list