[FFmpeg-devel] [PATCH 3/3] lavc: check decoded subtitles encoding.

Nicolas George nicolas.george at normalesup.org
Sun Apr 7 10:23:46 CEST 2013


Address trac ticket #2431.

Signed-off-by: Nicolas George <nicolas.george at normalesup.org>
---
 libavcodec/utils.c            |   13 ++++++++++++-
 tests/fate/subtitles.mak      |    4 ++--
 tests/ref/fate/sub-aqtitle    |    2 +-
 tests/ref/fate/sub-subviewer1 |    2 +-
 4 files changed, 16 insertions(+), 5 deletions(-)


The FATE ref changes correspond to files encoded in UTF-8 rather than
windows-1252, with the exact same content.

This patch may annoy users that rely on the subtitles encoders and decoders
to be 8-bit agnostic, but in the long run this is not a "feature" that can
be guaranteed to work IMHO. Could be stashed until input encoding
autodetection and output recoding is implemented.


diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index e4ea325..fa1bc7f 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -2200,7 +2200,7 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
                              int *got_sub_ptr,
                              AVPacket *avpkt)
 {
-    int ret = 0;
+    int i, ret = 0;
 
     if (avctx->codec->type != AVMEDIA_TYPE_SUBTITLE) {
         av_log(avctx, AV_LOG_ERROR, "Invalid media type for subtitles\n");
@@ -2237,6 +2237,17 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
                                                      avctx->pkt_timebase, ms);
             }
 
+            for (i = 0; i < sub->num_rects; i++) {
+                if (sub->rects[i]->ass &&
+                    !av_is_valid_utf8(sub->rects[i]->ass, -1, NULL, 0)) {
+                    av_log(avctx, AV_LOG_ERROR,
+                           "Invalid UTF-8 in decoded subtitles text; "
+                           "maybe missing -sub_charenc option\n");
+                    avsubtitle_free(sub);
+                    return AVERROR_INVALIDDATA;
+                }
+            }
+
             if (tmp.data != pkt_recoded.data) { // did we recode?
                 /* prevent from destroying side data from original packet */
                 pkt_recoded.side_data = NULL;
diff --git a/tests/fate/subtitles.mak b/tests/fate/subtitles.mak
index 8e586fb..ef381f9 100644
--- a/tests/fate/subtitles.mak
+++ b/tests/fate/subtitles.mak
@@ -1,5 +1,5 @@
 FATE_SUBTITLES_ASS-$(call DEMDEC, AQTITLE, TEXT) += fate-sub-aqtitle
-fate-sub-aqtitle: CMD = md5 -i $(SAMPLES)/sub/AQTitle_capability_tester.aqt -f ass
+fate-sub-aqtitle: CMD = md5 -sub_charenc windows-1250 -i $(SAMPLES)/sub/AQTitle_capability_tester.aqt -f ass
 
 FATE_SUBTITLES_ASS-$(call DEMDEC, JACOSUB, JACOSUB) += fate-sub-jacosub
 fate-sub-jacosub: CMD = md5 -i $(SAMPLES)/sub/JACOsub_capability_tester.jss -f ass
@@ -41,7 +41,7 @@ FATE_SUBTITLES-$(call ALLYES, MOV_DEMUXER MOVTEXT_DECODER SUBRIP_ENCODER) += fat
 fate-sub-subripenc: CMD = md5 -i $(SAMPLES)/sub/MovText_capability_tester.mp4 -scodec subrip -f srt
 
 FATE_SUBTITLES_ASS-$(call DEMDEC, SUBVIEWER1, SUBVIEWER1) += fate-sub-subviewer1
-fate-sub-subviewer1: CMD = md5 -i $(SAMPLES)/sub/SubViewer1_capability_tester.sub -f ass
+fate-sub-subviewer1: CMD = md5 -sub_charenc windows-1250 -i $(SAMPLES)/sub/SubViewer1_capability_tester.sub -f ass
 
 FATE_SUBTITLES_ASS-$(call DEMDEC, SUBVIEWER, SUBVIEWER) += fate-sub-subviewer
 fate-sub-subviewer: CMD = md5 -i $(SAMPLES)/sub/SubViewer_capability_tester.sub -f ass
diff --git a/tests/ref/fate/sub-aqtitle b/tests/ref/fate/sub-aqtitle
index f6900e7..c477b0a 100644
--- a/tests/ref/fate/sub-aqtitle
+++ b/tests/ref/fate/sub-aqtitle
@@ -1 +1 @@
-e888e1354cd0968895ab89cb169fec31
+1c68def68db6536c235819cbe0638e00
diff --git a/tests/ref/fate/sub-subviewer1 b/tests/ref/fate/sub-subviewer1
index 116fce7..052431a 100644
--- a/tests/ref/fate/sub-subviewer1
+++ b/tests/ref/fate/sub-subviewer1
@@ -1 +1 @@
-0c2096fedf7c971742b2e879bb303ce9
+cbeb015b1125757eed814f212cfc6c9c
-- 
1.7.10.4



More information about the ffmpeg-devel mailing list