[FFmpeg-devel] [PATCH] movtextenc: fix handling of utf-8 subtitles

Philip Langdale philipl at overt.org
Wed Mar 28 06:07:05 EEST 2018


See the earlier fix for movtextdec for details. The equivalent bug is
present on the encoder side as well.

We need to track the text length in 'characters' (which seems to really
mean codepoints) rather than in bytes, to ensure that styles are applied
across the correct ranges.

Signed-off-by: Philip Langdale <philipl at overt.org>
---
 libavcodec/movtextenc.c | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/libavcodec/movtextenc.c b/libavcodec/movtextenc.c
index d795e317c3..fd0743f752 100644
--- a/libavcodec/movtextenc.c
+++ b/libavcodec/movtextenc.c
@@ -304,11 +304,33 @@ static void mov_text_color_cb(void *priv, unsigned int color, unsigned int color
      */
 }
 
+static uint16_t utf8_strlen(const char *text, int len) // codepoints in text[0..len), 0 if not UTF-8
+{
+    int i = 0, ret = 0; // int, not uint16_t: a 16-bit index wraps and never reaches len > 65535
+    while (i < len) {
+        unsigned char c = text[i]; // plain char may be unsigned, so "c >= 0" is not portable
+        if (c < 0x80)
+            i += 1;
+        else if ((c & 0xE0) == 0xC0)
+            i += 2;
+        else if ((c & 0xF0) == 0xE0)
+            i += 3;
+        else if ((c & 0xF8) == 0xF0)
+            i += 4;
+        else
+            return 0; // stray continuation byte or invalid lead byte
+        ret++;
+    }
+    return i == len ? ret : 0; // i > len: the last sequence was cut short
+}
+
 static void mov_text_text_cb(void *priv, const char *text, int len)
 {
+    uint16_t utf8_len = utf8_strlen(text, len); // 0 if text is empty or rejected as non-UTF-8
     MovTextContext *s = priv;
     av_bprint_append_data(&s->buffer, text, len);
-    s->text_pos += len;
+    // Advance by characters, not bytes; if it's not utf-8, just use the byte length
+    s->text_pos += utf8_len ? utf8_len : len;
 }
 
 static void mov_text_new_line_cb(void *priv, int forced)
-- 
2.14.1



More information about the ffmpeg-devel mailing list