[FFmpeg-devel] [PATCH 1/1] Fixing 3GPP Timed Text (TTXT / tx3g / mov_text) encoding for UTF-8 (ticket 6021)

Erik Bråthen Solem erikbsolem at hotmail.com
Sun Dec 18 22:34:25 EET 2016


According to the format specification (3GPP TS 26.245, section 5.2) "storage
lengths are specified as byte-counts, whereas highlighting is specified using
character offsets." This patch replaces byte counting with character counting
for highlighting. See the following page for a link to the specification:
https://gpac.wp.mines-telecom.fr/mp4box/ttxt-format-documentation/
---
 libavcodec/movtextenc.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/libavcodec/movtextenc.c b/libavcodec/movtextenc.c
index 20e01e2..3ae015a 100644
--- a/libavcodec/movtextenc.c
+++ b/libavcodec/movtextenc.c
@@ -70,6 +70,7 @@ typedef struct {
     uint8_t style_fontsize;
     uint32_t style_color;
     uint16_t text_pos;
+    uint16_t text_pos_chars;
 } MovTextContext;
 
 typedef struct {
@@ -216,10 +217,10 @@ static void mov_text_style_cb(void *priv, const char style, int close)
             }
 
             s->style_attributes_temp->style_flag = 0;
-            s->style_attributes_temp->style_start = AV_RB16(&s->text_pos);
+            s->style_attributes_temp->style_start = AV_RB16(&s->text_pos_chars);
         } else {
             if (s->style_attributes_temp->style_flag) { //break the style record here and start a new one
-                s->style_attributes_temp->style_end = AV_RB16(&s->text_pos);
+                s->style_attributes_temp->style_end = AV_RB16(&s->text_pos_chars);
                 av_dynarray_add(&s->style_attributes, &s->count, s->style_attributes_temp);
                 s->style_attributes_temp = av_malloc(sizeof(*s->style_attributes_temp));
                 if (!s->style_attributes_temp) {
@@ -230,10 +231,10 @@ static void mov_text_style_cb(void *priv, const char style, int close)
                 }
 
                 s->style_attributes_temp->style_flag = s->style_attributes[s->count - 1]->style_flag;
-                s->style_attributes_temp->style_start = AV_RB16(&s->text_pos);
+                s->style_attributes_temp->style_start = AV_RB16(&s->text_pos_chars);
             } else {
                 s->style_attributes_temp->style_flag = 0;
-                s->style_attributes_temp->style_start = AV_RB16(&s->text_pos);
+                s->style_attributes_temp->style_start = AV_RB16(&s->text_pos_chars);
             }
         }
         switch (style){
@@ -248,7 +249,7 @@ static void mov_text_style_cb(void *priv, const char style, int close)
             break;
         }
     } else {
-        s->style_attributes_temp->style_end = AV_RB16(&s->text_pos);
+        s->style_attributes_temp->style_end = AV_RB16(&s->text_pos_chars);
         av_dynarray_add(&s->style_attributes, &s->count, s->style_attributes_temp);
 
         s->style_attributes_temp = av_malloc(sizeof(*s->style_attributes_temp));
@@ -273,7 +274,7 @@ static void mov_text_style_cb(void *priv, const char style, int close)
             break;
         }
         if (s->style_attributes_temp->style_flag) { //start of new style record
-            s->style_attributes_temp->style_start = AV_RB16(&s->text_pos);
+            s->style_attributes_temp->style_start = AV_RB16(&s->text_pos_chars);
         }
     }
     s->box_flags |= STYL_BOX;
@@ -284,11 +285,11 @@ static void mov_text_color_cb(void *priv, unsigned int color, unsigned int color
     MovTextContext *s = priv;
     if (color_id == 2) {    //secondary color changes
         if (s->box_flags & HLIT_BOX) {  //close tag
-            s->hlit.end = AV_RB16(&s->text_pos);
+            s->hlit.end = AV_RB16(&s->text_pos_chars);
         } else {
             s->box_flags |= HCLR_BOX;
             s->box_flags |= HLIT_BOX;
-            s->hlit.start = AV_RB16(&s->text_pos);
+            s->hlit.start = AV_RB16(&s->text_pos_chars);
             s->hclr.color = color | (0xFF << 24);  //set alpha value to FF
         }
     }
@@ -302,7 +303,10 @@ static void mov_text_text_cb(void *priv, const char *text, int len)
 {
     MovTextContext *s = priv;
     av_bprint_append_data(&s->buffer, text, len);
-    s->text_pos += len;
+    s->text_pos += len;             // length of text in bytes
+    for (int i = 0; i < len; i++)   // length of text in UTF-8 characters
+        if ((text[i] & 0xC0) != 0x80)
+            s->text_pos_chars++;
 }
 
 static void mov_text_new_line_cb(void *priv, int forced)
@@ -310,6 +314,7 @@ static void mov_text_new_line_cb(void *priv, int forced)
     MovTextContext *s = priv;
     av_bprint_append_data(&s->buffer, "\n", 1);
     s->text_pos += 1;
+    s->text_pos_chars += 1;
 }
 
 static const ASSCodesCallbacks mov_text_callbacks = {
@@ -328,6 +333,7 @@ static int mov_text_encode_frame(AVCodecContext *avctx, unsigned char *buf,
     size_t j;
 
     s->text_pos = 0;
+    s->text_pos_chars = 0;
     s->count = 0;
     s->box_flags = 0;
     s->style_entries = 0;
-- 
1.9.5 (Apple Git-50.3)



More information about the ffmpeg-devel mailing list