[FFmpeg-devel] [PATCH] avcodec/webvttdec: Unescape HTML entities

Ricardo Constantino wiiaboo at gmail.com
Fri Oct 9 00:45:08 CEST 2015


Also fixes adjacent tags not being parsed correctly.

Signed-off-by: Ricardo Constantino <wiiaboo at gmail.com>
---
 libavcodec/webvttdec.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/libavcodec/webvttdec.c b/libavcodec/webvttdec.c
index 1284a17..ae16630 100644
--- a/libavcodec/webvttdec.c
+++ b/libavcodec/webvttdec.c
@@ -37,11 +37,14 @@ static const struct {
     {"<b>", "{\\b1}"}, {"</b>", "{\\b0}"},
     {"<u>", "{\\u1}"}, {"</u>", "{\\u0}"},
     {"{", "\\{"}, {"}", "\\}"}, // escape to avoid ASS markup conflicts
+    {"&gt;", ">"}, {"&lt;", "<"},
+    {"&lrm;", ""}, {"&rlm;", ""}, // FIXME: properly honor bidi marks
+    {"&amp;", "&"}, {"&nbsp;", " "},
 };
 
 static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
 {
-    int i, skip = 0;
+    int i, again = 0;
 
     while (*p) {
 
@@ -51,19 +54,20 @@ static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
             if (!strncmp(p, from, len)) {
                 av_bprintf(buf, "%s", webvtt_tag_replace[i].to);
                 p += len;
+                again = 1;
                 break;
             }
         }
         if (!*p)
             break;
 
-        if (*p == '<')
-            skip = 1;
-        else if (*p == '>')
-            skip = 0;
+        if (again) {
+            again = 0;
+            continue;
+        }
         else if (p[0] == '\n' && p[1])
             av_bprintf(buf, "\\N");
-        else if (!skip && *p != '\r')
+        else if (*p != '\r')
             av_bprint_chars(buf, *p, 1);
         p++;
     }
-- 
2.6.0



More information about the ffmpeg-devel mailing list