[FFmpeg-devel] [PATCH 2/5] lavu/bprint: add XML escaping

Rodger Combs rodger.combs at gmail.com
Wed Apr 12 10:11:24 EEST 2017


---
 libavutil/avstring.h | 28 ++++++++++++++++++++++++++++
 libavutil/bprint.c   | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+)

diff --git a/libavutil/avstring.h b/libavutil/avstring.h
index 04d2695640..68b753a569 100644
--- a/libavutil/avstring.h
+++ b/libavutil/avstring.h
@@ -314,6 +314,7 @@ enum AVEscapeMode {
     AV_ESCAPE_MODE_AUTO,      ///< Use auto-selected escaping mode.
     AV_ESCAPE_MODE_BACKSLASH, ///< Use backslash escaping.
     AV_ESCAPE_MODE_QUOTE,     ///< Use single-quote escaping.
+    AV_ESCAPE_MODE_XML,       ///< Use XML ampersand-escaping; requires UTF-8 input.
 };
 
 /**
@@ -334,6 +335,33 @@ enum AVEscapeMode {
 #define AV_ESCAPE_FLAG_STRICT (1 << 1)
 
 /**
+ * In addition to the provided list, escape all characters outside the range of
+ * U+0020 to U+007E.
+ * This only applies to XML-escaping.
+ */
+#define AV_ESCAPE_FLAG_NON_ASCII (1 << 2)
+
+/**
+ * In addition to the provided list, escape single or double quotes.
+ * This only applies to XML-escaping.
+ */
+#define AV_ESCAPE_FLAG_ESCAPE_SINGLE_QUOTE (1 << 3)
+#define AV_ESCAPE_FLAG_ESCAPE_DOUBLE_QUOTE (1 << 4)
+
+/**
+ * Replace invalid UTF-8 characters with a U+FFFD REPLACEMENT CHARACTER, escaped
+ * if AV_ESCAPE_FLAG_NON_ASCII is set.
+ * This only applies to XML-escaping.
+ */
+#define AV_ESCAPE_FLAG_REPLACE_INVALID_SEQUENCES (1 << 5)
+
+/**
+ * Replace invalid UTF-8 sequences with a '?'. This takes precedence over
+ * AV_ESCAPE_FLAG_REPLACE_INVALID_SEQUENCES. This only applies to XML-escaping.
+ */
+#define AV_ESCAPE_FLAG_REPLACE_INVALID_ASCII (1 << 6)
+
+/**
  * Escape string in src, and put the escaped string in an allocated
  * string in *dst, which must be freed with av_free().
  *
diff --git a/libavutil/bprint.c b/libavutil/bprint.c
index 652775bef9..8e44c57346 100644
--- a/libavutil/bprint.c
+++ b/libavutil/bprint.c
@@ -302,5 +302,48 @@ void av_bprint_escape(AVBPrint *dstbuf, const char *src, const char *special_cha
         }
         av_bprint_chars(dstbuf, '\'', 1);
         break;
+
+    case AV_ESCAPE_MODE_XML:
+        /* &;-escape characters */
+        while (*src) {
+            uint8_t tmp;
+            uint32_t cp;
+            const char *src1 = src;
+            GET_UTF8(cp, (uint8_t)*src++, goto err;);
+
+            if ((cp < 0xFF &&
+                 ((special_chars && strchr(special_chars, cp)) ||
+                  (flags & AV_ESCAPE_FLAG_WHITESPACE) && strchr(WHITESPACES, cp))) ||
+                (!(flags & AV_ESCAPE_FLAG_STRICT) &&
+                 (cp == '&' || cp == '<' || cp == '>')) ||
+                ((flags & AV_ESCAPE_FLAG_ESCAPE_SINGLE_QUOTE) && cp == '\'') ||
+                ((flags & AV_ESCAPE_FLAG_ESCAPE_DOUBLE_QUOTE) && cp == '"') ||
+                ((flags & AV_ESCAPE_FLAG_NON_ASCII) && (cp < 0x20 || cp > 0x7e))) {
+                switch (cp) {
+                case '&' : av_bprintf(dstbuf, "&amp;");  break;
+                case '<' : av_bprintf(dstbuf, "&lt;");   break;
+                case '>' : av_bprintf(dstbuf, "&gt;");   break;
+                case '"' : av_bprintf(dstbuf, "&quot;"); break;
+                case '\'': av_bprintf(dstbuf, "&apos;"); break;
+                default:   av_bprintf(dstbuf, "&#x%"PRIx32";", cp); break;
+                }
+            } else {
+                PUT_UTF8(cp, tmp, av_bprint_chars(dstbuf, tmp, 1);)
+            }
+            continue;
+        err:
+            if (flags & AV_ESCAPE_FLAG_REPLACE_INVALID_ASCII) {
+                av_bprint_chars(dstbuf, '?', 1);
+            } else if (flags & AV_ESCAPE_FLAG_REPLACE_INVALID_SEQUENCES) {
+                if (flags & AV_ESCAPE_FLAG_NON_ASCII)
+                    av_bprintf(dstbuf, "\xEF\xBF\xBD");
+                else
+                    av_bprintf(dstbuf, "&#xfffd;");
+            } else {
+                while (src1 < src)
+                    av_bprint_chars(dstbuf, *src1++, 1);
+            }
+        }
+        break;
     }
 }
-- 
2.11.1



More information about the ffmpeg-devel mailing list