[FFmpeg-devel] [PATCH 2/5] lavu/bprint: add XML escaping

Nicolas George george at nsup.org
Wed Apr 12 16:23:22 EEST 2017


Le tridi 23 germinal, an CCXXV, Rodger Combs a écrit :
> ---
>  libavutil/avstring.h | 28 ++++++++++++++++++++++++++++
>  libavutil/bprint.c   | 43 +++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 71 insertions(+)
> 
> diff --git a/libavutil/avstring.h b/libavutil/avstring.h
> index 04d2695640..68b753a569 100644
> --- a/libavutil/avstring.h
> +++ b/libavutil/avstring.h
> @@ -314,6 +314,7 @@ enum AVEscapeMode {
>      AV_ESCAPE_MODE_AUTO,      ///< Use auto-selected escaping mode.
>      AV_ESCAPE_MODE_BACKSLASH, ///< Use backslash escaping.
>      AV_ESCAPE_MODE_QUOTE,     ///< Use single-quote escaping.
> +    AV_ESCAPE_MODE_XML,       ///< Use XML ampersand-escaping; requires UTF-8 input.
>  };
>  
>  /**
> @@ -334,6 +335,33 @@ enum AVEscapeMode {
>  #define AV_ESCAPE_FLAG_STRICT (1 << 1)
>  
>  /**
> + * In addition to the provided list, escape all characters outside the range of
> + * U+0020 to U+007E.
> + * This only applies to XML-escaping.
> + */
> +#define AV_ESCAPE_FLAG_NON_ASCII (1 << 2)
> +

> +/**
> + * In addition to the provided list, escape single or double quotes.
> + * This only applies to XML-escaping.
> + */

I think this doxy comment, written like that, only applies to SINGLE.

> +#define AV_ESCAPE_FLAG_ESCAPE_SINGLE_QUOTE (1 << 3)
> +#define AV_ESCAPE_FLAG_ESCAPE_DOUBLE_QUOTE (1 << 4)

Maybe also:

#define AV_ESCAPE_FLAG_ESCAPE_QUOTES (AV_ESCAPE_FLAG_ESCAPE_SINGLE_QUOTE|AV_ESCAPE_FLAG_ESCAPE_DOUBLE_QUOTE)

?

> +
> +/**
> + * Replace invalid UTF-8 characters with a U+FFFD REPLACEMENT CHARACTER, escaped
> + * if AV_ESCAPE_FLAG_NON_ASCII is set.
> + * This only applies to XML-escaping.
> + */
> +#define AV_ESCAPE_FLAG_REPLACE_INVALID_SEQUENCES (1 << 5)
> +
> +/**
> + * Replace invalid UTF-8 characters with a '?', overriding the previous flag.
> + * This only applies to XML-escaping.
> + */
> +#define AV_ESCAPE_FLAG_REPLACE_INVALID_ASCII (1 << 6)
> +
> +/**
>   * Escape string in src, and put the escaped string in an allocated
>   * string in *dst, which must be freed with av_free().
>   *
> diff --git a/libavutil/bprint.c b/libavutil/bprint.c
> index 652775bef9..8e44c57346 100644
> --- a/libavutil/bprint.c
> +++ b/libavutil/bprint.c
> @@ -302,5 +302,48 @@ void av_bprint_escape(AVBPrint *dstbuf, const char *src, const char *special_cha
>          }
>          av_bprint_chars(dstbuf, '\'', 1);
>          break;
> +
> +    case AV_ESCAPE_MODE_XML:
> +        /* &;-escape characters */
> +        while (*src) {
> +            uint8_t tmp;
> +            uint32_t cp;
> +            const char *src1 = src;
> +            GET_UTF8(cp, (uint8_t)*src++, goto err;);
> +
> +            if ((cp < 0xFF &&
> +                 ((special_chars && strchr(special_chars, cp)) ||
> +                  (flags & AV_ESCAPE_FLAG_WHITESPACE) && strchr(WHITESPACES, cp))) ||

> +                (!(flags & AV_ESCAPE_FLAG_STRICT) &&
> +                 (cp == '&' || cp == '<' || cp == '>')) ||

& and < must always be encoded in XML. And > can sometimes be left
alone, but so few people use it that I think it is not worth the effort.

> +                ((flags & AV_ESCAPE_FLAG_ESCAPE_SINGLE_QUOTE) && cp == '\'') ||
> +                ((flags & AV_ESCAPE_FLAG_ESCAPE_DOUBLE_QUOTE) && cp == '"') ||
> +                ((flags & AV_ESCAPE_FLAG_NON_ASCII) && (cp < 0x20 || cp > 0x7e))) {
> +                switch (cp) {
> +                case '&' : av_bprintf(dstbuf, "&amp;");  break;
> +                case '<' : av_bprintf(dstbuf, "&lt;");   break;
> +                case '>' : av_bprintf(dstbuf, "&gt;");   break;
> +                case '"' : av_bprintf(dstbuf, "&quot;"); break;
> +                case '\'': av_bprintf(dstbuf, "&apos;"); break;
> +                default:   av_bprintf(dstbuf, "&#x%"PRIx32";", cp); break;
> +                }
> +            } else {
> +                PUT_UTF8(cp, tmp, av_bprint_chars(dstbuf, tmp, 1);)
> +            }
> +            continue;
> +        err:
> +            if (flags & AV_ESCAPE_FLAG_REPLACE_INVALID_ASCII) {
> +                av_bprint_chars(dstbuf, '?', 1);
> +            } else if (flags & AV_ESCAPE_FLAG_REPLACE_INVALID_SEQUENCES) {
> +                if (flags & AV_ESCAPE_FLAG_NON_ASCII)
> +                    av_bprintf(dstbuf, "\xEF\xBF\xBD");
> +                else
> +                    av_bprintf(dstbuf, "&#xfffd;");
> +            } else {
> +                while (src1 < src)
> +                    av_bprint_chars(dstbuf, *src1++, 1);
> +            }
> +        }
> +        break;
>      }
>  }

Regards,

-- 
  Nicolas George
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20170412/61d8931b/attachment.sig>


More information about the ffmpeg-devel mailing list