id,summary,reporter,owner,description,type,status,priority,component,version,resolution,keywords,cc,blockedby,blocking,reproduced,analyzed
1163,ffprobe can produce invalid XML,Ian,stefano,"ffprobe can output invalid XML because xml_escape_str only handles <, >, ', "" and &. For example, most control characters below 32 are valid UTF-8 but are not allowed in XML 1.0 (only tab, line feed and carriage return are).

This replacement version of the function replaces invalid UTF-8 sequences and disallowed control characters with the inverted question mark (U+00BF, encoded in UTF-8 as the bytes 0xC2 0xBF).

{{{#!C
/**
 * Return the length in bytes of the character starting at p when it is a
 * well-formed UTF-8 sequence encoding a character allowed by the XML 1.0
 * Char production, or 0 otherwise.
 *
 * Scanning stops at the first byte that is not a continuation byte, so the
 * terminating NUL of the string is never read past.
 */
static int xml_char_len(const unsigned char *p)
{
    /* smallest code point that needs a sequence of the given length */
    static const unsigned long min_cp[5] = { 0, 0, 0x80, 0x800, 0x10000 };
    unsigned long cp;
    int len, i;

    if (p[0] < 0x80) {
        len = 1;
        cp  = p[0];
    } else if (p[0] < 0xC0) {
        return 0;                       /* stray continuation byte */
    } else if (p[0] < 0xE0) {
        len = 2;
        cp  = p[0] & 0x1F;
    } else if (p[0] < 0xF0) {
        len = 3;
        cp  = p[0] & 0x0F;
    } else if (p[0] < 0xF8) {
        len = 4;
        cp  = p[0] & 0x07;
    } else {
        return 0;                       /* 0xF8..0xFF never start a sequence */
    }

    for (i = 1; i < len; i++) {
        /* a NUL fails this test too, so truncated sequences are rejected */
        if ((p[i] & 0xC0) != 0x80)
            return 0;
        cp = (cp << 6) | (p[i] & 0x3F);
    }

    if (cp < min_cp[len])
        return 0;                       /* overlong encoding */
    if (cp < 0x20 && cp != 0x9 && cp != 0xA && cp != 0xD)
        return 0;                       /* control character forbidden in XML */
    if (cp >= 0xD800 && cp <= 0xDFFF)
        return 0;                       /* UTF-16 surrogate */
    if (cp == 0xFFFE || cp == 0xFFFF || cp > 0x10FFFF)
        return 0;                       /* non-character or out of range */

    return len;
}

/**
 * Return the entity reference that must replace c in XML output, or NULL
 * when c can be copied literally.
 */
static const char *xml_entity(unsigned char c)
{
    switch (c) {
    case '&':  return ""&amp;"";
    case '<':  return ""&lt;"";
    case '>':  return ""&gt;"";
    case '""':  return ""&quot;"";
    case '\'': return ""&apos;"";
    default:   return NULL;
    }
}

/**
 * Escape src so that it can be embedded in an XML document.
 *
 * The five XML special characters are replaced by their entity references.
 * Every byte that does not start a valid character (see xml_char_len()) is
 * replaced by an inverted question mark (U+00BF, bytes 0xC2 0xBF); only that
 * single byte is skipped, so valid text following a broken sequence is kept.
 *
 * @param dst      address of the output buffer pointer
 * @param dst_size address of the current size of *dst
 * @param src      NUL-terminated string to escape
 * @param log_ctx  not used directly here; NOTE(review): presumably referenced
 *                 by the ESCAPE_* macros for logging - confirm
 * @return *dst, holding the NUL-terminated escaped string
 *
 * NOTE(review): ESCAPE_CHECK_SIZE and ESCAPE_REALLOC_BUF are defined outside
 * this snippet; they are assumed to return early from this function when the
 * size limit is exceeded or the buffer cannot be grown - confirm.
 */
static const char *xml_escape_str(char **dst, size_t *dst_size, const char *src,
                                  void *log_ctx)
{
    const unsigned char *p;
    unsigned char *q;
    size_t size = 1;                    /* room for the terminating NUL */

    /* precompute size */
    for (p = (const unsigned char *)src; *p;) {
        const char *entity;
        int len;

        /* at most 6 bytes are added per iteration, so this cannot wrap */
        ESCAPE_CHECK_SIZE(src, size, SIZE_MAX-10);

        len = xml_char_len(p);
        if (!len) {
            size += 2;                  /* replacement character */
            p++;
            continue;
        }

        entity = xml_entity(*p);
        if (entity) {
            while (*entity++)
                size++;
        } else {
            size += len;
        }
        p += len;
    }

    ESCAPE_REALLOC_BUF(dst_size, dst, src, size);

    /* second pass: same classification as above, so the sizes always agree */
    q = (unsigned char *)*dst;
    for (p = (const unsigned char *)src; *p;) {
        const char *entity;
        int len = xml_char_len(p);

        if (!len) {
            *q++ = 0xC2;                /* U+00BF in UTF-8 */
            *q++ = 0xBF;
            p++;
            continue;
        }

        entity = xml_entity(*p);
        if (entity) {
            while (*entity)
                *q++ = (unsigned char)*entity++;
            p++;
        } else {
            while (len-- > 0)
                *q++ = *p++;
        }
    }
    *q = 0;

    return *dst;
}
}}}",defect,new,normal,FFprobe,0.10.2,,,n@…,,,0,0
