[FFmpeg-devel] [PATCH] ffprobe: implement string validation policy setting

Stefano Sabatini stefasab at gmail.com
Wed Oct 2 17:52:05 CEST 2013


This should fix trac tickets #1163, #2502, #2955.
---
 doc/ffprobe.texi |  24 ++++++++++
 ffprobe.c        | 141 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 158 insertions(+), 7 deletions(-)

diff --git a/doc/ffprobe.texi b/doc/ffprobe.texi
index 777dbe7..55c6e80 100644
--- a/doc/ffprobe.texi
+++ b/doc/ffprobe.texi
@@ -317,6 +317,30 @@ Show information related to program and library versions. This is the
 equivalent of setting both @option{-show_program_version} and
 @option{-show_library_versions} options.
 
+@item -string_validation_policy @var{policy}
+Set string validation policy. It accepts the following values:
+
+@table @samp
+@item fail
+The program will fail immediately if an invalid UTF-8 sequence is
+found in an input string. This is especially useful to validate
+input metadata.
+
+@item replace=REPLACEMENT
+The program will substitute the invalid UTF-8 sequences with the
+string specified in @var{REPLACEMENT}, which is typically a single
+character.
+
+In case the replacement string is not specified, the program will
+assume the empty string, that is, it will remove the invalid sequences
+from the input strings.
+This is especially useful to create valid metadata output from
+invalid sources.
+@end table
+
+By default the program will apply the replace policy with an empty
+replacement.
+
 @item -bitexact
 Force bitexact output, useful to produce output which is not dependent
 on the specific build.
diff --git a/ffprobe.c b/ffprobe.c
index c4f0a8f..2e2bb03 100644
--- a/ffprobe.c
+++ b/ffprobe.c
@@ -75,6 +75,14 @@ static int show_private_data            = 1;
 static char *print_format;
 static char *stream_specifier;
 
+typedef enum {
+    STRING_VALIDATION_POLICY_FAIL,
+    STRING_VALIDATION_POLICY_REPLACE,
+} StringValidationPolicy;
+
+StringValidationPolicy string_validation_policy = STRING_VALIDATION_POLICY_REPLACE;
+static char *string_validation_replace;
+
 typedef struct {
     int id;             ///< identifier
     int64_t start, end; ///< start, end in second/AV_TIME_BASE units
@@ -428,17 +436,93 @@ static inline void writer_print_integer(WriterContext *wctx,
     }
 }
 
+static inline int validate_string(char **dstp, const char *src, void *log_ctx)
+{
+    const uint8_t *p;
+    AVBPrint dstbuf;
+    int invalid_chars_nb = 0, ret = 0;
+
+    av_bprint_init(&dstbuf, 0, AV_BPRINT_SIZE_UNLIMITED);
+
+    for (p = src; *p;) {
+        uint32_t code;
+        uint8_t tmp;
+        int invalid = 0;
+
+        GET_UTF8(code, *p++, invalid = 1;);
+        if (invalid) {
+            invalid_chars_nb++;
+
+            switch (string_validation_policy) {
+            case STRING_VALIDATION_POLICY_FAIL:
+            {
+                av_log(log_ctx, AV_LOG_ERROR,
+                       "Invalid UTF-8 character found in sequence '%s'\n", src);
+                ret = AVERROR_INVALIDDATA;
+                goto end;
+            };
+            break;
+
+            case STRING_VALIDATION_POLICY_REPLACE:
+            if (string_validation_replace) {
+                const uint8_t *s;
+                for (s = string_validation_replace; *s;) {
+                    GET_UTF8(code, *s++, continue;);
+                    PUT_UTF8(code, tmp, av_bprint_chars(&dstbuf, tmp, 1););
+                }
+            }
+            break;
+            }
+        } else {
+            PUT_UTF8(code, tmp, av_bprint_chars(&dstbuf, tmp, 1););
+        }
+    }
+
+    if (invalid_chars_nb) {
+        av_log(log_ctx, AV_LOG_WARNING,
+               "%d invalid UTF-8 characters found in sequence '%s', "
+               "they have been replaced with '%s'\n",
+               invalid_chars_nb, src, (char *)av_x_if_null(string_validation_replace, ""));
+    }
+
+end:
+    av_bprint_finalize(&dstbuf, dstp);
+    return ret;
+}
+
+#define PRINT_STRING_OPT      1
+#define PRINT_STRING_VALIDATE 2
+
 static inline int writer_print_string(WriterContext *wctx,
-                                      const char *key, const char *val, int opt)
+                                      const char *key, const char *val, int flags)
 {
     const struct section *section = wctx->section[wctx->level];
     int ret = 0;
 
-    if (opt && !(wctx->writer->flags & WRITER_FLAG_DISPLAY_OPTIONAL_FIELDS))
+    if ((flags & PRINT_STRING_OPT)
+        && !(wctx->writer->flags & WRITER_FLAG_DISPLAY_OPTIONAL_FIELDS))
         return 0;
 
     if (section->show_all_entries || av_dict_get(section->entries_to_show, key, NULL, 0)) {
-        wctx->writer->print_string(wctx, key, val);
+        if (flags & PRINT_STRING_VALIDATE) {
+            char *key1 = NULL, *val1 = NULL;
+            ret = validate_string(&key1, key, wctx);
+            if (ret < 0) goto end;
+            ret = validate_string(&val1, val, wctx);
+            if (ret < 0) goto end;
+            wctx->writer->print_string(wctx, key1, val1);
+        end:
+            if (ret < 0) {
+                av_log(wctx, AV_LOG_ERROR,
+                       "Invalid key=value string combination %s=%s in section %s\n",
+                       key, val, section->unique_name);
+            }
+            av_free(key1);
+            av_free(val1);
+        } else {
+            wctx->writer->print_string(wctx, key, val);
+        }
+
         wctx->nb_item[wctx->level]++;
     }
 
@@ -460,7 +544,7 @@ static void writer_print_time(WriterContext *wctx, const char *key,
     char buf[128];
 
     if ((!is_duration && ts == AV_NOPTS_VALUE) || (is_duration && ts == 0)) {
-        writer_print_string(wctx, key, "N/A", 1);
+        writer_print_string(wctx, key, "N/A", PRINT_STRING_OPT);
     } else {
         double d = ts * av_q2d(*time_base);
         struct unit_value uv;
@@ -474,7 +558,7 @@ static void writer_print_time(WriterContext *wctx, const char *key,
 static void writer_print_ts(WriterContext *wctx, const char *key, int64_t ts, int is_duration)
 {
     if ((!is_duration && ts == AV_NOPTS_VALUE) || (is_duration && ts == 0)) {
-        writer_print_string(wctx, key, "N/A", 1);
+        writer_print_string(wctx, key, "N/A", PRINT_STRING_OPT);
     } else {
         writer_print_integer(wctx, key, ts);
     }
@@ -1443,7 +1527,7 @@ static void writer_register_all(void)
 #define print_int(k, v)         writer_print_integer(w, k, v)
 #define print_q(k, v, s)        writer_print_rational(w, k, v, s)
 #define print_str(k, v)         writer_print_string(w, k, v, 0)
-#define print_str_opt(k, v)     writer_print_string(w, k, v, 1)
+#define print_str_opt(k, v)     writer_print_string(w, k, v, PRINT_STRING_OPT)
 #define print_time(k, v, tb)    writer_print_time(w, k, v, tb, 0)
 #define print_ts(k, v)          writer_print_ts(w, k, v, 0)
 #define print_duration_time(k, v, tb) writer_print_time(w, k, v, tb, 1)
@@ -1468,7 +1552,7 @@ static inline int show_tags(WriterContext *wctx, AVDictionary *tags, int section
     writer_print_section_header(wctx, section_id);
 
     while ((tag = av_dict_get(tags, "", tag, AV_DICT_IGNORE_SUFFIX))) {
-        ret = writer_print_string(wctx, tag->key, tag->value, 0);
+        ret = writer_print_string(wctx, tag->key, tag->value, PRINT_STRING_VALIDATE);
         if (ret < 0)
             break;
     }
@@ -2534,6 +2618,48 @@ static int opt_read_intervals(void *optctx, const char *opt, const char *arg)
     return parse_read_intervals(arg);
 }
 
+static int opt_string_validation_policy(void *optctx, const char *opt, const char *arg)
+{
+    char *mode = av_strdup(arg);
+    char *next;
+    int ret = 0;
+
+    if (!mode) return AVERROR(ENOMEM);
+
+    next = strchr(mode, '=');
+    if (next)
+        *next++ = 0;
+
+    if (!strcmp(mode, "fail")) {
+        string_validation_policy = STRING_VALIDATION_POLICY_FAIL;
+        if (next) {
+            av_log(NULL, AV_LOG_ERROR,
+                   "No argument must be specified for the option %s with mode 'fail'\n",
+                   opt);
+            ret = AVERROR(EINVAL);
+            goto end;
+        }
+    } else if (!strcmp(mode, "replace")) {
+        string_validation_policy = STRING_VALIDATION_POLICY_REPLACE;
+        string_validation_replace = av_strdup(next);
+
+        if (next && !string_validation_replace) {
+            ret = AVERROR(ENOMEM);
+            goto end;
+        }
+    } else {
+        av_log(NULL, AV_LOG_ERROR,
+               "Invalid argument '%s' for option '%s', "
+               "choose between fail, or replace=REPLACEMENT\n", arg, opt);
+        ret = AVERROR(EINVAL);
+        goto end;
+    }
+
+end:
+    av_free(mode);
+    return ret;
+}
+
 static int opt_pretty(void *optctx, const char *opt, const char *arg)
 {
     show_value_unit              = 1;
@@ -2633,6 +2759,7 @@ static const OptionDef real_options[] = {
     { "private",           OPT_BOOL, {(void*)&show_private_data}, "same as show_private_data" },
     { "bitexact", OPT_BOOL, {&do_bitexact}, "force bitexact output" },
     { "read_intervals", HAS_ARG, {.func_arg = opt_read_intervals}, "set read intervals", "read_intervals" },
+    { "string_validation_policy",  HAS_ARG, {.func_arg = opt_string_validation_policy}, "select the string validation policy", "policy_specification" },
     { "default", HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {.func_arg = opt_default}, "generic catch all option", "" },
     { "i", HAS_ARG, {.func_arg = opt_input_file_i}, "read specified file", "input_file"},
     { NULL, },
-- 
1.8.1.2



More information about the ffmpeg-devel mailing list