[FFmpeg-cvslog] lavf/url: rewrite ff_make_absolute_url() using ff_url_decompose().
Nicolas George
git at videolan.org
Wed Sep 9 23:42:30 EEST 2020
ffmpeg | branch: release/4.3 | Nicolas George <george at nsup.org> | Thu Jul 30 00:02:10 2020 +0200| [5382d3b853952b93773e084ab74e793ac4205af8] | committer: Marton Balint
lavf/url: rewrite ff_make_absolute_url() using ff_url_decompose().
Also add and update some tests.
Change the semantic a little, because for filesytem paths
symlinks complicate things.
See the comments in the code for detail.
Fix trac tickets #8813 and 8814.
(cherry picked from commit 1201687da268c11459891a80ca1972aeaca8db88)
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5382d3b853952b93773e084ab74e793ac4205af8
---
libavformat/tests/url.c | 60 ++++++++++-
libavformat/url.c | 261 ++++++++++++++++++++++++------------------------
libavformat/url.h | 4 +-
tests/ref/fate/url | 54 +++++++++-
4 files changed, 245 insertions(+), 134 deletions(-)
diff --git a/libavformat/tests/url.c b/libavformat/tests/url.c
index e7d259ab7d..2440ae08bc 100644
--- a/libavformat/tests/url.c
+++ b/libavformat/tests/url.c
@@ -49,7 +49,13 @@ static void test_decompose(const char *url)
static void test(const char *base, const char *rel)
{
char buf[200], buf2[200];
- ff_make_absolute_url(buf, sizeof(buf), base, rel);
+ int ret;
+
+ ret = ff_make_absolute_url(buf, sizeof(buf), base, rel);
+ if (ret < 0) {
+ printf("%50s %-20s => error %s\n", base, rel, av_err2str(ret));
+ return;
+ }
printf("%50s %-20s => %s\n", base, rel, buf);
if (base) {
/* Test in-buffer replacement */
@@ -104,6 +110,58 @@ int main(void)
test("http://server/foo/bar", "/../../../../../other/url");
test("http://server/foo/bar", "/test/../../../../../other/url");
test("http://server/foo/bar", "/test/../../test/../../../other/url");
+ test("http://server/foo/bar", "file:../baz/qux");
+ test("http://server/foo//bar/", "../../");
+ test("file:../tmp/foo", "../bar/");
+ test("file:../tmp/foo", "file:../bar/");
+ test("http://server/foo/bar", "./");
+ test("http://server/foo/bar", ".dotfile");
+ test("http://server/foo/bar", "..doubledotfile");
+ test("http://server/foo/bar", "double..dotfile");
+ test("http://server/foo/bar", "doubledotfile..");
+
+ /* From https://tools.ietf.org/html/rfc3986#section-5.4 */
+ test("http://a/b/c/d;p?q", "g:h"); // g:h
+ test("http://a/b/c/d;p?q", "g"); // http://a/b/c/g
+ test("http://a/b/c/d;p?q", "./g"); // http://a/b/c/g
+ test("http://a/b/c/d;p?q", "g/"); // http://a/b/c/g/
+ test("http://a/b/c/d;p?q", "/g"); // http://a/g
+ test("http://a/b/c/d;p?q", "//g"); // http://g
+ test("http://a/b/c/d;p?q", "?y"); // http://a/b/c/d;p?y
+ test("http://a/b/c/d;p?q", "g?y"); // http://a/b/c/g?y
+ test("http://a/b/c/d;p?q", "#s"); // http://a/b/c/d;p?q#s
+ test("http://a/b/c/d;p?q", "g#s"); // http://a/b/c/g#s
+ test("http://a/b/c/d;p?q", "g?y#s"); // http://a/b/c/g?y#s
+ test("http://a/b/c/d;p?q", ";x"); // http://a/b/c/;x
+ test("http://a/b/c/d;p?q", "g;x"); // http://a/b/c/g;x
+ test("http://a/b/c/d;p?q", "g;x?y#s"); // http://a/b/c/g;x?y#s
+ test("http://a/b/c/d;p?q", ""); // http://a/b/c/d;p?q
+ test("http://a/b/c/d;p?q", "."); // http://a/b/c/
+ test("http://a/b/c/d;p?q", "./"); // http://a/b/c/
+ test("http://a/b/c/d;p?q", ".."); // http://a/b/
+ test("http://a/b/c/d;p?q", "../"); // http://a/b/
+ test("http://a/b/c/d;p?q", "../g"); // http://a/b/g
+ test("http://a/b/c/d;p?q", "../.."); // http://a/
+ test("http://a/b/c/d;p?q", "../../"); // http://a/
+ test("http://a/b/c/d;p?q", "../../g"); // http://a/g
+ test("http://a/b/c/d;p?q", "../../../g"); // http://a/g
+ test("http://a/b/c/d;p?q", "../../../../g"); // http://a/g
+ test("http://a/b/c/d;p?q", "/./g"); // http://a/g
+ test("http://a/b/c/d;p?q", "/../g"); // http://a/g
+ test("http://a/b/c/d;p?q", "g."); // http://a/b/c/g.
+ test("http://a/b/c/d;p?q", ".g"); // http://a/b/c/.g
+ test("http://a/b/c/d;p?q", "g.."); // http://a/b/c/g..
+ test("http://a/b/c/d;p?q", "..g"); // http://a/b/c/..g
+ test("http://a/b/c/d;p?q", "./../g"); // http://a/b/g
+ test("http://a/b/c/d;p?q", "./g/."); // http://a/b/c/g/
+ test("http://a/b/c/d;p?q", "g/./h"); // http://a/b/c/g/h
+ test("http://a/b/c/d;p?q", "g/../h"); // http://a/b/c/h
+ test("http://a/b/c/d;p?q", "g;x=1/./y"); // http://a/b/c/g;x=1/y
+ test("http://a/b/c/d;p?q", "g;x=1/../y"); // http://a/b/c/y
+ test("http://a/b/c/d;p?q", "g?y/./x"); // http://a/b/c/g?y/./x
+ test("http://a/b/c/d;p?q", "g?y/../x"); // http://a/b/c/g?y/../x
+ test("http://a/b/c/d;p?q", "g#s/./x"); // http://a/b/c/g#s/./x
+ test("http://a/b/c/d;p?q", "g#s/../x"); // http://a/b/c/g#s/../x
printf("\nTesting av_url_split:\n");
test2("/foo/bar");
diff --git a/libavformat/url.c b/libavformat/url.c
index b92409f6b2..3c858f0257 100644
--- a/libavformat/url.c
+++ b/libavformat/url.c
@@ -149,146 +149,149 @@ int ff_url_decompose(URLComponents *uc, const char *url, const char *end)
return 0;
}
-static void trim_double_dot_url(char *buf, const char *rel, int size)
+static int append_path(char *root, char *out_end, char **rout,
+ const char *in, const char *in_end)
{
- const char *p = rel;
- const char *root = rel;
- char tmp_path[MAX_URL_SIZE] = {0, };
- char *sep;
- char *node;
-
- /* Get the path root of the url which start by "://" */
- if (p && (sep = strstr(p, "://"))) {
- sep += 3;
- root = strchr(sep, '/');
- if (!root)
- return;
- }
-
- /* set new current position if the root node is changed */
- p = root;
- while (p && (node = strstr(p, ".."))) {
- av_strlcat(tmp_path, p, node - p + strlen(tmp_path));
- p = node + 3;
- sep = strrchr(tmp_path, '/');
- if (sep)
- sep[0] = '\0';
- else
- tmp_path[0] = '\0';
+ char *out = *rout;
+ const char *d, *next;
+
+ if (in < in_end && *in == '/')
+ in++; /* already taken care of */
+ while (in < in_end) {
+ d = find_delim("/", in, in_end);
+ next = d + (d < in_end && *d == '/');
+ if (d - in == 1 && in[0] == '.') {
+ /* skip */
+ } else if (d - in == 2 && in[0] == '.' && in[1] == '.') {
+ av_assert1(out[-1] == '/');
+ if (out - root > 1)
+ while (out > root && (--out)[-1] != '/');
+ } else {
+ if (out_end - out < next - in)
+ return AVERROR(ENOMEM);
+ memmove(out, in, next - in);
+ out += next - in;
+ }
+ in = next;
}
-
- if (!av_stristart(p, "/", NULL) && root != rel)
- av_strlcat(tmp_path, "/", size);
-
- av_strlcat(tmp_path, p, size);
- /* start set buf after temp path process. */
- av_strlcpy(buf, rel, root - rel + 1);
-
- if (!av_stristart(tmp_path, "/", NULL) && root != rel)
- av_strlcat(buf, "/", size);
-
- av_strlcat(buf, tmp_path, size);
+ *rout = out;
+ return 0;
}
-void ff_make_absolute_url(char *buf, int size, const char *base,
+int ff_make_absolute_url(char *buf, int size, const char *base,
const char *rel)
{
- char *sep, *path_query;
- char *root, *p;
- char tmp_path[MAX_URL_SIZE];
-
- memset(tmp_path, 0, sizeof(tmp_path));
- /* Absolute path, relative to the current server */
- if (base && strstr(base, "://") && rel[0] == '/') {
- if (base != buf)
- av_strlcpy(buf, base, size);
- sep = strstr(buf, "://");
- if (sep) {
- /* Take scheme from base url */
- if (rel[1] == '/') {
- sep[1] = '\0';
- } else {
- /* Take scheme and host from base url */
- sep += 3;
- sep = strchr(sep, '/');
- if (sep)
- *sep = '\0';
- }
- }
- av_strlcat(buf, rel, size);
- trim_double_dot_url(tmp_path, buf, size);
- memset(buf, 0, size);
- av_strlcpy(buf, tmp_path, size);
- return;
+ URLComponents ub, uc;
+ char *out, *out_end, *path;
+ const char *keep, *base_path_end;
+ int use_base_path, simplify_path = 0, ret;
+
+ /* This is tricky.
+ For HTTP, http://server/site/page + ../media/file
+ should resolve into http://server/media/file
+ but for filesystem access, dir/playlist + ../media/file
+ should resolve into dir/../media/file
+ because dir could be a symlink, and .. points to
+ the actual parent of the target directory.
+
+ We'll consider that URLs with an actual scheme and authority,
+ i.e. starting with scheme://, need parent dir simplification,
+ while bare paths or pseudo-URLs starting with proto: without
+ the double slash do not.
+
+ For real URLs, the processing is similar to the algorithm described
+ here:
+ https://tools.ietf.org/html/rfc3986#section-5
+ */
+
+ if (!size)
+ return AVERROR(ENOMEM);
+ out = buf;
+ out_end = buf + size - 1;
+
+ if (!base)
+ base = "";
+ if ((ret = ff_url_decompose(&ub, base, NULL) < 0) ||
+ (ret = ff_url_decompose(&uc, rel, NULL) < 0))
+ goto error;
+
+ keep = ub.url;
+#define KEEP(component, also) do { \
+ if (uc.url_component_end_##component == uc.url && \
+ ub.url_component_end_##component > keep) { \
+ keep = ub.url_component_end_##component; \
+ also \
+ } \
+ } while (0)
+ KEEP(scheme, );
+ KEEP(authority_full, simplify_path = 1;);
+ KEEP(path,);
+ KEEP(query,);
+ KEEP(fragment,);
+#undef KEEP
+#define COPY(start, end) do { \
+ size_t len = end - start; \
+ if (len > out_end - out) { \
+ ret = AVERROR(ENOMEM); \
+ goto error; \
+ } \
+ memmove(out, start, len); \
+ out += len; \
+ } while (0)
+ COPY(ub.url, keep);
+ COPY(uc.url, uc.path);
+
+ use_base_path = URL_COMPONENT_HAVE(ub, path) && keep <= ub.path;
+ if (uc.path > uc.url)
+ use_base_path = 0;
+ if (URL_COMPONENT_HAVE(uc, path) && uc.path[0] == '/')
+ use_base_path = 0;
+ if (use_base_path) {
+ base_path_end = ub.url_component_end_path;
+ if (URL_COMPONENT_HAVE(uc, path))
+ while (base_path_end > ub.path && base_path_end[-1] != '/')
+ base_path_end--;
}
- /* If rel actually is an absolute url, just copy it */
- if (!base || strstr(rel, "://") || rel[0] == '/') {
- memset(buf, 0, size);
- trim_double_dot_url(buf, rel, size);
- return;
- }
- if (base != buf)
- av_strlcpy(buf, base, size);
-
- /* Strip off any query string from base */
- path_query = strchr(buf, '?');
- if (path_query)
- *path_query = '\0';
-
- /* Is relative path just a new query part? */
- if (rel[0] == '?') {
- av_strlcat(buf, rel, size);
- trim_double_dot_url(tmp_path, buf, size);
- memset(buf, 0, size);
- av_strlcpy(buf, tmp_path, size);
- return;
- }
-
- root = p = buf;
- /* Get the path root of the url which start by "://" */
- if (p && strstr(p, "://")) {
- sep = strstr(p, "://");
- if (sep) {
- sep += 3;
- root = strchr(sep, '/');
- if (!root)
- return;
+ if (keep > ub.path)
+ simplify_path = 0;
+ if (URL_COMPONENT_HAVE(uc, scheme))
+ simplify_path = 0;
+ if (URL_COMPONENT_HAVE(uc, authority))
+ simplify_path = 1;
+ /* No path at all, leave it */
+ if (!use_base_path && !URL_COMPONENT_HAVE(uc, path))
+ simplify_path = 0;
+
+ if (simplify_path) {
+ const char *root = "/";
+ COPY(root, root + 1);
+ path = out;
+ if (use_base_path) {
+ ret = append_path(path, out_end, &out, ub.path, base_path_end);
+ if (ret < 0)
+ goto error;
}
+ if (URL_COMPONENT_HAVE(uc, path)) {
+ ret = append_path(path, out_end, &out, uc.path, uc.url_component_end_path);
+ if (ret < 0)
+ goto error;
+ }
+ } else {
+ if (use_base_path)
+ COPY(ub.path, base_path_end);
+ COPY(uc.path, uc.url_component_end_path);
}
- /* Remove the file name from the base url */
- sep = strrchr(buf, '/');
- if (sep && sep <= root)
- sep = root;
-
- if (sep)
- sep[1] = '\0';
- else
- buf[0] = '\0';
- while (av_strstart(rel, "..", NULL) && sep) {
- /* Remove the path delimiter at the end */
- if (sep > root) {
- sep[0] = '\0';
- sep = strrchr(buf, '/');
- }
+ COPY(uc.url_component_end_path, uc.end);
+#undef COPY
+ *out = 0;
+ return 0;
- /* If the next directory name to pop off is "..", break here */
- if (!strcmp(sep ? &sep[1] : buf, "..")) {
- /* Readd the slash we just removed */
- av_strlcat(buf, "/", size);
- break;
- }
- /* Cut off the directory name */
- if (sep)
- sep[1] = '\0';
- else
- buf[0] = '\0';
- rel += 3;
- }
- av_strlcat(buf, rel, size);
- trim_double_dot_url(tmp_path, buf, size);
- memset(buf, 0, size);
- av_strlcpy(buf, tmp_path, size);
+error:
+ snprintf(buf, size, "invalid:%s",
+ ret == AVERROR(ENOMEM) ? "truncated" :
+ ret == AVERROR(EINVAL) ? "syntax_error" : "");
+ return ret;
}
AVIODirEntry *ff_alloc_dir_entry(void)
diff --git a/libavformat/url.h b/libavformat/url.h
index ec70ce14c7..728a861bd5 100644
--- a/libavformat/url.h
+++ b/libavformat/url.h
@@ -312,8 +312,8 @@ int ff_url_join(char *str, int size, const char *proto,
* @param base the base url, may be equal to buf.
* @param rel the new url, which is interpreted relative to base
*/
-void ff_make_absolute_url(char *buf, int size, const char *base,
- const char *rel);
+int ff_make_absolute_url(char *buf, int size, const char *base,
+ const char *rel);
/**
* Allocate directory entry with default values.
diff --git a/tests/ref/fate/url b/tests/ref/fate/url
index 84cf85abdd..7e6395c47b 100644
--- a/tests/ref/fate/url
+++ b/tests/ref/fate/url
@@ -46,9 +46,9 @@ http://[::1]:8080/dev/null =>
Testing ff_make_absolute_url:
(null) baz => baz
/foo/bar baz => /foo/baz
- /foo/bar ../baz => /baz
+ /foo/bar ../baz => /foo/../baz
/foo/bar /baz => /baz
- /foo/bar ../../../baz => /baz
+ /foo/bar ../../../baz => /foo/../../../baz
http://server/foo/ baz => http://server/foo/baz
http://server/foo/bar baz => http://server/foo/baz
http://server/foo/ ../baz => http://server/baz
@@ -62,6 +62,56 @@ Testing ff_make_absolute_url:
http://server/foo/bar /../../../../../other/url => http://server/other/url
http://server/foo/bar /test/../../../../../other/url => http://server/other/url
http://server/foo/bar /test/../../test/../../../other/url => http://server/other/url
+ http://server/foo/bar file:../baz/qux => file:../baz/qux
+ http://server/foo//bar/ ../../ => http://server/foo/
+ file:../tmp/foo ../bar/ => file:../tmp/../bar/
+ file:../tmp/foo file:../bar/ => file:../bar/
+ http://server/foo/bar ./ => http://server/foo/
+ http://server/foo/bar .dotfile => http://server/foo/.dotfile
+ http://server/foo/bar ..doubledotfile => http://server/foo/..doubledotfile
+ http://server/foo/bar double..dotfile => http://server/foo/double..dotfile
+ http://server/foo/bar doubledotfile.. => http://server/foo/doubledotfile..
+ http://a/b/c/d;p?q g:h => g:h
+ http://a/b/c/d;p?q g => http://a/b/c/g
+ http://a/b/c/d;p?q ./g => http://a/b/c/g
+ http://a/b/c/d;p?q g/ => http://a/b/c/g/
+ http://a/b/c/d;p?q /g => http://a/g
+ http://a/b/c/d;p?q //g => http://g
+ http://a/b/c/d;p?q ?y => http://a/b/c/d;p?y
+ http://a/b/c/d;p?q g?y => http://a/b/c/g?y
+ http://a/b/c/d;p?q #s => http://a/b/c/d;p?q#s
+ http://a/b/c/d;p?q g#s => http://a/b/c/g#s
+ http://a/b/c/d;p?q g?y#s => http://a/b/c/g?y#s
+ http://a/b/c/d;p?q ;x => http://a/b/c/;x
+ http://a/b/c/d;p?q g;x => http://a/b/c/g;x
+ http://a/b/c/d;p?q g;x?y#s => http://a/b/c/g;x?y#s
+ http://a/b/c/d;p?q => http://a/b/c/d;p?q
+ http://a/b/c/d;p?q . => http://a/b/c/
+ http://a/b/c/d;p?q ./ => http://a/b/c/
+ http://a/b/c/d;p?q .. => http://a/b/
+ http://a/b/c/d;p?q ../ => http://a/b/
+ http://a/b/c/d;p?q ../g => http://a/b/g
+ http://a/b/c/d;p?q ../.. => http://a/
+ http://a/b/c/d;p?q ../../ => http://a/
+ http://a/b/c/d;p?q ../../g => http://a/g
+ http://a/b/c/d;p?q ../../../g => http://a/g
+ http://a/b/c/d;p?q ../../../../g => http://a/g
+ http://a/b/c/d;p?q /./g => http://a/g
+ http://a/b/c/d;p?q /../g => http://a/g
+ http://a/b/c/d;p?q g. => http://a/b/c/g.
+ http://a/b/c/d;p?q .g => http://a/b/c/.g
+ http://a/b/c/d;p?q g.. => http://a/b/c/g..
+ http://a/b/c/d;p?q ..g => http://a/b/c/..g
+ http://a/b/c/d;p?q ./../g => http://a/b/g
+ http://a/b/c/d;p?q ./g/. => http://a/b/c/g/
+ http://a/b/c/d;p?q g/./h => http://a/b/c/g/h
+ http://a/b/c/d;p?q g/../h => http://a/b/c/h
+ http://a/b/c/d;p?q g;x=1/./y => http://a/b/c/g;x=1/y
+ http://a/b/c/d;p?q g;x=1/../y => http://a/b/c/y
+ http://a/b/c/d;p?q g?y/./x => http://a/b/c/g?y/./x
+ http://a/b/c/d;p?q g?y/../x => http://a/b/c/g?y/../x
+ http://a/b/c/d;p?q g#s/./x => http://a/b/c/g#s/./x
+ http://a/b/c/d;p?q g#s/../x => http://a/b/c/g#s/../x
Testing av_url_split:
/foo/bar => -1 /foo/bar
More information about the ffmpeg-cvslog
mailing list