FFmpeg
url.c
Go to the documentation of this file.
1 /*
2  * URL utility functions
3  * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include <string.h>
23 
24 #include "config.h"
25 #include "avio.h"
26 #include "url.h"
27 #if CONFIG_NETWORK
28 #include "network.h"
29 #endif
30 #include "libavutil/avassert.h"
31 #include "libavutil/avstring.h"
32 #include "libavutil/error.h"
33 #include "libavutil/mem.h"
34 
35 /**
36  * @file
37  * URL utility functions.
38  */
39 
/**
 * Assemble a URL string from its individual parts.
 *
 * The pieces are appended to str in this order: "proto://" (when proto is
 * non-NULL), "authorization@" (when authorization is non-NULL and not
 * empty), the hostname, ":port" (when port is not negative) and finally the
 * printf-style expansion of fmt (when fmt is non-NULL).
 * If networking support and AF_INET6 are available and hostname is accepted
 * by getaddrinfo() as a numerical IPv6 address, the hostname is enclosed in
 * square brackets.
 *
 * @param str           destination buffer; reset to an empty string first
 * @param size          size of the destination buffer
 * @param proto         protocol name, or NULL to omit the "proto://" prefix
 * @param authorization userinfo part, or NULL/empty to omit it
 * @param hostname      host name or numerical address
 * @param port          port number, or a negative value to omit it
 * @param fmt           printf-style format for the trailing part, or NULL
 * @return the length of the string left in str
 */
int ff_url_join(char *str, int size, const char *proto,
                const char *authorization, const char *hostname,
                int port, const char *fmt, ...)
{
    int bracket_host = 0;
#if CONFIG_NETWORK
    struct addrinfo hints = { 0 }, *ai;
#endif

    str[0] = '\0';

    if (proto)
        av_strlcatf(str, size, "%s://", proto);

    if (authorization && authorization[0])
        av_strlcatf(str, size, "%s@", authorization);

#if CONFIG_NETWORK && defined(AF_INET6)
    /* AI_NUMERICHOST restricts the lookup to numerical addresses, so a
     * successful call only tells us which address family the literal has. */
    hints.ai_flags = AI_NUMERICHOST;
    if (!getaddrinfo(hostname, NULL, &hints, &ai)) {
        bracket_host = ai->ai_family == AF_INET6;
        freeaddrinfo(ai);
    }
#endif

    if (bracket_host) {
        /* Numerical IPv6 address: escape it within []. */
        av_strlcat(str, "[", size);
        av_strlcat(str, hostname, size);
        av_strlcat(str, "]", size);
    } else {
        /* Anything else is output as the plain string. */
        av_strlcat(str, hostname, size);
    }

    if (port >= 0)
        av_strlcatf(str, size, ":%d", port);

    if (fmt) {
        va_list args;
        size_t used = strlen(str);
        /* Remaining room after what has been written so far. */
        size_t room = size > used ? size - used : 0;

        va_start(args, fmt);
        vsnprintf(str + used, room, fmt, args);
        va_end(args);
    }

    return strlen(str);
}
83 
/**
 * Scan [cur, end) for the first character that strchr() finds in delim.
 *
 * Because strchr() also matches the terminating NUL of delim, a NUL byte
 * inside the scanned range stops the scan as well.
 *
 * @param delim NUL-terminated set of delimiter characters
 * @param cur   first character to examine
 * @param end   one past the last character to examine
 * @return pointer to the first matching character, or end if there is none
 */
static const char *find_delim(const char *delim, const char *cur, const char *end)
{
    for (; cur < end; cur++) {
        if (strchr(delim, *cur))
            break;
    }
    return cur;
}
90 
91 int ff_url_decompose(URLComponents *uc, const char *url, const char *end)
92 {
93  const char *cur, *aend, *p;
94 
95  av_assert0(url);
96  if (!end)
97  end = url + strlen(url);
98  cur = uc->url = url;
99 
100  /* scheme */
101  uc->scheme = cur;
102  p = find_delim(":/?#", cur, end); /* lavf "schemes" can contain options but not some RFC 3986 delimiters */
103  if (*p == ':')
104  cur = p + 1;
105 
106  /* authority */
107  uc->authority = cur;
108  if (end - cur >= 2 && cur[0] == '/' && cur[1] == '/') {
109  cur += 2;
110  aend = find_delim("/?#", cur, end);
111 
112  /* userinfo */
113  uc->userinfo = cur;
114  p = find_delim("@", cur, aend);
115  if (*p == '@')
116  cur = p + 1;
117 
118  /* host */
119  uc->host = cur;
120  if (*cur == '[') { /* hello IPv6, thanks for using colons! */
121  p = find_delim("]", cur, aend);
122  if (*p != ']')
123  return AVERROR(EINVAL);
124  if (p + 1 < aend && p[1] != ':')
125  return AVERROR(EINVAL);
126  cur = p + 1;
127  } else {
128  cur = find_delim(":", cur, aend);
129  }
130 
131  /* port */
132  uc->port = cur;
133  cur = aend;
134  } else {
135  uc->userinfo = uc->host = uc->port = cur;
136  }
137 
138  /* path */
139  uc->path = cur;
140  cur = find_delim("?#", cur, end);
141 
142  /* query */
143  uc->query = cur;
144  if (*cur == '?')
145  cur = find_delim("#", cur, end);
146 
147  /* fragment */
148  uc->fragment = cur;
149 
150  uc->end = end;
151  return 0;
152 }
153 
/**
 * Tell whether a path is a fully qualified DOS-style path.
 *
 * Two forms are recognised: an ASCII drive letter followed by ':' and a
 * slash or backslash (e.g. "C:/" or "c:\"), and a path starting with two
 * characters that are each a slash or a backslash (e.g. "\\server").
 *
 * @param path NUL-terminated path to examine
 * @return 1 if the path matches one of the two forms, 0 otherwise
 */
static int is_fq_dos_path(const char *path)
{
    const char first = path[0];
    const int has_drive_letter = (first >= 'a' && first <= 'z') ||
                                 (first >= 'A' && first <= 'Z');

    /* path[1] and path[2] are only read once the preceding character is
     * known not to be the terminating NUL. */
    if (has_drive_letter && path[1] == ':') {
        if (path[2] == '/' || path[2] == '\\')
            return 1;
    }

    if (first == '/' || first == '\\') {
        if (path[1] == '/' || path[1] == '\\')
            return 1;
    }

    return 0;
}
165 
/**
 * Append the path [in, in_end) to an output path, resolving "." and ".."
 * segments on the way.
 *
 * A leading '/' of the input is skipped. Each remaining segment is copied
 * together with its trailing '/', except that "." segments are dropped and
 * ".." segments remove the last segment already present in the output.
 *
 * @param root    start of the output path; ".." never goes back before it
 * @param out_end limit of the output buffer
 * @param rout    in: current write position; out: updated write position
 *                (left untouched on failure)
 * @param in      start of the path to append
 * @param in_end  one past the end of the path to append
 * @return 0 on success, AVERROR(ENOMEM) if a segment does not fit
 */
static int append_path(char *root, char *out_end, char **rout,
                       const char *in, const char *in_end)
{
    char *dst = *rout;
    const char *seg_end, *next;

    if (in < in_end && *in == '/')
        in++; /* already taken care of */

    for (; in < in_end; in = next) {
        seg_end = find_delim("/", in, in_end);
        /* next starts after the separator, if the segment has one. */
        next = seg_end;
        if (seg_end < in_end && *seg_end == '/')
            next++;

        if (seg_end - in == 1 && in[0] == '.')
            continue; /* "." adds nothing */

        if (seg_end - in == 2 && in[0] == '.' && in[1] == '.') {
            av_assert1(dst[-1] == '/');
            /* Step back to just after the previous '/', or to root. */
            if (dst - root > 1) {
                while (dst > root) {
                    dst--;
                    if (dst[-1] == '/')
                        break;
                }
            }
            continue;
        }

        if (out_end - dst < next - in)
            return AVERROR(ENOMEM);
        memmove(dst, in, next - in);
        dst += next - in;
    }

    *rout = dst;
    return 0;
}
194 
195 int ff_make_absolute_url2(char *buf, int size, const char *base,
196  const char *rel, int handle_dos_paths)
197 {
198  URLComponents ub, uc;
199  char *out, *out_end, *path;
200  const char *keep, *base_path_end;
201  int use_base_path, simplify_path = 0, ret;
202  const char *base_separators = "/";
203 
204  /* This is tricky.
205  For HTTP, http://server/site/page + ../media/file
206  should resolve into http://server/media/file
207  but for filesystem access, dir/playlist + ../media/file
208  should resolve into dir/../media/file
209  because dir could be a symlink, and .. points to
210  the actual parent of the target directory.
211 
212  We'll consider that URLs with an actual scheme and authority,
213  i.e. starting with scheme://, need parent dir simplification,
214  while bare paths or pseudo-URLs starting with proto: without
215  the double slash do not.
216 
217  For real URLs, the processing is similar to the algorithm described
218  here:
219  https://tools.ietf.org/html/rfc3986#section-5
220  */
221 
222  if (!size)
223  return AVERROR(ENOMEM);
224  out = buf;
225  out_end = buf + size - 1;
226 
227  if (!base)
228  base = "";
229  if (handle_dos_paths) {
230  if ((ret = ff_url_decompose(&ub, base, NULL)) < 0)
231  goto error;
232  if (is_fq_dos_path(base) || av_strstart(base, "file:", NULL) || ub.path == ub.url) {
233  base_separators = "/\\";
234  if (is_fq_dos_path(rel))
235  base = "";
236  }
237  }
238  if ((ret = ff_url_decompose(&ub, base, NULL)) < 0 ||
239  (ret = ff_url_decompose(&uc, rel, NULL)) < 0)
240  goto error;
241 
242  keep = ub.url;
243 #define KEEP(component, also) do { \
244  if (uc.url_component_end_##component == uc.url && \
245  ub.url_component_end_##component > keep) { \
246  keep = ub.url_component_end_##component; \
247  also \
248  } \
249  } while (0)
250  KEEP(scheme, );
251  KEEP(authority_full, simplify_path = 1;);
252  KEEP(path,);
253  KEEP(query,);
254  KEEP(fragment,);
255 #undef KEEP
256 #define COPY(start, end) do { \
257  size_t len = end - start; \
258  if (len > out_end - out) { \
259  ret = AVERROR(ENOMEM); \
260  goto error; \
261  } \
262  memmove(out, start, len); \
263  out += len; \
264  } while (0)
265  COPY(ub.url, keep);
266  COPY(uc.url, uc.path);
267 
268  use_base_path = URL_COMPONENT_HAVE(ub, path) && keep <= ub.path;
269  if (uc.path > uc.url)
270  use_base_path = 0;
271  if (URL_COMPONENT_HAVE(uc, path) && uc.path[0] == '/')
272  use_base_path = 0;
273  if (use_base_path) {
274  base_path_end = ub.url_component_end_path;
275  if (URL_COMPONENT_HAVE(uc, path))
276  while (base_path_end > ub.path && !strchr(base_separators, base_path_end[-1]))
277  base_path_end--;
278  }
279  if (keep > ub.path)
280  simplify_path = 0;
281  if (URL_COMPONENT_HAVE(uc, scheme))
282  simplify_path = 0;
283  if (URL_COMPONENT_HAVE(uc, authority))
284  simplify_path = 1;
285  /* No path at all, leave it */
286  if (!use_base_path && !URL_COMPONENT_HAVE(uc, path))
287  simplify_path = 0;
288 
289  if (simplify_path) {
290  const char *root = "/";
291  COPY(root, root + 1);
292  path = out;
293  if (use_base_path) {
294  ret = append_path(path, out_end, &out, ub.path, base_path_end);
295  if (ret < 0)
296  goto error;
297  }
298  if (URL_COMPONENT_HAVE(uc, path)) {
299  ret = append_path(path, out_end, &out, uc.path, uc.url_component_end_path);
300  if (ret < 0)
301  goto error;
302  }
303  } else {
304  if (use_base_path)
305  COPY(ub.path, base_path_end);
306  COPY(uc.path, uc.url_component_end_path);
307  }
308 
309  COPY(uc.url_component_end_path, uc.end);
310 #undef COPY
311  *out = 0;
312  return 0;
313 
314 error:
315  snprintf(buf, size, "invalid:%s",
316  ret == AVERROR(ENOMEM) ? "truncated" :
317  ret == AVERROR(EINVAL) ? "syntax_error" : "");
318  return ret;
319 }
320 
321 int ff_make_absolute_url(char *buf, int size, const char *base,
322  const char *rel)
323 {
324  return ff_make_absolute_url2(buf, size, base, rel, HAVE_DOS_PATHS);
325 }
326 
328 {
329  AVIODirEntry *entry = av_mallocz(sizeof(AVIODirEntry));
330  if (entry) {
331  entry->type = AVIO_ENTRY_UNKNOWN;
332  entry->size = -1;
333  entry->modification_timestamp = -1;
334  entry->access_timestamp = -1;
335  entry->status_change_timestamp = -1;
336  entry->user_id = -1;
337  entry->group_id = -1;
338  entry->filemode = -1;
339  }
340  return entry;
341 }
error
static void error(const char *err)
Definition: target_bsf_fuzzer.c:31
ff_make_absolute_url2
int ff_make_absolute_url2(char *buf, int size, const char *base, const char *rel, int handle_dos_paths)
Convert a relative url into an absolute url, given a base url.
Definition: url.c:195
AVERROR
Filter: the word “frame” indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format: for each input and each output, the list of supported formats. For video that means pixel format. For audio that means channel layout and sample format; they are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, all references to both lists are replaced with a reference to the intersection. And when a single format is eventually chosen for a link amongst the remaining ones, all references to the list are updated. That means that if a filter requires that its input and output have the same format amongst a supported list, all it has to do is use a reference to the same list of formats. query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references, ownership and permissions.
out
FILE * out
Definition: movenc.c:54
URLComponents::url
const char * url
whole URL, for reference
Definition: url.h:370
URLComponents
Definition: url.h:369
find_delim
static const char * find_delim(const char *delim, const char *cur, const char *end)
Definition: url.c:84
AVIODirEntry::type
int type
Type of the entry.
Definition: avio.h:89
URLComponents::path
const char * path
Definition: url.h:376
base
uint8_t base
Definition: vp3data.h:128
av_strlcatf
size_t av_strlcatf(char *dst, size_t size, const char *fmt,...)
Definition: avstring.c:103
URL_COMPONENT_HAVE
#define URL_COMPONENT_HAVE(uc, component)
Definition: url.h:392
AVIO_ENTRY_UNKNOWN
@ AVIO_ENTRY_UNKNOWN
Definition: avio.h:68
URLComponents::fragment
const char * fragment
including initial '#' if present
Definition: url.h:378
fragment
Definition: dashdec.c:36
AVIODirEntry::access_timestamp
int64_t access_timestamp
Time of last access in microseconds since unix epoch, -1 if unknown.
Definition: avio.h:95
freeaddrinfo
#define freeaddrinfo
Definition: network.h:218
ub
#define ub(width, name)
Definition: cbs_h2645.c:400
AVIODirEntry::modification_timestamp
int64_t modification_timestamp
Time of last modification in microseconds since unix epoch, -1 if unknown.
Definition: avio.h:93
avassert.h
AI_NUMERICHOST
#define AI_NUMERICHOST
Definition: network.h:187
ff_url_join
int ff_url_join(char *str, int size, const char *proto, const char *authorization, const char *hostname, int port, const char *fmt,...)
Definition: url.c:40
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:40
URLComponents::query
const char * query
including initial '?' if present
Definition: url.h:377
append_path
static int append_path(char *root, char *out_end, char **rout, const char *in, const char *in_end)
Definition: url.c:166
NULL
#define NULL
Definition: coverity.c:32
AVIODirEntry::size
int64_t size
File size in bytes, -1 if unknown.
Definition: avio.h:92
URLComponents::authority
const char * authority
"//" if it is a real URL
Definition: url.h:372
URLComponents::end
const char * end
Definition: url.h:379
AVIODirEntry::group_id
int64_t group_id
Group ID of owner, -1 if unknown.
Definition: avio.h:100
COPY
#define COPY(start, end)
error.h
AVIODirEntry::filemode
int64_t filemode
Unix file mode, -1 if unknown.
Definition: avio.h:101
URLComponents::host
const char * host
Definition: url.h:374
size
int size
Definition: twinvq_data.h:10344
AVIODirEntry
Describes single entry of the directory.
Definition: avio.h:87
avio.h
AVIODirEntry::status_change_timestamp
int64_t status_change_timestamp
Time of last status change in microseconds since unix epoch, -1 if unknown.
Definition: avio.h:97
av_strstart
int av_strstart(const char *str, const char *pfx, const char **ptr)
Return non-zero if pfx is a prefix of str.
Definition: avstring.c:36
KEEP
#define KEEP(component, also)
getaddrinfo
#define getaddrinfo
Definition: network.h:217
AVIODirEntry::user_id
int64_t user_id
User ID of owner, -1 if unknown.
Definition: avio.h:99
ff_alloc_dir_entry
AVIODirEntry * ff_alloc_dir_entry(void)
Allocate directory entry with default values.
Definition: url.c:327
vsnprintf
#define vsnprintf
Definition: snprintf.h:36
av_assert1
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
Definition: avassert.h:56
url.h
av_mallocz
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:254
len
int len
Definition: vorbis_enc_data.h:426
URLComponents::port
const char * port
including initial ':' if present
Definition: url.h:375
URLComponents::userinfo
const char * userinfo
including final '@' if present
Definition: url.h:373
ret
ret
Definition: filter_design.txt:187
av_strlcat
size_t av_strlcat(char *dst, const char *src, size_t size)
Append the string src to the string dst, but to a total length of no more than size - 1 bytes,...
Definition: avstring.c:95
network.h
URLComponents::scheme
const char * scheme
possibly including lavf-specific options
Definition: url.h:371
addrinfo::ai_flags
int ai_flags
Definition: network.h:138
mem.h
ff_make_absolute_url
int ff_make_absolute_url(char *buf, int size, const char *base, const char *rel)
Convert a relative url into an absolute url, given a base url.
Definition: url.c:321
d
d
Definition: ffmpeg_filter.c:368
avstring.h
is_fq_dos_path
static int is_fq_dos_path(const char *path)
Definition: url.c:154
addrinfo
Definition: network.h:137
ff_url_decompose
int ff_url_decompose(URLComponents *uc, const char *url, const char *end)
Parse an URL to find the components.
Definition: url.c:91
snprintf
#define snprintf
Definition: snprintf.h:34