urlmatch: define url_parse function
Define url_parse, a general parsing function that supports all Git URLs
including scp style URLs such as hostname:~user/repo.
It is adapted from the algorithm in connect.c's parse_connect_url
and reuses the shared enum url_scheme and url_get_scheme function
that previous commits made available in url.h. The new parser and
the connect path agree on scheme classification. url_parse has the
same interface as url_normalize and uses the same data structures.
url_parse and parse_connect_url accept the same URL forms with one
deliberate exception. Bare local paths such as "/abs/path", "./rel"
or "repo" are accepted by parse_connect_url as URL_SCHEME_LOCAL,
but rejected by url_parse because url_normalize requires a URL
with a scheme://host form. A consumer that wants to handle both
URLs and local paths needs to dispatch on url_is_local_not_ssh
before calling url_parse, just as the connect path does internally.
The duplication with parse_connect_url is intentional.
The two functions have different contracts:
- parse_connect_url calls die() on an unknown scheme and returns
  NUL-terminated host/path strings for the connect path.
- url_parse returns NULL on failure while populating out_info->err,
  and exposes components as offset/length pairs into the normalized
  URL buffer, matching url_normalize.
Reconciling both is possible, but not in the scope
of the current patch set.
Signed-off-by: Matheus Afonso Martins Moreira <matheus@matheusmoreira.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
main
parent
46d6fb752e
commit
18a8281712
|
|
@ -245,3 +245,48 @@ void test_urlmatch_normalization__equivalents(void)
|
|||
compare_normalized_urls("https://@x.y/^/../abc", "httpS://@x.y:0443/abc", 1);
|
||||
compare_normalized_urls("https://@x.y/^/..", "httpS://@x.y:0443/", 1);
|
||||
}
|
||||
|
||||
static void check_parsed_path(const char *url, const char *expected_path)
|
||||
{
|
||||
struct url_info info;
|
||||
char *parsed = url_parse(url, &info);
|
||||
char *path;
|
||||
|
||||
cl_assert(parsed != NULL);
|
||||
path = xstrndup(parsed + info.path_off, info.path_len);
|
||||
cl_assert_equal_s(path, expected_path);
|
||||
free(path);
|
||||
free(parsed);
|
||||
}
|
||||
|
||||
/*
 * scp-style inputs ("[user@]host:path"), including the bracketed host
 * forms: plain "[host]", "[host:port]" and IPv6 literals.
 */
void test_urlmatch_normalization__parse_scp(void)
{
	static const struct {
		const char *url;
		const char *path;
	} cases[] = {
		{ "host:path", "/path" },
		{ "user@host:path", "/path" },
		{ "host:~user/repo", "~user/repo" },
		{ "user@host:~user/repo", "~user/repo" },
		{ "[host]:src", "/src" },
		{ "[host:123]:src", "/src" },
		{ "[::1]:repo", "/repo" },
		{ "user@[::1]:repo", "/repo" },
	};
	size_t i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		check_parsed_path(cases[i].url, cases[i].path);
}
|
||||
|
||||
/*
 * Inputs that already use the scheme://host form; for ssh:// and git://
 * a leading "/~" in the path is expected to be reported as "~".
 */
void test_urlmatch_normalization__parse_url_form(void)
{
	static const struct {
		const char *url;
		const char *path;
	} cases[] = {
		{ "ssh://host/repo", "/repo" },
		{ "ssh://host/~user/repo", "~user/repo" },
		{ "git://host:9418/repo", "/repo" },
		{ "git://host/~user/repo", "~user/repo" },
		{ "ssh://[::1]:1234/repo", "/repo" },
		{ "http://[2001:db8::1]/repo", "/repo" },
	};
	size_t i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		check_parsed_path(cases[i].url, cases[i].path);
}
|
||||
|
||||
/*
 * The reported path must not include a trailing "?query" or
 * "#fragment", with or without the "~" adjustment.
 */
void test_urlmatch_normalization__parse_strips_query_and_fragment(void)
{
	static const struct {
		const char *url;
		const char *path;
	} cases[] = {
		{ "ssh://host/~user/repo?q", "~user/repo" },
		{ "ssh://host/~user/repo#frag", "~user/repo" },
		{ "git://host/~user/repo?q", "~user/repo" },
		{ "user@host:~user/repo?q", "~user/repo" },
		{ "https://host/repo?q", "/repo" },
		{ "https://host/repo#frag", "/repo" },
	};
	size_t i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		check_parsed_path(cases[i].url, cases[i].path);
}
|
||||
|
|
|
|||
127
urlmatch.c
127
urlmatch.c
|
|
@ -5,6 +5,7 @@
|
|||
#include "hex-ll.h"
|
||||
#include "strbuf.h"
|
||||
#include "urlmatch.h"
|
||||
#include "url.h"
|
||||
|
||||
#define URL_ALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
|
||||
#define URL_DIGIT "0123456789"
|
||||
|
|
@ -440,6 +441,132 @@ char *url_normalize(const char *url, struct url_info *out_info)
|
|||
return url_normalize_1(url, out_info, 0);
|
||||
}
|
||||
|
||||
char *url_parse(const char *url_orig, struct url_info *out_info)
|
||||
{
|
||||
struct strbuf url;
|
||||
char *host, *separator;
|
||||
char *detached, *normalized;
|
||||
char *url_decoded;
|
||||
enum url_scheme scheme = URL_SCHEME_LOCAL;
|
||||
struct url_info local_info;
|
||||
struct url_info *info = out_info ? out_info : &local_info;
|
||||
bool scp_syntax = false;
|
||||
|
||||
if (is_url(url_orig))
|
||||
url_decoded = url_decode(url_orig);
|
||||
else
|
||||
url_decoded = xstrdup(url_orig);
|
||||
|
||||
strbuf_init(&url, strlen(url_decoded) + sizeof("ssh://"));
|
||||
strbuf_addstr(&url, url_decoded);
|
||||
free(url_decoded);
|
||||
|
||||
host = strstr(url.buf, "://");
|
||||
if (host) {
|
||||
/*
|
||||
* Temporarily NUL-terminate the scheme name
|
||||
* so we can pass it to url_get_scheme(),
|
||||
* then restore the ':' so the buffer
|
||||
* is intact for url_normalize() below.
|
||||
*/
|
||||
char saved = *host;
|
||||
*host = '\0';
|
||||
scheme = url_get_scheme(url.buf);
|
||||
*host = saved;
|
||||
host += 3;
|
||||
} else {
|
||||
if (!url_is_local_not_ssh(url.buf)) {
|
||||
scp_syntax = true;
|
||||
scheme = URL_SCHEME_SSH;
|
||||
strbuf_insertstr(&url, 0, "ssh://");
|
||||
host = url.buf + strlen("ssh://");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Path starts after ':' in scp style SSH URLs.
|
||||
*
|
||||
* The host portion can begin with an optional "user@",
|
||||
* and the host itself can be wrapped in '[' ']' brackets.
|
||||
* The bracket form is git's legacy way of supporting:
|
||||
*
|
||||
* - IPv6 literals: [::1]:repo
|
||||
* - host:port pairs in the short form: [myhost:123]:src
|
||||
* - Plain hostnames that happen to need bracketing: [host]:path
|
||||
*
|
||||
* Treat '[' followed by 0 or 1 inner colons as the host:port
|
||||
* or plain hostname form and strip the brackets so url_normalize
|
||||
* sees host[:port] natively. Two or more inner colons mark an
|
||||
* IPv6 literal: keep the brackets for url_normalize to recognize.
|
||||
*
|
||||
* The scp path separator is the ':' that follows the host part,
|
||||
* and we must skip over user@ and any '[...]' before searching.
|
||||
*/
|
||||
if (scp_syntax) {
|
||||
char *user_at;
|
||||
char *host_start;
|
||||
char *bracket_end;
|
||||
|
||||
user_at = strchr(host, '@');
|
||||
host_start = user_at ? user_at + 1 : host;
|
||||
|
||||
if (*host_start == '[') {
|
||||
char *p;
|
||||
int inner_colons;
|
||||
|
||||
bracket_end = strchr(host_start, ']');
|
||||
inner_colons = 0;
|
||||
for (p = host_start + 1; bracket_end && p < bracket_end; p++)
|
||||
if (*p == ':')
|
||||
inner_colons++;
|
||||
|
||||
if (bracket_end && inner_colons <= 1) {
|
||||
size_t close_off = bracket_end - url.buf;
|
||||
size_t open_off = host_start - url.buf;
|
||||
strbuf_remove(&url, close_off, 1);
|
||||
strbuf_remove(&url, open_off, 1);
|
||||
separator = url.buf + close_off - 1;
|
||||
} else if (bracket_end) {
|
||||
separator = strchr(bracket_end + 1, ':');
|
||||
} else {
|
||||
separator = strchr(host_start, ':');
|
||||
}
|
||||
} else {
|
||||
separator = strchr(host_start, ':');
|
||||
}
|
||||
|
||||
if (separator) {
|
||||
if (separator[1] == '/')
|
||||
strbuf_remove(&url, separator - url.buf, 1);
|
||||
else
|
||||
*separator = '/';
|
||||
}
|
||||
}
|
||||
|
||||
detached = strbuf_detach(&url, NULL);
|
||||
normalized = url_normalize(detached, info);
|
||||
free(detached);
|
||||
|
||||
if (!normalized)
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* Point path to ~ for URLs like this:
|
||||
*
|
||||
* ssh://host.xz/~user/repo
|
||||
* git://host.xz/~user/repo
|
||||
* host.xz:~user/repo
|
||||
*/
|
||||
if (scheme == URL_SCHEME_GIT || scheme == URL_SCHEME_SSH) {
|
||||
if (normalized[info->path_off + 1] == '~') {
|
||||
info->path_off++;
|
||||
info->path_len--;
|
||||
}
|
||||
}
|
||||
|
||||
return normalized;
|
||||
}
|
||||
|
||||
static size_t url_match_prefix(const char *url,
|
||||
const char *url_prefix,
|
||||
size_t url_prefix_len)
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ struct url_info {
|
|||
};
|
||||
|
||||
char *url_normalize(const char *, struct url_info *);
|
||||
/*
 * Like url_normalize(), but also accepts scp-style URLs such as
 * "host:~user/repo". Takes the same arguments and fills the same
 * struct url_info; returns NULL on failure. The returned string is
 * owned by the caller, who frees it.
 */
char *url_parse(const char *, struct url_info *);
|
||||
|
||||
struct urlmatch_item {
|
||||
size_t hostmatch_len;
|
||||
|
|
|
|||
Loading…
Reference in New Issue