config: add helper to normalize and match URLs
Some http.* configuration variables need to take values customized
for the URL we are talking to. We may want to set http.sslVerify to
true in general but to false only for a certain site, for example,
with a configuration file like this:
[http]
sslVerify = true
[http "https://weak.example.com"]
sslVerify = false
and let the configuration machinery pick up the latter only when
talking to "https://weak.example.com". The latter needs to kick in
not only when the URL is exactly "https://weak.example.com", but
also when it is anything that "matches" it, e.g.
https://weak.example.com/test
https://me@weak.example.com/test
The <url> in the configuration key consists of the following parts,
and is considered a match to the URL we are attempting to access
under certain conditions:
. Scheme (e.g., `https` in `https://example.com/`). This field
must match exactly between the config key and the URL.
. Host/domain name (e.g., `example.com` in `https://example.com/`).
This field must match exactly between the config key and the URL.
. Port number (e.g., `8080` in `http://example.com:8080/`). This
field must match exactly between the config key and the URL.
Omitted port numbers are automatically converted to the correct
default for the scheme before matching.
. Path (e.g., `repo.git` in `https://example.com/repo.git`). The
path field of the config key must match the path field of the
URL either exactly or as a prefix of slash-delimited path
elements. A config key with path `foo/` matches URL path
`foo/bar`. A prefix can only match on a slash (`/`) boundary.
Longer matches take precedence (so a config key with path
`foo/bar` is a better match to URL path `foo/bar` than a config
key with just path `foo/`).
. User name (e.g., `me` in `https://me@example.com/repo.git`). If
the config key has a user name, it must match the user name in
the URL exactly. If the config key does not have a user name,
that config key will match a URL with any user name (including
none), but at a lower precedence than a config key with a user
name.
Longer matches take precedence over shorter matches.
This step adds two helper functions `url_normalize()` and
`match_urls()` to help implement the above semantics. The
normalization rules are based on RFC 3986 and should result in any
two equivalent urls being a match.
Signed-off-by: Kyle J. McKay <mackyle@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
/*
 * Include guard: the macro must be defined right after it is tested,
 * otherwise a second inclusion of this header is not skipped and every
 * struct below gets redefined.
 */
#ifndef URL_MATCH_H
#define URL_MATCH_H
|
|
|
|
#include "string-list.h"
|
|
|
|
|
|
|
|
/*
 * Outcome of normalizing a URL: either the normalized string together
 * with the location of each of its components, or a failure reason.
 *
 * All offsets and lengths are byte counts into the normalized `url`.
 */
struct url_info {
	/* normalized url on success, must be freed, otherwise NULL */
	char *url;
	/* if !url, a brief reason for the failure, otherwise NULL */
	const char *err;

	/* the rest of the fields are only set if url != NULL */

	/* total length of url (which is now normalized) */
	size_t url_len;
	/* length of scheme name (excluding final :) */
	size_t scheme_len;
	/* offset into url to start of user name (0 => none) */
	size_t user_off;
	/*
	 * length of user name; if user_off != 0 but user_len == 0,
	 * an empty user name was given
	 */
	size_t user_len;
	/* offset into url to start of passwd (0 => none) */
	size_t passwd_off;
	/*
	 * length of passwd; if passwd_off != 0 but passwd_len == 0,
	 * an empty passwd was given
	 */
	size_t passwd_len;
	/* offset into url to start of host name (0 => none) */
	size_t host_off;
	/*
	 * length of host name; this INCLUDES any ':portnum';
	 * file urls may have host_len == 0
	 */
	size_t host_len;
	/*
	 * if a portnum is present (port_len != 0), it has this length
	 * (excluding the leading ':') at the end of the host name
	 * (always 0 for file urls)
	 */
	size_t port_len;
	/*
	 * offset into url to the start of the url path; this will
	 * always point to a '/' character after the url has been
	 * normalized
	 */
	size_t path_off;
	/*
	 * length of path portion excluding any trailing '?...' and
	 * '#...' portion; will always be >= 1
	 */
	size_t path_len;
};
|
|
|
|
|
|
|
|
/*
 * Normalize `url` and describe the result in `out_info`.
 *
 * On success out_info->url holds the normalized string, which the
 * caller must free; on failure it is NULL and out_info->err carries a
 * brief reason.
 *
 * NOTE(review): the relationship between the return value and
 * out_info->url, and whether out_info may be NULL, is not visible in
 * this header -- confirm against the implementation.
 */
char *url_normalize(const char *url, struct url_info *out_info);
|
|
|
|
|
|
|
|
/*
 * Quality of one config-key match against a URL.
 *
 * NOTE(review): field meanings are inferred from the names -- confirm
 * against the matching code before relying on them.
 */
struct urlmatch_item {
	/* presumably how much of the URL the config key matched */
	size_t matched_len;
	/* presumably non-zero when the key's user name matched too */
	char user_matched;
};
|
|
|
|
|
|
|
|
struct urlmatch_config {
|
|
|
|
struct string_list vars;
|
|
|
|
struct url_info url;
|
|
|
|
const char *section;
|
|
|
|
const char *key;
|
|
|
|
|
|
|
|
void *cb;
|
|
|
|
int (*collect_fn)(const char *var, const char *value, void *cb);
|
|
|
|
int (*cascade_fn)(const char *var, const char *value, void *cb);
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Config callback taking a variable name, its value and an opaque
 * callback-data pointer.
 *
 * NOTE(review): `cb` presumably points to a struct urlmatch_config, and
 * the meaning of the return value is not visible in this header --
 * confirm against the implementation.
 */
int urlmatch_config_entry(const char *var, const char *value, void *cb);
|
|
|
|
|
config: add helper to normalize and match URLs
Some http.* configuration variables need to take values customized
for the URL we are talking to. We may want to set http.sslVerify to
true in general but to false only for a certain site, for example,
with a configuration file like this:
[http]
sslVerify = true
[http "https://weak.example.com"]
sslVerify = false
and let the configuration machinery pick up the latter only when
talking to "https://weak.example.com". The latter needs to kick in
not only when the URL is exactly "https://weak.example.com", but
also when it is anything that "matches" it, e.g.
https://weak.example.com/test
https://me@weak.example.com/test
The <url> in the configuration key consists of the following parts,
and is considered a match to the URL we are attempting to access
under certain conditions:
. Scheme (e.g., `https` in `https://example.com/`). This field
must match exactly between the config key and the URL.
. Host/domain name (e.g., `example.com` in `https://example.com/`).
This field must match exactly between the config key and the URL.
. Port number (e.g., `8080` in `http://example.com:8080/`). This
field must match exactly between the config key and the URL.
Omitted port numbers are automatically converted to the correct
default for the scheme before matching.
. Path (e.g., `repo.git` in `https://example.com/repo.git`). The
path field of the config key must match the path field of the
URL either exactly or as a prefix of slash-delimited path
elements. A config key with path `foo/` matches URL path
`foo/bar`. A prefix can only match on a slash (`/`) boundary.
Longer matches take precedence (so a config key with path
`foo/bar` is a better match to URL path `foo/bar` than a config
key with just path `foo/`).
. User name (e.g., `me` in `https://me@example.com/repo.git`). If
the config key has a user name, it must match the user name in
the URL exactly. If the config key does not have a user name,
that config key will match a URL with any user name (including
none), but at a lower precedence than a config key with a user
name.
Longer matches take precedence over shorter matches.
This step adds two helper functions `url_normalize()` and
`match_urls()` to help implement the above semantics. The
normalization rules are based on RFC 3986 and should result in any
two equivalent urls being a match.
Signed-off-by: Kyle J. McKay <mackyle@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
#endif /* URL_MATCH_H */
|