From a39c14af82e973ba1502888e89585b7501721ede Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Jan 2014 03:26:33 -0500 Subject: [PATCH 1/5] interpret_branch_name: factor out upstream handling This function checks a few different @{}-constructs. The early part checks for and dispatches us to helpers for each construct, but the code for handling @{upstream} is inline. Let's factor this out into its own function. This makes interpret_branch_name more readable, and will make it much simpler to further refactor the function in future patches. While we're at it, let's also break apart the refactored code into a few helper functions. These will be useful if we eventually implement similar @{upstream}-like constructs. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_name.c | 83 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 52 insertions(+), 31 deletions(-) diff --git a/sha1_name.c b/sha1_name.c index e9c299943b..03c574e295 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -1048,6 +1048,54 @@ static int reinterpret(const char *name, int namelen, int len, struct strbuf *bu return ret - used + len; } +static void set_shortened_ref(struct strbuf *buf, const char *ref) +{ + char *s = shorten_unambiguous_ref(ref, 0); + strbuf_reset(buf); + strbuf_addstr(buf, s); + free(s); +} + +static const char *get_upstream_branch(const char *branch_buf, int len) +{ + char *branch = xstrndup(branch_buf, len); + struct branch *upstream = branch_get(*branch ? branch : NULL); + + /* + * Upstream can be NULL only if branch refers to HEAD and HEAD + * points to something different than a branch. + */ + if (!upstream) + die(_("HEAD does not point to a branch")); + if (!upstream->merge || !upstream->merge[0]->dst) { + if (!ref_exists(upstream->refname)) + die(_("No such branch: '%s'"), branch); + if (!upstream->merge) { + die(_("No upstream configured for branch '%s'"), + upstream->name); + } + die( + _("Upstream branch '%s' not stored as a remote-tracking branch"), + upstream->merge[0]->src); + } + free(branch); + + return upstream->merge[0]->dst; +} + +static int interpret_upstream_mark(const char *name, int namelen, + int at, struct strbuf *buf) +{ + int len; + + len = upstream_mark(name + at, namelen - at); + if (!len) + return -1; + + set_shortened_ref(buf, get_upstream_branch(name, at)); + return len + at; +} + /* * This reads short-hand syntax that not only evaluates to a commit * object name, but also can act as if the end user spelled the name @@ -1072,9 +1120,7 @@ static int reinterpret(const char *name, int namelen, int len, struct strbuf *bu int interpret_branch_name(const char *name, int namelen, struct strbuf *buf) { char *cp; - struct branch *upstream; int len = interpret_nth_prior_checkout(name, buf); - int tmp_len; if (!namelen) namelen = strlen(name); @@ -1096,36 +1142,11 @@ int interpret_branch_name(const char *name, int namelen, struct strbuf *buf) if (len > 0) return reinterpret(name, namelen, len, buf); - tmp_len = upstream_mark(cp, namelen - (cp - name)); - if (!tmp_len) - return -1; + len = interpret_upstream_mark(name, namelen, cp - name, buf); + if (len > 0) + return len; - len = cp + tmp_len - name; - cp = xstrndup(name, cp - name); - upstream = branch_get(*cp ? cp : NULL); - /* - * Upstream can be NULL only if cp refers to HEAD and HEAD - * points to something different than a branch. - */ - if (!upstream) - die(_("HEAD does not point to a branch")); - if (!upstream->merge || !upstream->merge[0]->dst) { - if (!ref_exists(upstream->refname)) - die(_("No such branch: '%s'"), cp); - if (!upstream->merge) { - die(_("No upstream configured for branch '%s'"), - upstream->name); - } - die( - _("Upstream branch '%s' not stored as a remote-tracking branch"), - upstream->merge[0]->src); - } - free(cp); - cp = shorten_unambiguous_ref(upstream->merge[0]->dst, 0); - strbuf_reset(buf); - strbuf_addstr(buf, cp); - free(cp); - return len; + return -1; } int strbuf_branchname(struct strbuf *sb, const char *name) From f278f40f09fd5a4e8e091be489bcb54230d2e3de Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Jan 2014 03:27:32 -0500 Subject: [PATCH 2/5] interpret_branch_name: rename "cp" variable to "at" In the original version of this function, "cp" acted as a pointer to many different things. Since the refactoring in the last patch, it only marks the at-sign in the string. Let's use a more descriptive variable name. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_name.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sha1_name.c b/sha1_name.c index 03c574e295..47a71e310e 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -1119,7 +1119,7 @@ static int interpret_upstream_mark(const char *name, int namelen, */ int interpret_branch_name(const char *name, int namelen, struct strbuf *buf) { - char *cp; + char *at; int len = interpret_nth_prior_checkout(name, buf); if (!namelen) @@ -1134,15 +1134,15 @@ int interpret_branch_name(const char *name, int namelen, struct strbuf *buf) return reinterpret(name, namelen, len, buf); } - cp = strchr(name, '@'); - if (!cp) + at = strchr(name, '@'); + if (!at) return -1; - len = interpret_empty_at(name, namelen, cp - name, buf); + len = interpret_empty_at(name, namelen, at - name, buf); if (len > 0) return reinterpret(name, namelen, len, buf); - len = interpret_upstream_mark(name, namelen, cp - name, buf); + len = interpret_upstream_mark(name, namelen, at - name, buf); if (len > 0) return len; From 8cd4249c4cdb19fce489a3f6ba31f499f4331341 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Jan 2014 03:31:57 -0500 Subject: [PATCH 3/5] interpret_branch_name: always respect "namelen" parameter interpret_branch_name gets passed a "name" buffer to parse, along with a "namelen" parameter representing its length. If "namelen" is zero, we fallback to the NUL-terminated string-length of "name". However, it does not necessarily follow that if we have gotten a non-zero "namelen", it is the NUL-terminated string-length of "name". E.g., when get_sha1() is parsing "foo:bar", we will be asked to operate only on the first three characters. Yet in interpret_branch_name and its helpers, we use string functions like strchr() to operate on "name", looking past the length we were given. This can result in us mis-parsing object names. We should instead be limiting our search to "namelen" bytes. There are three distinct types of object names this patch addresses: - The intrepret_empty_at helper uses strchr to find the next @-expression after our potential empty-at. In an expression like "@:foo@bar", it erroneously thinks that the second "@" is relevant, even if we were asked only to look at the first character. This case is easy to trigger (and we test it in this patch). - When finding the initial @-mark for @{upstream}, we use strchr. This means we might treat "foo:@{upstream}" as the upstream for "foo:", even though we were asked only to look at "foo". We cannot test this one in practice, because it is masked by another bug (which is fixed in the next patch). - The interpret_nth_prior_checkout helper did not receive the name length at all. This turns out not to be a problem in practice, though, because its parsing is so limited: it always starts from the far-left of the string, and will not tolerate a colon (which is currently the only way to get a smaller-than-strlen "namelen"). However, it's still worth fixing to make the code more obviously correct, and to future-proof us against callers with more exotic buffers. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_name.c | 17 ++++++++++------- t/t1508-at-combinations.sh | 15 ++++++++++++++- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/sha1_name.c b/sha1_name.c index 47a71e310e..afdff2f1d5 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -430,7 +430,7 @@ static inline int upstream_mark(const char *string, int len) } static int get_sha1_1(const char *name, int len, unsigned char *sha1, unsigned lookup_flags); -static int interpret_nth_prior_checkout(const char *name, struct strbuf *buf); +static int interpret_nth_prior_checkout(const char *name, int namelen, struct strbuf *buf); static int get_sha1_basic(const char *str, int len, unsigned char *sha1) { @@ -492,7 +492,7 @@ static int get_sha1_basic(const char *str, int len, unsigned char *sha1) struct strbuf buf = STRBUF_INIT; int detached; - if (interpret_nth_prior_checkout(str, &buf) > 0) { + if (interpret_nth_prior_checkout(str, len, &buf) > 0) { detached = (buf.len == 40 && !get_sha1_hex(buf.buf, sha1)); strbuf_release(&buf); if (detached) @@ -931,7 +931,8 @@ static int grab_nth_branch_switch(unsigned char *osha1, unsigned char *nsha1, * Parse @{-N} syntax, return the number of characters parsed * if successful; otherwise signal an error with negative value. */ -static int interpret_nth_prior_checkout(const char *name, struct strbuf *buf) +static int interpret_nth_prior_checkout(const char *name, int namelen, + struct strbuf *buf) { long nth; int retval; @@ -939,9 +940,11 @@ static int interpret_nth_prior_checkout(const char *name, struct strbuf *buf) const char *brace; char *num_end; + if (namelen < 4) + return -1; if (name[0] != '@' || name[1] != '{' || name[2] != '-') return -1; - brace = strchr(name, '}'); + brace = memchr(name, '}', namelen); if (!brace) return -1; nth = strtol(name + 3, &num_end, 10); @@ -1014,7 +1017,7 @@ static int interpret_empty_at(const char *name, int namelen, int len, struct str return -1; /* make sure it's a single @, or @@{.*}, not @foo */ - next = strchr(name + len + 1, '@'); + next = memchr(name + len + 1, '@', namelen - len - 1); if (next && next[1] != '{') return -1; if (!next) @@ -1120,7 +1123,7 @@ static int interpret_upstream_mark(const char *name, int namelen, int interpret_branch_name(const char *name, int namelen, struct strbuf *buf) { char *at; - int len = interpret_nth_prior_checkout(name, buf); + int len = interpret_nth_prior_checkout(name, namelen, buf); if (!namelen) namelen = strlen(name); @@ -1134,7 +1137,7 @@ int interpret_branch_name(const char *name, int namelen, struct strbuf *buf) return reinterpret(name, namelen, len, buf); } - at = strchr(name, '@'); + at = memchr(name, '@', namelen); if (!at) return -1; diff --git a/t/t1508-at-combinations.sh b/t/t1508-at-combinations.sh index ceb844985f..078e1195df 100755 --- a/t/t1508-at-combinations.sh +++ b/t/t1508-at-combinations.sh @@ -9,8 +9,11 @@ check() { if test '$2' = 'commit' then git log -1 --format=%s '$1' >actual - else + elif test '$2' = 'ref' + then git rev-parse --symbolic-full-name '$1' >actual + else + git cat-file -p '$1' >actual fi && test_cmp expect actual " @@ -82,4 +85,14 @@ check HEAD ref refs/heads/old-branch check "HEAD@{1}" commit new-two check "@{1}" commit old-one +test_expect_success 'create path with @' ' + echo content >normal && + echo content >fun@ny && + git add normal fun@ny && + git commit -m "funny path" +' + +check "@:normal" blob content +check "@:fun@ny" blob content + test_done From 3f6eb30f1dbb6f8f715c9121bba43f2a1d294e28 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Jan 2014 03:37:23 -0500 Subject: [PATCH 4/5] interpret_branch_name: avoid @{upstream} past colon get_sha1() cannot currently parse a valid object name like "HEAD:@{upstream}" (assuming that such an oddly named file exists in the HEAD commit). It takes two passes to parse the string: 1. It first considers the whole thing as a ref, which results in looking for the upstream of "HEAD:". 2. It finds the colon, parses "HEAD" as a tree-ish, and then finds the path "@{upstream}" in the tree. For a path that looks like a normal reflog (e.g., "HEAD:@{yesterday}"), the first pass is a no-op. We try to dwim_ref("HEAD:"), that returns zero refs, and we proceed with colon-parsing. For "HEAD:@{upstream}", though, the first pass ends up in interpret_upstream_mark, which tries to find the branch "HEAD:". When it sees that the branch does not exist, it actually dies rather than returning an error to the caller. As a result, we never make it to the second pass. One obvious way of fixing this would be to teach interpret_upstream_mark to simply report "no, this isn't an upstream" in such a case. However, that would make the error-reporting for legitimate upstream cases significantly worse. Something like "bogus@{upstream}" would simply report "unknown revision: bogus@{upstream}", while the current code diagnoses a wide variety of possible misconfigurations (no such branch, branch exists but does not have upstream, etc). However, we can take advantage of the fact that a branch name cannot contain a colon. Therefore even if we find an upstream mark, any prefix with a colon must mean that the upstream mark we found is actually a pathname, and should be disregarded completely. This patch implements that logic. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_name.c | 3 +++ t/t1507-rev-parse-upstream.sh | 16 ++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/sha1_name.c b/sha1_name.c index afdff2f1d5..26a5811c84 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -1095,6 +1095,9 @@ static int interpret_upstream_mark(const char *name, int namelen, if (!len) return -1; + if (memchr(name, ':', at)) + return -1; + set_shortened_ref(buf, get_upstream_branch(name, at)); return len + at; } diff --git a/t/t1507-rev-parse-upstream.sh b/t/t1507-rev-parse-upstream.sh index 2a19e797eb..cace1ca4ad 100755 --- a/t/t1507-rev-parse-upstream.sh +++ b/t/t1507-rev-parse-upstream.sh @@ -210,4 +210,20 @@ test_expect_success 'log -g other@{u}@{now}' ' test_cmp expect actual ' +test_expect_success '@{reflog}-parsing does not look beyond colon' ' + echo content >@{yesterday} && + git add @{yesterday} && + git commit -m "funny reflog file" && + git hash-object @{yesterday} >expect && + git rev-parse HEAD:@{yesterday} >actual +' + +test_expect_success '@{upstream}-parsing does not look beyond colon' ' + echo content >@{upstream} && + git add @{upstream} && + git commit -m "funny upstream file" && + git hash-object @{upstream} >expect && + git rev-parse HEAD:@{upstream} >actual +' + test_done From 9892d5d4541d93a8a6a4fd9ac4178d71d0d308e3 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Jan 2014 03:40:46 -0500 Subject: [PATCH 5/5] interpret_branch_name: find all possible @-marks When we parse a string like "foo@{upstream}", we look for the first "@"-sign, and check to see if it is an upstream mark. However, since branch names can contain an @, we may also see "@foo@{upstream}". In this case, we check only the first @, and ignore the second. As a result, we do not find the upstream. We can solve this by iterating through all @-marks in the string, and seeing if any is a legitimate upstream or empty-at mark. Another strategy would be to parse from the right-hand side of the string. However, that does not work for the "empty_at" case, which allows "@@{upstream}". We need to find the left-most one in this case (and we then recurse as "HEAD@{upstream}"). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_name.c | 20 +++++++++++--------- t/t1507-rev-parse-upstream.sh | 21 +++++++++++++++++++++ 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/sha1_name.c b/sha1_name.c index 26a5811c84..15854e35ec 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -1126,6 +1126,7 @@ static int interpret_upstream_mark(const char *name, int namelen, int interpret_branch_name(const char *name, int namelen, struct strbuf *buf) { char *at; + const char *start; int len = interpret_nth_prior_checkout(name, namelen, buf); if (!namelen) @@ -1140,17 +1141,18 @@ int interpret_branch_name(const char *name, int namelen, struct strbuf *buf) return reinterpret(name, namelen, len, buf); } - at = memchr(name, '@', namelen); - if (!at) - return -1; + for (start = name; + (at = memchr(start, '@', namelen - (start - name))); + start = at + 1) { - len = interpret_empty_at(name, namelen, at - name, buf); - if (len > 0) - return reinterpret(name, namelen, len, buf); + len = interpret_empty_at(name, namelen, at - name, buf); + if (len > 0) + return reinterpret(name, namelen, len, buf); - len = interpret_upstream_mark(name, namelen, at - name, buf); - if (len > 0) - return len; + len = interpret_upstream_mark(name, namelen, at - name, buf); + if (len > 0) + return len; + } return -1; } diff --git a/t/t1507-rev-parse-upstream.sh b/t/t1507-rev-parse-upstream.sh index cace1ca4ad..178694ee63 100755 --- a/t/t1507-rev-parse-upstream.sh +++ b/t/t1507-rev-parse-upstream.sh @@ -17,6 +17,9 @@ test_expect_success 'setup' ' test_commit 4 && git branch --track my-side origin/side && git branch --track local-master master && + git branch --track fun@ny origin/side && + git branch --track @funny origin/side && + git branch --track funny@ origin/side && git remote add -t master master-only .. && git fetch master-only && git branch bad-upstream && @@ -54,6 +57,24 @@ test_expect_success 'my-side@{upstream} resolves to correct full name' ' test refs/remotes/origin/side = "$(full_name my-side@{u})" ' +test_expect_success 'upstream of branch with @ in middle' ' + full_name fun@ny@{u} >actual && + echo refs/remotes/origin/side >expect && + test_cmp expect actual +' + +test_expect_success 'upstream of branch with @ at start' ' + full_name @funny@{u} >actual && + echo refs/remotes/origin/side >expect && + test_cmp expect actual +' + +test_expect_success 'upstream of branch with @ at end' ' + full_name funny@@{u} >actual && + echo refs/remotes/origin/side >expect && + test_cmp expect actual +' + test_expect_success 'refs/heads/my-side@{upstream} does not resolve to my-side{upstream}' ' test_must_fail full_name refs/heads/my-side@{upstream} '