From a0fe2b0d2329d38a08c03427917b21be818bec1f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 14 Feb 2017 01:02:38 -0500 Subject: [PATCH 1/7] grep: move thread initialization a little lower Originally, we set up the threads for grep before parsing the non-option arguments. In 53b8d931b (grep: disable threading in non-worktree case, 2011-12-12), the thread code got bumped lower in the function because it now needed to know whether we got any revision arguments. That put a big block of code in between the parsing of revs and the parsing of pathspecs, both of which share some loop variables. That makes it harder to read the code than the original, where the shared loops were right next to each other. Let's bump the thread initialization until after all of the parsing is done. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/grep.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 2c727ef499..5a282c4d06 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1169,6 +1169,20 @@ int cmd_grep(int argc, const char **argv, const char *prefix) break; } + /* The rest are paths */ + if (!seen_dashdash) { + int j; + for (j = i; j < argc; j++) + verify_filename(prefix, argv[j], j == i); + } + + parse_pathspec(&pathspec, 0, + PATHSPEC_PREFER_CWD | + (opt.max_depth != -1 ? PATHSPEC_MAXDEPTH_VALID : 0), + prefix, argv + i); + pathspec.max_depth = opt.max_depth; + pathspec.recursive = 1; + #ifndef NO_PTHREADS if (list.nr || cached || show_in_pager) num_threads = 0; @@ -1190,20 +1204,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix) } #endif - /* The rest are paths */ - if (!seen_dashdash) { - int j; - for (j = i; j < argc; j++) - verify_filename(prefix, argv[j], j == i); - } - - parse_pathspec(&pathspec, 0, - PATHSPEC_PREFER_CWD | - (opt.max_depth != -1 ? PATHSPEC_MAXDEPTH_VALID : 0), - prefix, argv + i); - pathspec.max_depth = opt.max_depth; - pathspec.recursive = 1; - if (recurse_submodules) { gitmodules_config(); compile_submodule_options(&opt, &pathspec, cached, untracked, From dca3b5f5cef8138e695f87e6cb37fb2063a10ea4 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Tue, 14 Feb 2017 01:03:03 -0500 Subject: [PATCH 2/7] grep: do not unnecessarily query repo for "--" When running a command of the form git grep --no-index pattern -- path in the absence of a Git repository, an error message will be printed: fatal: BUG: setup_git_env called without repository This is because "git grep" tries to interpret "--" as a rev. "git grep" has always tried to first interpret "--" as a rev for at least a few years, but this issue was upgraded from a pessimization to a bug in commit 59332d1 ("Resurrect "git grep --no-index"", 2010-02-06), which calls get_sha1 regardless of whether --no-index was specified. This bug appeared to be benign until commit b1ef400 ("setup_git_env: avoid blind fall-back to ".git"", 2016-10-20) when Git was taught to die in this situation. (This "git grep" bug appears to be one of the bugs that commit b1ef400 is meant to flush out.) Therefore, always interpret "--" as signaling the end of options, instead of trying to interpret it as a rev first. Signed-off-by: Jonathan Tan Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/grep.c | 9 +++++---- t/t7810-grep.sh | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 5a282c4d06..081e1b57a1 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1154,6 +1154,11 @@ int cmd_grep(int argc, const char **argv, const char *prefix) const char *arg = argv[i]; unsigned char sha1[20]; struct object_context oc; + if (!strcmp(arg, "--")) { + i++; + seen_dashdash = 1; + break; + } /* Is it a rev? */ if (!get_sha1_with_context(arg, 0, sha1, &oc)) { struct object *object = parse_object_or_die(sha1, arg); @@ -1162,10 +1167,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix) add_object_array_with_path(object, arg, &list, oc.mode, oc.path); continue; } - if (!strcmp(arg, "--")) { - i++; - seen_dashdash = 1; - } break; } diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh index 19f0108f8a..2c1f7373e6 100755 --- a/t/t7810-grep.sh +++ b/t/t7810-grep.sh @@ -982,6 +982,21 @@ test_expect_success 'grep -e -- -- path' ' test_cmp expected actual ' +test_expect_success 'grep --no-index pattern -- path' ' + rm -fr non && + mkdir -p non/git && + ( + GIT_CEILING_DIRECTORIES="$(pwd)/non" && + export GIT_CEILING_DIRECTORIES && + cd non/git && + echo hello >hello && + echo goodbye >goodbye && + echo hello:hello >expect && + git grep --no-index o -- hello >actual && + test_cmp expect actual + ) +' + cat >expected < Date: Tue, 14 Feb 2017 01:04:17 -0500 Subject: [PATCH 3/7] grep: re-order rev-parsing loop We loop over the arguments, but every branch of the loop hits either a "continue" or a "break". Surely we can make this simpler. The final conditional is: if (arg is a rev) { ... handle rev ... continue; } break; We can rewrite this as: if (arg is not a rev) break; ... handle rev ... That makes the flow a little bit simpler, and will make things much easier to follow when we add more logic in future patches. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/grep.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 081e1b57a1..461347adb0 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1154,20 +1154,22 @@ int cmd_grep(int argc, const char **argv, const char *prefix) const char *arg = argv[i]; unsigned char sha1[20]; struct object_context oc; + struct object *object; + if (!strcmp(arg, "--")) { i++; seen_dashdash = 1; break; } - /* Is it a rev? */ - if (!get_sha1_with_context(arg, 0, sha1, &oc)) { - struct object *object = parse_object_or_die(sha1, arg); - if (!seen_dashdash) - verify_non_filename(prefix, arg); - add_object_array_with_path(object, arg, &list, oc.mode, oc.path); - continue; - } - break; + + /* Stop at the first non-rev */ + if (get_sha1_with_context(arg, 0, sha1, &oc)) + break; + + object = parse_object_or_die(sha1, arg); + if (!seen_dashdash) + verify_non_filename(prefix, arg); + add_object_array_with_path(object, arg, &list, oc.mode, oc.path); } /* The rest are paths */ From b5b81136da11638bdf1e336d9ec371b820fbf412 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 14 Feb 2017 01:05:55 -0500 Subject: [PATCH 4/7] grep: fix "--" rev/pathspec disambiguation If we see "git grep pattern rev -- file" then we apply the usual rev/pathspec disambiguation rules: any "rev" before the "--" must be a revision, and we do not need to apply the verify_non_filename() check. But there are two bugs here: 1. We keep a seen_dashdash flag to handle this case, but we set it in the same left-to-right pass over the arguments in which we parse "rev". So when we see "rev", we do not yet know that there is a "--", and we mistakenly complain if there is a matching file. We can fix this by making a preliminary pass over the arguments to find the "--", and only then checking the rev arguments. 2. If we can't resolve "rev" but there isn't a dashdash, that's OK. We treat it like a path, and complain later if it doesn't exist. But if there _is_ a dashdash, then we know it must be a rev, and should treat it as such, complaining if it does not resolve. The current code instead ignores it and tries to treat it like a path. This patch fixes both bugs, and tries to comment the parsing flow a bit better. It adds tests that cover the two bugs, but also some related situations (which already worked, but this confirms that our fixes did not break anything). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/grep.c | 29 ++++++++++++++++++++++++----- t/t7810-grep.sh | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 5 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 461347adb0..e83b33bdaa 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1149,7 +1149,22 @@ int cmd_grep(int argc, const char **argv, const char *prefix) compile_grep_patterns(&opt); - /* Check revs and then paths */ + /* + * We have to find "--" in a separate pass, because its presence + * influences how we will parse arguments that come before it. + */ + for (i = 0; i < argc; i++) { + if (!strcmp(argv[i], "--")) { + seen_dashdash = 1; + break; + } + } + + /* + * Resolve any rev arguments. If we have a dashdash, then everything up + * to it must resolve as a rev. If not, then we stop at the first + * non-rev and assume everything else is a path. + */ for (i = 0; i < argc; i++) { const char *arg = argv[i]; unsigned char sha1[20]; @@ -1158,13 +1173,14 @@ int cmd_grep(int argc, const char **argv, const char *prefix) if (!strcmp(arg, "--")) { i++; - seen_dashdash = 1; break; } - /* Stop at the first non-rev */ - if (get_sha1_with_context(arg, 0, sha1, &oc)) + if (get_sha1_with_context(arg, 0, sha1, &oc)) { + if (seen_dashdash) + die(_("unable to resolve revision: %s"), arg); break; + } object = parse_object_or_die(sha1, arg); if (!seen_dashdash) @@ -1172,7 +1188,10 @@ int cmd_grep(int argc, const char **argv, const char *prefix) add_object_array_with_path(object, arg, &list, oc.mode, oc.path); } - /* The rest are paths */ + /* + * Anything left over is presumed to be a path. But in the non-dashdash + * "do what I mean" case, we verify and complain when that isn't true. + */ if (!seen_dashdash) { int j; for (j = i; j < argc; j++) diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh index 2c1f7373e6..a6011f9b1d 100755 --- a/t/t7810-grep.sh +++ b/t/t7810-grep.sh @@ -982,6 +982,39 @@ test_expect_success 'grep -e -- -- path' ' test_cmp expected actual ' +test_expect_success 'dashdash disambiguates rev as rev' ' + test_when_finished "rm -f master" && + echo content >master && + echo master:hello.c >expect && + git grep -l o master -- hello.c >actual && + test_cmp expect actual +' + +test_expect_success 'dashdash disambiguates pathspec as pathspec' ' + test_when_finished "git rm -f master" && + echo content >master && + git add master && + echo master:content >expect && + git grep o -- master >actual && + test_cmp expect actual +' + +test_expect_success 'report bogus arg without dashdash' ' + test_must_fail git grep o does-not-exist +' + +test_expect_success 'report bogus rev with dashdash' ' + test_must_fail git grep o hello.c -- +' + +test_expect_success 'allow non-existent path with dashdash' ' + # We need a real match so grep exits with success. + tree=$(git ls-tree HEAD | + sed s/hello.c/not-in-working-tree/ | + git mktree) && + git grep o "$tree" -- not-in-working-tree +' + test_expect_success 'grep --no-index pattern -- path' ' rm -fr non && mkdir -p non/git && From d0ffc069331b490ca15418ca7025d5b9024fbad5 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 14 Feb 2017 01:07:29 -0500 Subject: [PATCH 5/7] grep: avoid resolving revision names in --no-index case We disallow the use of revisions with --no-index, but we don't actually check and complain until well after we've parsed the revisions. This is the cause of a few problems: 1. We shouldn't be calling get_sha1() at all when we aren't in a repository, as it might access the ref or object databases. For now, this should generally just return failure, but eventually it will become a BUG(). 2. When there's a "--" disambiguator and you're outside a repository, we'll complain early with "unable to resolve revision". But we can give a much more specific error. 3. When there isn't a "--" disambiguator, we still do the normal rev/path checks. This is silly, as we know we cannot have any revs with --no-index. Everything we see must be a path. Outside of a repository this doesn't matter (since we know it won't resolve), but inside one, we may complain unnecessarily if a filename happens to also match a refname. This patch skips the get_sha1() call entirely in the no-index case, and behaves as if it failed (with the exception of giving a better error message). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/grep.c | 6 ++++++ t/t7810-grep.sh | 13 +++++++++++++ 2 files changed, 19 insertions(+) diff --git a/builtin/grep.c b/builtin/grep.c index e83b33bdaa..c4c6325941 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1176,6 +1176,12 @@ int cmd_grep(int argc, const char **argv, const char *prefix) break; } + if (!use_index) { + if (seen_dashdash) + die(_("--no-index cannot be used with revs")); + break; + } + if (get_sha1_with_context(arg, 0, sha1, &oc)) { if (seen_dashdash) die(_("unable to resolve revision: %s"), arg); diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh index a6011f9b1d..c051c7ee80 100755 --- a/t/t7810-grep.sh +++ b/t/t7810-grep.sh @@ -1030,6 +1030,19 @@ test_expect_success 'grep --no-index pattern -- path' ' ) ' +test_expect_success 'grep --no-index complains of revs' ' + test_must_fail git grep --no-index o master -- 2>err && + test_i18ngrep "no-index cannot be used with revs" err +' + +test_expect_success 'grep --no-index prefers paths to revs' ' + test_when_finished "rm -f master" && + echo content >master && + echo master:content >expect && + git grep --no-index o master >actual && + test_cmp expect actual +' + cat >expected < Date: Tue, 14 Feb 2017 01:08:09 -0500 Subject: [PATCH 6/7] grep: do not diagnose misspelt revs with --no-index If we are using --no-index, then our arguments cannot be revs in the first place. Not only is it pointless to diagnose them, but if we are not in a repository, we should not be trying to resolve any names. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/grep.c | 2 +- t/t7810-grep.sh | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/builtin/grep.c b/builtin/grep.c index c4c6325941..1454bef496 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1201,7 +1201,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix) if (!seen_dashdash) { int j; for (j = i; j < argc; j++) - verify_filename(prefix, argv[j], j == i); + verify_filename(prefix, argv[j], j == i && use_index); } parse_pathspec(&pathspec, 0, diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh index c051c7ee80..0ff9f6cae8 100755 --- a/t/t7810-grep.sh +++ b/t/t7810-grep.sh @@ -1043,6 +1043,11 @@ test_expect_success 'grep --no-index prefers paths to revs' ' test_cmp expect actual ' +test_expect_success 'grep --no-index does not "diagnose" revs' ' + test_must_fail git grep --no-index o :1:hello.c 2>err && + test_i18ngrep ! -i "did you mean" err +' + cat >expected < Date: Tue, 14 Feb 2017 16:54:36 -0500 Subject: [PATCH 7/7] grep: treat revs the same for --untracked as for --no-index git-grep has always disallowed grepping in a tree (as opposed to the working directory) with both --untracked and --no-index. But we traditionally did so by first collecting the revs, and then complaining when any were provided. The --no-index option recently learned to detect revs much earlier. This has two user-visible effects: - we don't bother to resolve revision names at all. So when there's a rev/path ambiguity, we always choose to treat it as a path. - likewise, when you do specify a revision without "--", the error you get is "no such path" and not "--untracked cannot be used with revs". The rationale for doing this with --no-index is that it is meant to be used outside a repository, and so parsing revs at all does not make sense. This patch gives --untracked the same treatment. While it _is_ meant to be used in a repository, it is explicitly about grepping the non-repository contents. Telling the user "we found a rev, but you are not allowed to use revs" is not really helpful compared to "we treated your argument as a path, and could not find it". Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/grep.c | 10 +++++----- t/t7810-grep.sh | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 1454bef496..9304c33e75 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -967,6 +967,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix) int dummy; int use_index = 1; int pattern_type_arg = GREP_PATTERN_TYPE_UNSPECIFIED; + int allow_revs; struct option options[] = { OPT_BOOL(0, "cached", &cached, @@ -1165,6 +1166,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix) * to it must resolve as a rev. If not, then we stop at the first * non-rev and assume everything else is a path. */ + allow_revs = use_index && !untracked; for (i = 0; i < argc; i++) { const char *arg = argv[i]; unsigned char sha1[20]; @@ -1176,9 +1178,9 @@ int cmd_grep(int argc, const char **argv, const char *prefix) break; } - if (!use_index) { + if (!allow_revs) { if (seen_dashdash) - die(_("--no-index cannot be used with revs")); + die(_("--no-index or --untracked cannot be used with revs")); break; } @@ -1201,7 +1203,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix) if (!seen_dashdash) { int j; for (j = i; j < argc; j++) - verify_filename(prefix, argv[j], j == i && use_index); + verify_filename(prefix, argv[j], j == i && allow_revs); } parse_pathspec(&pathspec, 0, @@ -1273,8 +1275,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix) if (!use_index || untracked) { int use_exclude = (opt_exclude < 0) ? use_index : !!opt_exclude; - if (list.nr) - die(_("--no-index or --untracked cannot be used with revs.")); hit = grep_directory(&opt, &pathspec, use_exclude, use_index); } else if (0 <= opt_exclude) { die(_("--[no-]exclude-standard cannot be used for tracked contents.")); diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh index 0ff9f6cae8..cee42097b0 100755 --- a/t/t7810-grep.sh +++ b/t/t7810-grep.sh @@ -1032,7 +1032,7 @@ test_expect_success 'grep --no-index pattern -- path' ' test_expect_success 'grep --no-index complains of revs' ' test_must_fail git grep --no-index o master -- 2>err && - test_i18ngrep "no-index cannot be used with revs" err + test_i18ngrep "cannot be used with revs" err ' test_expect_success 'grep --no-index prefers paths to revs' '