From 17bf35a3c7b46df7131681bcc5bee5f12e1caec4 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 13 Sep 2012 14:21:44 -0700 Subject: [PATCH 01/11] grep: teach --debug option to dump the parse tree Our "grep" allows complex boolean expressions to be formed to match each individual line with operators like --and, '(', ')' and --not. Introduce the "--debug" option to show the parse tree to help people who want to debug and enhance it. Also "log" learns "--grep-debug" option to do the same. The command line parser to the log family is a lot more limited than the general "git grep" parser, but it has special handling for header matching (e.g. "--author"), and a parse tree is valuable when working on it. Note that "--all-match" is *not* any individual node in the parse tree. It is an instruction to the evaluator to check all the nodes in the top-level backbone have matched and reject a document as non-matching otherwise. Signed-off-by: Junio C Hamano --- builtin/grep.c | 3 ++ grep.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++-- grep.h | 1 + revision.c | 2 ++ 4 files changed, 96 insertions(+), 2 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index fe1726f5ef..8aea00c048 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -772,6 +772,9 @@ int cmd_grep(int argc, const char **argv, const char *prefix) "indicate hit with exit status without output"), OPT_BOOLEAN(0, "all-match", &opt.all_match, "show only matches from files that match all patterns"), + { OPTION_SET_INT, 0, "debug", &opt.debug, NULL, + "show parse tree for grep expression", + PARSE_OPT_NOARG | PARSE_OPT_HIDDEN, NULL, 1 }, OPT_GROUP(""), { OPTION_STRING, 'O', "open-files-in-pager", &show_in_pager, "pager", "show matching files in the pager", diff --git a/grep.c b/grep.c index 04e3ec6c6e..be15c4753d 100644 --- a/grep.c +++ b/grep.c @@ -332,6 +332,87 @@ static struct grep_expr *compile_pattern_expr(struct grep_pat **list) return compile_pattern_or(list); } +static void indent(int in) +{ + while (in-- > 0) + fputc(' ', stderr); +} + +static void dump_grep_pat(struct grep_pat *p) +{ + switch (p->token) { + case GREP_AND: fprintf(stderr, "*and*"); break; + case GREP_OPEN_PAREN: fprintf(stderr, "*(*"); break; + case GREP_CLOSE_PAREN: fprintf(stderr, "*)*"); break; + case GREP_NOT: fprintf(stderr, "*not*"); break; + case GREP_OR: fprintf(stderr, "*or*"); break; + + case GREP_PATTERN: fprintf(stderr, "pattern"); break; + case GREP_PATTERN_HEAD: fprintf(stderr, "pattern_head"); break; + case GREP_PATTERN_BODY: fprintf(stderr, "pattern_body"); break; + } + + switch (p->token) { + default: break; + case GREP_PATTERN_HEAD: + fprintf(stderr, "", p->field); break; + case GREP_PATTERN_BODY: + fprintf(stderr, ""); break; + } + switch (p->token) { + default: break; + case GREP_PATTERN_HEAD: + case GREP_PATTERN_BODY: + case GREP_PATTERN: + fprintf(stderr, "%.*s", (int)p->patternlen, p->pattern); + break; + } + fputc('\n', stderr); +} + +static void dump_grep_expression_1(struct grep_expr *x, int in) +{ + indent(in); + switch (x->node) { + case GREP_NODE_TRUE: + fprintf(stderr, "true\n"); + break; + case GREP_NODE_ATOM: + dump_grep_pat(x->u.atom); + break; + case GREP_NODE_NOT: + fprintf(stderr, "(not\n"); + dump_grep_expression_1(x->u.unary, in+1); + indent(in); + fprintf(stderr, ")\n"); + break; + case GREP_NODE_AND: + fprintf(stderr, "(and\n"); + dump_grep_expression_1(x->u.binary.left, in+1); + dump_grep_expression_1(x->u.binary.right, in+1); + indent(in); + fprintf(stderr, ")\n"); + break; + case GREP_NODE_OR: + fprintf(stderr, "(or\n"); + dump_grep_expression_1(x->u.binary.left, in+1); + dump_grep_expression_1(x->u.binary.right, in+1); + indent(in); + fprintf(stderr, ")\n"); + break; + } +} + +void dump_grep_expression(struct grep_opt *opt) +{ + struct grep_expr *x = opt->pattern_expression; + + if (opt->all_match) + fprintf(stderr, "[all-match]\n"); + dump_grep_expression_1(x, 0); + fflush(NULL); +} + static struct grep_expr *grep_true_expr(void) { struct grep_expr *z = xcalloc(1, sizeof(*z)); @@ -395,7 +476,7 @@ static struct grep_expr *prep_header_patterns(struct grep_opt *opt) return header_expr; } -void compile_grep_patterns(struct grep_opt *opt) +static void compile_grep_patterns_real(struct grep_opt *opt) { struct grep_pat *p; struct grep_expr *header_expr = prep_header_patterns(opt); @@ -415,7 +496,7 @@ void compile_grep_patterns(struct grep_opt *opt) if (opt->all_match || header_expr) opt->extended = 1; - else if (!opt->extended) + else if (!opt->extended && !opt->debug) return; p = opt->pattern_list; @@ -435,6 +516,13 @@ void compile_grep_patterns(struct grep_opt *opt) opt->all_match = 1; } +void compile_grep_patterns(struct grep_opt *opt) +{ + compile_grep_patterns_real(opt); + if (opt->debug) + dump_grep_expression(opt); +} + static void free_pattern_expr(struct grep_expr *x) { switch (x->node) { diff --git a/grep.h b/grep.h index ed7de6bec8..bf5be5ada4 100644 --- a/grep.h +++ b/grep.h @@ -90,6 +90,7 @@ struct grep_opt { int word_regexp; int fixed; int all_match; + int debug; #define GREP_BINARY_DEFAULT 0 #define GREP_BINARY_NOMATCH 1 #define GREP_BINARY_TEXT 2 diff --git a/revision.c b/revision.c index 9a0d9c7de2..90376e8e19 100644 --- a/revision.c +++ b/revision.c @@ -1578,6 +1578,8 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg } else if ((argcount = parse_long_opt("grep", argv, &optarg))) { add_message_grep(revs, optarg); return argcount; + } else if (!strcmp(arg, "--grep-debug")) { + revs->grep_filter.debug = 1; } else if (!strcmp(arg, "--extended-regexp") || !strcmp(arg, "-E")) { revs->grep_filter.regflags |= REG_EXTENDED; } else if (!strcmp(arg, "--regexp-ignore-case") || !strcmp(arg, "-i")) { From 208f5aa42615671bae1e55de20a58d9ba046d3e8 Mon Sep 17 00:00:00 2001 From: Michael J Gruber Date: Fri, 14 Sep 2012 11:46:35 +0200 Subject: [PATCH 02/11] grep: show --debug output only once When threaded grep is in effect, the patterns are duplicated and recompiled for each thread. Avoid "--debug" output during the recompilation so that the output is given once instead of "1+nthreads" times. Signed-off-by: Michael J Gruber Signed-off-by: Junio C Hamano --- builtin/grep.c | 1 + 1 file changed, 1 insertion(+) diff --git a/builtin/grep.c b/builtin/grep.c index 8aea00c048..a7e8df0d40 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -209,6 +209,7 @@ static void start_threads(struct grep_opt *opt) int err; struct grep_opt *o = grep_opt_dup(opt); o->output = strbuf_out; + o->debug = 0; compile_grep_patterns(o); err = pthread_create(&threads[i], NULL, run, o); From 13e4fc7e0197ca752cf46674392bf6ef6249e614 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 13 Sep 2012 16:26:57 -0700 Subject: [PATCH 03/11] log --grep/--author: honor --all-match honored for multiple --grep patterns When we have both header expression (which has to be an OR node by construction) and a pattern expression (which could be anything), we create a new top-level OR node to bind them together, and the resulting expression structure looks like this: OR / \ / \ pattern OR / \ / \ ..... committer OR / \ author TRUE The three elements on the top-level backbone that are inspected by the "all-match" logic are "pattern", "committer" and "author". When there are more than one elements in the "pattern", the top-level node of the "pattern" part of the subtree is an OR, and that node is inspected by "all-match". The result ends up ignoring the "--all-match" given from the command line. A match on either side of the pattern is considered a match, hence: git log --grep=A --grep=B --author=C --all-match shows the same "authored by C and has either A or B" that is correct only when run without "--all-match". Fix this by turning the resulting expression around when "--all-match" is in effect, like this: OR / \ / \ / OR committer / \ author \ pattern The set of nodes on the top-level backbone in the resulting expression becomes "committer", "author", and the nodes that are on the top-level backbone of the "pattern" subexpression. This makes the "all-match" logic inspect the same nodes in "pattern" as the case without the author and/or the committer restriction, and makes the earlier "log" example to show "authored by C and has A and has B", which is what the command line expects. Signed-off-by: Junio C Hamano --- grep.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/grep.c b/grep.c index be15c4753d..925aa92f89 100644 --- a/grep.c +++ b/grep.c @@ -476,6 +476,22 @@ static struct grep_expr *prep_header_patterns(struct grep_opt *opt) return header_expr; } +static struct grep_expr *grep_splice_or(struct grep_expr *x, struct grep_expr *y) +{ + struct grep_expr *z = x; + + while (x) { + assert(x->node == GREP_NODE_OR); + if (x->u.binary.right && + x->u.binary.right->node == GREP_NODE_TRUE) { + x->u.binary.right = y; + break; + } + x = x->u.binary.right; + } + return z; +} + static void compile_grep_patterns_real(struct grep_opt *opt) { struct grep_pat *p; @@ -510,6 +526,9 @@ static void compile_grep_patterns_real(struct grep_opt *opt) if (!opt->pattern_expression) opt->pattern_expression = header_expr; + else if (opt->all_match) + opt->pattern_expression = grep_splice_or(header_expr, + opt->pattern_expression); else opt->pattern_expression = grep_or_expr(opt->pattern_expression, header_expr); From a23e3138cb4ddd57e9cbb221e8cbfaf233111d2c Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 13 Sep 2012 18:54:30 -0700 Subject: [PATCH 04/11] log: document use of multiple commit limiting options Generally speaking, using more options will further narrow the selection, but there are a few exceptions. Document them. Signed-off-by: Junio C Hamano --- Documentation/rev-list-options.txt | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt index 1ae3c899ef..c828408a80 100644 --- a/Documentation/rev-list-options.txt +++ b/Documentation/rev-list-options.txt @@ -3,8 +3,15 @@ Commit Limiting Besides specifying a range of commits that should be listed using the special notations explained in the description, additional commit -limiting may be applied. Note that they are applied before commit -ordering and formatting options, such as '--reverse'. +limiting may be applied. + +Using more options generally further limits the output (e.g. +`--since=` limits to commits newer than ``, and using it +with `--grep=` further limits to commits whose log message +has a line that matches ``), unless otherwise noted. + +Note that these are applied before commit +ordering and formatting options, such as `--reverse`. -- @@ -38,16 +45,22 @@ endif::git-rev-list[] --committer=:: Limit the commits output to ones with author/committer - header lines that match the specified pattern (regular expression). + header lines that match the specified pattern (regular + expression). With more than one `--author=`, + commits whose author matches any of the given patterns are + chosen (similarly for multiple `--committer=`). --grep=:: Limit the commits output to ones with log message that - matches the specified pattern (regular expression). + matches the specified pattern (regular expression). With + more than one `--grep=`, commits whose message + matches any of the given patterns are chosen (but see + `--all-match`). --all-match:: Limit the commits output to ones that match all given --grep, - --author and --committer instead of ones that match at least one. + instead of ones that match at least one. -i:: --regexp-ignore-case:: From 07a7d656dd550e24e8b9a1802665aa0abba435ee Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 15 Sep 2012 14:04:36 -0700 Subject: [PATCH 05/11] grep.c: mark private file-scope symbols as static Signed-off-by: Junio C Hamano --- grep.c | 6 +++++- grep.h | 3 +-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/grep.c b/grep.c index 925aa92f89..c7f8a47505 100644 --- a/grep.c +++ b/grep.c @@ -3,6 +3,10 @@ #include "userdiff.h" #include "xdiff-interface.h" +static int grep_source_load(struct grep_source *gs); +static int grep_source_is_binary(struct grep_source *gs); + + static struct grep_pat *create_grep_pat(const char *pat, size_t patlen, const char *origin, int no, enum grep_pat_token t, @@ -403,7 +407,7 @@ static void dump_grep_expression_1(struct grep_expr *x, int in) } } -void dump_grep_expression(struct grep_opt *opt) +static void dump_grep_expression(struct grep_opt *opt) { struct grep_expr *x = opt->pattern_expression; diff --git a/grep.h b/grep.h index bf5be5ada4..d66b19712c 100644 --- a/grep.h +++ b/grep.h @@ -149,11 +149,10 @@ struct grep_source { void grep_source_init(struct grep_source *gs, enum grep_source_type type, const char *name, const void *identifier); -int grep_source_load(struct grep_source *gs); void grep_source_clear_data(struct grep_source *gs); void grep_source_clear(struct grep_source *gs); void grep_source_load_driver(struct grep_source *gs); -int grep_source_is_binary(struct grep_source *gs); + int grep_source(struct grep_opt *opt, struct grep_source *gs); From b327bf74bd631790918d6730404eb97baec267c9 Mon Sep 17 00:00:00 2001 From: Michael J Gruber Date: Fri, 14 Sep 2012 11:46:39 +0200 Subject: [PATCH 06/11] t7810-grep: bring log --grep tests in common form The log --grep tests generate the expected out in different ways. Make them all use command blocks so that subshells are avoided and the expected output is easier to grasp visually. Signed-off-by: Michael J Gruber Signed-off-by: Junio C Hamano --- t/t7810-grep.sh | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh index 24e9b1974d..180e998697 100755 --- a/t/t7810-grep.sh +++ b/t/t7810-grep.sh @@ -435,31 +435,41 @@ test_expect_success 'log grep setup' ' test_expect_success 'log grep (1)' ' git log --author=author --pretty=tformat:%s >actual && - ( echo third ; echo initial ) >expect && + { + echo third && echo initial + } >expect && test_cmp expect actual ' test_expect_success 'log grep (2)' ' git log --author=" * " -F --pretty=tformat:%s >actual && - ( echo second ) >expect && + { + echo second + } >expect && test_cmp expect actual ' test_expect_success 'log grep (3)' ' git log --author="^A U" --pretty=tformat:%s >actual && - ( echo third ; echo initial ) >expect && + { + echo third && echo initial + } >expect && test_cmp expect actual ' test_expect_success 'log grep (4)' ' git log --author="frotz\.com>$" --pretty=tformat:%s >actual && - ( echo second ) >expect && + { + echo second + } >expect && test_cmp expect actual ' test_expect_success 'log grep (5)' ' git log --author=Thor -F --pretty=tformat:%s >actual && - ( echo third ; echo initial ) >expect && + { + echo third && echo initial + } >expect && test_cmp expect actual ' @@ -473,7 +483,9 @@ test_expect_success 'log --grep --author implicitly uses all-match' ' # grep matches initial and second but not third # author matches only initial and third git log --author="A U Thor" --grep=s --grep=l --format=%s >actual && - echo initial >expect && + { + echo initial + } >expect && test_cmp expect actual ' From dfe3642515d3d2679bdc25c2be4d961201e9f095 Mon Sep 17 00:00:00 2001 From: Michael J Gruber Date: Fri, 14 Sep 2012 11:46:40 +0200 Subject: [PATCH 07/11] t7810-grep: test multiple --grep with and without --all-match Signed-off-by: Michael J Gruber Signed-off-by: Junio C Hamano --- t/t7810-grep.sh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh index 180e998697..b841909315 100755 --- a/t/t7810-grep.sh +++ b/t/t7810-grep.sh @@ -479,6 +479,22 @@ test_expect_success 'log grep (6)' ' test_cmp expect actual ' +test_expect_success 'log with multiple --grep uses union' ' + git log --grep=i --grep=r --format=%s >actual && + { + echo fourth && echo third && echo initial + } >expect && + test_cmp expect actual +' + +test_expect_success 'log --all-match with multiple --grep uses intersection' ' + git log --all-match --grep=i --grep=r --format=%s >actual && + { + echo third + } >expect && + test_cmp expect actual +' + test_expect_success 'log --grep --author implicitly uses all-match' ' # grep matches initial and second but not third # author matches only initial and third From 00f62a64d489592c97f6f171f9a19fdda530f3c5 Mon Sep 17 00:00:00 2001 From: Michael J Gruber Date: Fri, 14 Sep 2012 11:46:41 +0200 Subject: [PATCH 08/11] t7810-grep: test multiple --author with --all-match The "--all-match" option is about "--grep", and does not affect how "--author" or "--committer" limitation is applied. Signed-off-by: Michael J Gruber Signed-off-by: Junio C Hamano --- t/t7810-grep.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh index b841909315..be81d96e20 100755 --- a/t/t7810-grep.sh +++ b/t/t7810-grep.sh @@ -513,6 +513,14 @@ test_expect_success 'log with multiple --author uses union' ' test_cmp expect actual ' +test_expect_success 'log --all-match with multiple --author still uses union' ' + git log --all-match --author="Thor" --author="Aster" --format=%s >actual && + { + echo third && echo second && echo initial + } >expect && + test_cmp expect actual +' + test_expect_success 'log with --grep and multiple --author uses all-match' ' git log --author="Thor" --author="Night" --grep=i --format=%s >actual && { From 2cb03e76a0e65920a0790d5b128fde9185e0e8fe Mon Sep 17 00:00:00 2001 From: Michael J Gruber Date: Fri, 14 Sep 2012 11:46:42 +0200 Subject: [PATCH 09/11] t7810-grep: test interaction of multiple --grep and --author options There are tests for this interaction already. Restructure slightly and avoid any claims about --all-match. Signed-off-by: Michael J Gruber Signed-off-by: Junio C Hamano --- t/t7810-grep.sh | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh index be81d96e20..f6edb4d6dc 100755 --- a/t/t7810-grep.sh +++ b/t/t7810-grep.sh @@ -495,16 +495,6 @@ test_expect_success 'log --all-match with multiple --grep uses intersection' ' test_cmp expect actual ' -test_expect_success 'log --grep --author implicitly uses all-match' ' - # grep matches initial and second but not third - # author matches only initial and third - git log --author="A U Thor" --grep=s --grep=l --format=%s >actual && - { - echo initial - } >expect && - test_cmp expect actual -' - test_expect_success 'log with multiple --author uses union' ' git log --author="Thor" --author="Aster" --format=%s >actual && { @@ -521,17 +511,33 @@ test_expect_success 'log --all-match with multiple --author still uses union' ' test_cmp expect actual ' -test_expect_success 'log with --grep and multiple --author uses all-match' ' - git log --author="Thor" --author="Night" --grep=i --format=%s >actual && +test_expect_success 'log --grep --author uses intersection' ' + # grep matches only third and fourth + # author matches only initial and third + git log --author="A U Thor" --grep=r --format=%s >actual && { - echo third && echo initial + echo third } >expect && test_cmp expect actual ' -test_expect_success 'log with --grep and multiple --author uses all-match' ' - git log --author="Thor" --author="Night" --grep=q --format=%s >actual && - >expect && +test_expect_success 'log --grep --grep --author takes union of greps and intersects with author' ' + # grep matches initial and second but not third + # author matches only initial and third + git log --author="A U Thor" --grep=s --grep=l --format=%s >actual && + { + echo initial + } >expect && + test_cmp expect actual +' + +test_expect_success 'log --grep --author --author takes union of authors and intersects with grep' ' + # grep matches only initial and third + # author matches all but second + git log --author="Thor" --author="Night" --grep=i --format=%s >actual && + { + echo third && echo initial + } >expect && test_cmp expect actual ' From 39f2e017203695b9da2ad35589b0e58bcac6fdc8 Mon Sep 17 00:00:00 2001 From: Michael J Gruber Date: Fri, 14 Sep 2012 11:46:43 +0200 Subject: [PATCH 10/11] t7810-grep: test --all-match with multiple --grep and --author options The code used to have a bug that ignores "--all-match", that requires all "--grep" to have matched, when "--author" or "--committer" was used. Make sure the bug will not be reintroduced. Signed-off-by: Michael J Gruber Signed-off-by: Junio C Hamano --- t/t7810-grep.sh | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh index f6edb4d6dc..b5c488e3a5 100755 --- a/t/t7810-grep.sh +++ b/t/t7810-grep.sh @@ -531,6 +531,16 @@ test_expect_success 'log --grep --grep --author takes union of greps and interse test_cmp expect actual ' +test_expect_success 'log ---all-match -grep --author --author still takes union of authors and intersects with grep' ' + # grep matches only initial and third + # author matches all but second + git log --all-match --author="Thor" --author="Night" --grep=i --format=%s >actual && + { + echo third && echo initial + } >expect && + test_cmp expect actual +' + test_expect_success 'log --grep --author --author takes union of authors and intersects with grep' ' # grep matches only initial and third # author matches all but second @@ -541,6 +551,16 @@ test_expect_success 'log --grep --author --author takes union of authors and int test_cmp expect actual ' +test_expect_success 'log --all-match --grep --grep --author takes intersection' ' + # grep matches only third + # author matches only initial and third + git log --all-match --author="A U Thor" --grep=i --grep=r --format=%s >actual && + { + echo third + } >expect && + test_cmp expect actual +' + test_expect_success 'grep with CE_VALID file' ' git update-index --assume-unchanged t/t && rm t/t && From 3083301ead459a45059af3ee85fb51d5d737a74d Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 20 Sep 2012 14:20:09 -0700 Subject: [PATCH 11/11] grep.c: make two symbols really file-scope static this time Adding a declaration at the beginning is not sufficient for obvious reasons. The definition has to be made static. Signed-off-by: Junio C Hamano --- grep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/grep.c b/grep.c index c7f8a47505..898be6ebfa 100644 --- a/grep.c +++ b/grep.c @@ -1469,7 +1469,7 @@ static int grep_source_load_file(struct grep_source *gs) return 0; } -int grep_source_load(struct grep_source *gs) +static int grep_source_load(struct grep_source *gs) { if (gs->buf) return 0; @@ -1497,7 +1497,7 @@ void grep_source_load_driver(struct grep_source *gs) grep_attr_unlock(); } -int grep_source_is_binary(struct grep_source *gs) +static int grep_source_is_binary(struct grep_source *gs) { grep_source_load_driver(gs); if (gs->driver->binary != -1)