From e62ba4324421279d5c9ba95494dca4b4f0e10eb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 29 Jun 2017 22:22:17 +0000 Subject: [PATCH 1/6] grep: remove redundant double assignment to 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stop assigning 0 to the extended_regexp_option field right after we've zeroed out the entire struct with memset() just a few lines earlier. Unlike some of the code being refactored in subsequent commits, this was always completely redundant. See the original code introduced in 84befcd0a4 ("grep: add a grep.patternType configuration setting", 2012-08-03). Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 1 - 1 file changed, 1 deletion(-) diff --git a/grep.c b/grep.c index 98733db623..29439886e7 100644 --- a/grep.c +++ b/grep.c @@ -38,7 +38,6 @@ void init_grep_defaults(void) opt->regflags = REG_NEWLINE; opt->max_depth = -1; opt->pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED; - opt->extended_regexp_option = 0; color_set(opt->color_context, ""); color_set(opt->color_filename, ""); color_set(opt->color_function, ""); From c7e3855112b106843011bc4862e259ccba2bf6b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 29 Jun 2017 22:22:18 +0000 Subject: [PATCH 2/6] grep: adjust a redundant grep pattern type assignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adjust a now-redundant assignment to extended_regexp_option to make it zero if grep.extendedRegexp is not set. This is always called right after init_grep_defaults() which memsets the entire structure to 0, so there's no need to set it again to zero. However the reason for the if/else pattern is a holdover from[1] where this was adjusted from a bitfield assignment to a boolean. Rather than getting rid of the assignment to 0 in all cases, let's just use the value returned by git_config_bool(), which is more idiomatic and in sync with the rest of the boolean handling in this function. This is a logical follow-up to my commit to remove redundant regflags assignments[2]. This logic was originally introduced in [3], but as explained in the former commit it's working around a pattern in our code that no longer exists, and is now confusing as it leads the reader to think that this needs to be flipped back & forth. 1. 84befcd0a4 ("grep: add a grep.patternType configuration setting", 2012-08-03) 2. e0b9f8ae09 ("grep: remove redundant regflags assignments", 2017-05-25) 3. b22520a37c ("grep: allow -E and -n to be turned on by default via configuration", 2011-03-30) Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/grep.c b/grep.c index 29439886e7..817270d081 100644 --- a/grep.c +++ b/grep.c @@ -78,10 +78,7 @@ int grep_config(const char *var, const char *value, void *cb) return -1; if (!strcmp(var, "grep.extendedregexp")) { - if (git_config_bool(var, value)) - opt->extended_regexp_option = 1; - else - opt->extended_regexp_option = 0; + opt->extended_regexp_option = git_config_bool(var, value); return 0; } From 885ef80d399fa49404608f24598f5df571178603 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 29 Jun 2017 22:22:19 +0000 Subject: [PATCH 3/6] grep: remove redundant "fixed" field re-assignment to 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the redundant re-assignment of the fixed field to zero right after the entire struct has been set to zero via memset(...). Unlike some nearby commits this pattern doesn't date back to the pattern described in e0b9f8ae09 ("grep: remove redundant regflags assignments", 2017-05-25), instead it was apparently cargo-culted in 9eceddeec6 ("Use kwset in grep", 2011-08-21). Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/grep.c b/grep.c index 817270d081..86dc9b696f 100644 --- a/grep.c +++ b/grep.c @@ -626,8 +626,6 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) has_null(p->pattern, p->patternlen) || is_fixed(p->pattern, p->patternlen)) p->fixed = !icase || ascii_only; - else - p->fixed = 0; if (p->fixed) { p->kws = kwsalloc(icase ? tolower_trans_tbl : NULL); From b07ed4e532fa9492a53b7b510c45efd671f468a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 29 Jun 2017 22:22:20 +0000 Subject: [PATCH 4/6] grep: remove redundant and verbose re-assignments to 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the redundant re-assignments of the fixed/pcre1/pcre2 fields to zero right after the entire struct has been set to zero via memset(...). See an earlier related cleanup commit e0b9f8ae09 ("grep: remove redundant regflags assignments", 2017-05-25) for an explanation of why the code was structured like this to begin with. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/grep.c b/grep.c index 86dc9b696f..7fcdaa0753 100644 --- a/grep.c +++ b/grep.c @@ -174,28 +174,18 @@ static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, st /* fall through */ case GREP_PATTERN_TYPE_BRE: - opt->fixed = 0; - opt->pcre1 = 0; - opt->pcre2 = 0; break; case GREP_PATTERN_TYPE_ERE: - opt->fixed = 0; - opt->pcre1 = 0; - opt->pcre2 = 0; opt->regflags |= REG_EXTENDED; break; case GREP_PATTERN_TYPE_FIXED: opt->fixed = 1; - opt->pcre1 = 0; - opt->pcre2 = 0; break; case GREP_PATTERN_TYPE_PCRE: - opt->fixed = 0; #ifdef USE_LIBPCRE2 - opt->pcre1 = 0; opt->pcre2 = 1; #else /* @@ -205,7 +195,6 @@ static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, st * "cannot use Perl-compatible regexes[...]". */ opt->pcre1 = 1; - opt->pcre2 = 0; #endif break; } From 07a3d4117390841ed6884a9eac836918491df0a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 29 Jun 2017 22:22:21 +0000 Subject: [PATCH 5/6] grep: remove regflags from the public grep_opt API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor calls to the grep machinery to always pass opt.ignore_case & opt.extended_regexp_option instead of setting the equivalent regflags bits. The bug fixed when making -i work with -P in commit 9e3cbc59d5 ("log: make --regexp-ignore-case work with --perl-regexp", 2017-05-20) was really just plastering over the code smell which this change fixes. The reason for adding the extensive commentary here is that I discovered some subtle complexity in implementing this that really should be called out explicitly to future readers. Before this change we'd rely on the difference between `extended_regexp_option` and `regflags` to serve as a membrane between our preliminary parsing of grep.extendedRegexp and grep.patternType, and what we decided to do internally. Now that those two are the same thing, it's necessary to unset `extended_regexp_option` just before we commit in cases where both of those config variables are set. See 84befcd0a4 ("grep: add a grep.patternType configuration setting", 2012-08-03) for the code and documentation related to that. The explanation of why the if/else branches in grep_commit_pattern_type() are ordered the way they are exists in that commit message, but I think it's worth calling this subtlety out explicitly with a comment for future readers. Even though grep_commit_pattern_type() is the only caller of grep_set_pattern_type_option() it's simpler to reset the extended_regexp_option flag in the latter, since 2/3 branches in the former would otherwise need to reset it, this way we can do it in one place. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- builtin/grep.c | 2 -- grep.c | 43 ++++++++++++++++++++++++++++++++++--------- grep.h | 1 - revision.c | 2 -- 4 files changed, 34 insertions(+), 14 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index f61a9d938b..b682966439 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1169,8 +1169,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix) if (!opt.pattern_list) die(_("no pattern given.")); - if (!opt.fixed && opt.ignore_case) - opt.regflags |= REG_ICASE; /* * We have to find "--" in a separate pass, because its presence diff --git a/grep.c b/grep.c index 7fcdaa0753..11a86548d6 100644 --- a/grep.c +++ b/grep.c @@ -35,7 +35,6 @@ void init_grep_defaults(void) memset(opt, 0, sizeof(*opt)); opt->relative = 1; opt->pathname = 1; - opt->regflags = REG_NEWLINE; opt->max_depth = -1; opt->pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED; color_set(opt->color_context, ""); @@ -153,7 +152,6 @@ void grep_init(struct grep_opt *opt, const char *prefix) opt->linenum = def->linenum; opt->max_depth = def->max_depth; opt->pathname = def->pathname; - opt->regflags = def->regflags; opt->relative = def->relative; opt->output = def->output; @@ -169,6 +167,24 @@ void grep_init(struct grep_opt *opt, const char *prefix) static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct grep_opt *opt) { + /* + * When committing to the pattern type by setting the relevant + * fields in grep_opt it's generally not necessary to zero out + * the fields we're not choosing, since they won't have been + * set by anything. The extended_regexp_option field is the + * only exception to this. + * + * This is because in the process of parsing grep.patternType + * & grep.extendedRegexp we set opt->pattern_type_option and + * opt->extended_regexp_option, respectively. We then + * internally use opt->extended_regexp_option to see if we're + * compiling an ERE. It must be unset if that's not actually + * the case. + */ + if (pattern_type != GREP_PATTERN_TYPE_ERE && + opt->extended_regexp_option) + opt->extended_regexp_option = 0; + switch (pattern_type) { case GREP_PATTERN_TYPE_UNSPECIFIED: /* fall through */ @@ -177,7 +193,7 @@ static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, st break; case GREP_PATTERN_TYPE_ERE: - opt->regflags |= REG_EXTENDED; + opt->extended_regexp_option = 1; break; case GREP_PATTERN_TYPE_FIXED: @@ -207,6 +223,11 @@ void grep_commit_pattern_type(enum grep_pattern_type pattern_type, struct grep_o else if (opt->pattern_type_option != GREP_PATTERN_TYPE_UNSPECIFIED) grep_set_pattern_type_option(opt->pattern_type_option, opt); else if (opt->extended_regexp_option) + /* + * This branch *must* happen after setting from the + * opt->pattern_type_option above, we don't want + * grep.extendedRegexp to override grep.patternType! + */ grep_set_pattern_type_option(GREP_PATTERN_TYPE_ERE, opt); } @@ -572,7 +593,7 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) { struct strbuf sb = STRBUF_INIT; int err; - int regflags = opt->regflags; + int regflags = REG_NEWLINE; basic_regex_quote_buf(&sb, p->pattern); if (opt->ignore_case) @@ -591,12 +612,12 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { - int icase, ascii_only; + int ascii_only; int err; + int regflags = REG_NEWLINE; p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; - icase = opt->regflags & REG_ICASE || p->ignore_case; ascii_only = !has_non_ascii(p->pattern); /* @@ -614,10 +635,10 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) if (opt->fixed || has_null(p->pattern, p->patternlen) || is_fixed(p->pattern, p->patternlen)) - p->fixed = !icase || ascii_only; + p->fixed = !p->ignore_case || ascii_only; if (p->fixed) { - p->kws = kwsalloc(icase ? tolower_trans_tbl : NULL); + p->kws = kwsalloc(p->ignore_case ? tolower_trans_tbl : NULL); kwsincr(p->kws, p->pattern, p->patternlen); kwsprep(p->kws); return; @@ -641,7 +662,11 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) return; } - err = regcomp(&p->regexp, p->pattern, opt->regflags); + if (p->ignore_case) + regflags |= REG_ICASE; + if (opt->extended_regexp_option) + regflags |= REG_EXTENDED; + err = regcomp(&p->regexp, p->pattern, regflags); if (err) { char errbuf[1024]; regerror(err, &p->regexp, errbuf, 1024); diff --git a/grep.h b/grep.h index b8f93bfc2d..0c091e5104 100644 --- a/grep.h +++ b/grep.h @@ -162,7 +162,6 @@ struct grep_opt { char color_match_selected[COLOR_MAXLEN]; char color_selected[COLOR_MAXLEN]; char color_sep[COLOR_MAXLEN]; - int regflags; unsigned pre_context; unsigned post_context; unsigned last_shown; diff --git a/revision.c b/revision.c index e181ad1b70..207103d211 100644 --- a/revision.c +++ b/revision.c @@ -1362,7 +1362,6 @@ void init_revisions(struct rev_info *revs, const char *prefix) init_grep_defaults(); grep_init(&revs->grep_filter, prefix); revs->grep_filter.status_only = 1; - revs->grep_filter.regflags = REG_NEWLINE; diff_setup(&revs->diffopt); if (prefix && !revs->diffopt.prefix) { @@ -2022,7 +2021,6 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg revs->grep_filter.pattern_type_option = GREP_PATTERN_TYPE_ERE; } else if (!strcmp(arg, "--regexp-ignore-case") || !strcmp(arg, "-i")) { revs->grep_filter.ignore_case = 1; - revs->grep_filter.regflags |= REG_ICASE; DIFF_OPT_SET(&revs->diffopt, PICKAXE_IGNORE_CASE); } else if (!strcmp(arg, "--fixed-strings") || !strcmp(arg, "-F")) { revs->grep_filter.pattern_type_option = GREP_PATTERN_TYPE_FIXED; From 1ceababc4c1817f86094dab1771e23da1ddaf443 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 29 Jun 2017 22:22:22 +0000 Subject: [PATCH 6/6] grep: remove redundant REG_NEWLINE when compiling fixed regex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the redundant REG_NEWLINE regcomp() flag from the code that compiles a fixed-string regular-expression. The REG_NEWLINE causes metacharacters such as "." to match a newline, since the basic_regex_quote_buf() function being called here escapes all metacharacters using REG_NEWLINE is confusing and redundant. The use of this flag was introduced as an unintended emergent property of 793dc676e0 ("grep/icase: avoid kwsset when -F is specified", 2016-06-25). That change amended the existing regflags, which were initialized to REG_NEWLINE in init_grep_defaults() assuming a subsequent non-fixed regcomp(). Manual testing reveals that this was always redundant, since no flags of any use were inherited from opt->regflags even back then. 793dc676e0 passes all tests with this on top: diff --git a/grep.c b/grep.c index 627ae3e3e8..89e84ed7fd 100644 --- a/grep.c +++ b/grep.c @@ -407,3 +407,3 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) basic_regex_quote_buf(&sb, p->pattern); - regflags = opt->regflags & ~REG_EXTENDED; + regflags = 0; if (opt->ignore_case) Since this isn't used for anything and never was, remove it to reduce confusion when reading this code. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/grep.c b/grep.c index 11a86548d6..2efec0e182 100644 --- a/grep.c +++ b/grep.c @@ -593,7 +593,7 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) { struct strbuf sb = STRBUF_INIT; int err; - int regflags = REG_NEWLINE; + int regflags = 0; basic_regex_quote_buf(&sb, p->pattern); if (opt->ignore_case)