Browse Source

Merge branch 'rs/fgrep'

* rs/fgrep:
  grep: don't call regexec() for fixed strings
  grep -w: forward to next possible position after rejected match
maint
Junio C Hamano 16 years ago
parent
commit
39d743864b
  1. 40
      grep.c
  2. 1
      grep.h

40
grep.c

@ -28,9 +28,31 @@ void append_grep_pattern(struct grep_opt *opt, const char *pat,
p->next = NULL; p->next = NULL;
} }


/*
 * Return 1 if `c` should be treated as a regular-expression
 * metacharacter, 0 otherwise.
 *
 * A character qualifies when isspecial() (defined elsewhere) accepts it,
 * or when it is one of: $ ( ) + . ^ { |
 * NOTE(review): presumably isspecial() covers the remaining
 * metacharacters such as '*', '?', '[' and '\\' -- confirm against its
 * definition.
 */
static int isregexspecial(int c)
{
	if (isspecial(c))
		return 1;

	switch (c) {
	case '$':
	case '(':
	case ')':
	case '+':
	case '.':
	case '^':
	case '{':
	case '|':
		return 1;
	default:
		return 0;
	}
}

/*
 * Return 1 if the NUL-terminated string `s` contains no character that
 * isregexspecial() flags, i.e. it can be matched as a literal string;
 * return 0 as soon as a flagged character is found.
 *
 * An empty string yields 1.
 */
static int is_fixed(const char *s)
{
	/*
	 * Stop explicitly at the terminator instead of relying on
	 * isregexspecial('\0') being true; otherwise the scan could run
	 * past the end of the string.
	 */
	while (*s && !isregexspecial(*s))
		s++;
	return !*s;
}

static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
{ {
int err = regcomp(&p->regexp, p->pattern, opt->regflags); int err;

if (opt->fixed || is_fixed(p->pattern))
p->fixed = 1;
if (opt->regflags & REG_ICASE)
p->fixed = 0;
if (p->fixed)
return;

err = regcomp(&p->regexp, p->pattern, opt->regflags);
if (err) { if (err) {
char errbuf[1024]; char errbuf[1024];
char where[1024]; char where[1024];
@ -159,8 +181,7 @@ void compile_grep_patterns(struct grep_opt *opt)
case GREP_PATTERN: /* atom */ case GREP_PATTERN: /* atom */
case GREP_PATTERN_HEAD: case GREP_PATTERN_HEAD:
case GREP_PATTERN_BODY: case GREP_PATTERN_BODY:
if (!opt->fixed) compile_regexp(p, opt);
compile_regexp(p, opt);
break; break;
default: default:
opt->extended = 1; opt->extended = 1;
@ -294,7 +315,6 @@ static struct {
static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol, char *eol, enum grep_context ctx) static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol, char *eol, enum grep_context ctx)
{ {
int hit = 0; int hit = 0;
int at_true_bol = 1;
int saved_ch = 0; int saved_ch = 0;
regmatch_t pmatch[10]; regmatch_t pmatch[10];


@ -315,7 +335,7 @@ static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol
} }


again: again:
if (!opt->fixed) { if (!p->fixed) {
regex_t *exp = &p->regexp; regex_t *exp = &p->regexp;
hit = !regexec(exp, bol, ARRAY_SIZE(pmatch), hit = !regexec(exp, bol, ARRAY_SIZE(pmatch),
pmatch, 0); pmatch, 0);
@ -337,7 +357,7 @@ static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol
* either end of the line, or at word boundary * either end of the line, or at word boundary
* (i.e. the next char must not be a word char). * (i.e. the next char must not be a word char).
*/ */
if ( ((pmatch[0].rm_so == 0 && at_true_bol) || if ( ((pmatch[0].rm_so == 0) ||
!word_char(bol[pmatch[0].rm_so-1])) && !word_char(bol[pmatch[0].rm_so-1])) &&
((pmatch[0].rm_eo == (eol-bol)) || ((pmatch[0].rm_eo == (eol-bol)) ||
!word_char(bol[pmatch[0].rm_eo])) ) !word_char(bol[pmatch[0].rm_eo])) )
@ -349,10 +369,14 @@ static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol
/* There could be more than one match on the /* There could be more than one match on the
* line, and the first match might not be * line, and the first match might not be
* strict word match. But later ones could be! * strict word match. But later ones could be!
* Forward to the next possible start, i.e. the
* next position following a non-word char.
*/ */
bol = pmatch[0].rm_so + bol + 1; bol = pmatch[0].rm_so + bol + 1;
at_true_bol = 0; while (word_char(bol[-1]) && bol < eol)
goto again; bol++;
if (bol < eol)
goto again;
} }
} }
if (p->token == GREP_PATTERN_HEAD && saved_ch) if (p->token == GREP_PATTERN_HEAD && saved_ch)

1
grep.h

@ -30,6 +30,7 @@ struct grep_pat {
const char *pattern; const char *pattern;
enum grep_header_field field; enum grep_header_field field;
regex_t regexp; regex_t regexp;
unsigned fixed:1;
}; };


enum grep_expr_node { enum grep_expr_node {

Loading…
Cancel
Save