regex: use regexec_buf()
The new regexec_buf() function operates on buffers with an explicitly specified length, rather than NUL-terminated strings. We need to use this function whenever the buffer we want to pass to regexec(3) may have been mmap(2)ed (and is hence not NUL-terminated).

Note: the original motivation for this patch was to fix a bug where `git diff -G <regex>` would crash. This patch converts more callers, though, some of which allocated to construct NUL-terminated strings, or worse, modified buffers to temporarily insert NULs while calling regexec(3). By converting them to use regexec_buf(), the code has become much cleaner.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
maint
							parent
							
								
									2f8952250a
								
							
						
					
					
						commit
						b7d36ffca0
					
				
							
								
								
									
										3
									
								
								diff.c
								
								
								
								
							
							
						
						
									
										3
									
								
								diff.c
								
								
								
								
							|  | @ -941,7 +941,8 @@ static int find_word_boundaries(mmfile_t *buffer, regex_t *word_regex, | |||
| { | ||||
| 	if (word_regex && *begin < buffer->size) { | ||||
| 		regmatch_t match[1]; | ||||
| 		if (!regexec(word_regex, buffer->ptr + *begin, 1, match, 0)) { | ||||
| 		if (!regexec_buf(word_regex, buffer->ptr + *begin, | ||||
| 				 buffer->size - *begin, 1, match, 0)) { | ||||
| 			char *p = memchr(buffer->ptr + *begin + match[0].rm_so, | ||||
| 					'\n', match[0].rm_eo - match[0].rm_so); | ||||
| 			*end = p ? p - buffer->ptr : match[0].rm_eo + *begin; | ||||
|  |  | |||
|  | @ -21,7 +21,6 @@ static void diffgrep_consume(void *priv, char *line, unsigned long len) | |||
| { | ||||
| 	struct diffgrep_cb *data = priv; | ||||
| 	regmatch_t regmatch; | ||||
| 	int hold; | ||||
|  | ||||
| 	if (line[0] != '+' && line[0] != '-') | ||||
| 		return; | ||||
|  | @ -31,11 +30,8 @@ static void diffgrep_consume(void *priv, char *line, unsigned long len) | |||
| 		 * caller early. | ||||
| 		 */ | ||||
| 		return; | ||||
| 	/* Yuck -- line ought to be "const char *"! */ | ||||
| 	hold = line[len]; | ||||
| 	line[len] = '\0'; | ||||
| 	data->hit = !regexec(data->regexp, line + 1, 1, &regmatch, 0); | ||||
| 	line[len] = hold; | ||||
| 	data->hit = !regexec_buf(data->regexp, line + 1, len - 1, 1, | ||||
| 				 &regmatch, 0); | ||||
| } | ||||
|  | ||||
| static int diff_grep(mmfile_t *one, mmfile_t *two, | ||||
|  | @ -48,9 +44,11 @@ static int diff_grep(mmfile_t *one, mmfile_t *two, | |||
| 	xdemitconf_t xecfg; | ||||
|  | ||||
| 	if (!one) | ||||
| 		return !regexec(regexp, two->ptr, 1, &regmatch, 0); | ||||
| 		return !regexec_buf(regexp, two->ptr, two->size, | ||||
| 				    1, &regmatch, 0); | ||||
| 	if (!two) | ||||
| 		return !regexec(regexp, one->ptr, 1, &regmatch, 0); | ||||
| 		return !regexec_buf(regexp, one->ptr, one->size, | ||||
| 				    1, &regmatch, 0); | ||||
|  | ||||
| 	/* | ||||
| 	 * We have both sides; need to run textual diff and see if | ||||
|  | @ -81,8 +79,8 @@ static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws) | |||
| 		regmatch_t regmatch; | ||||
| 		int flags = 0; | ||||
|  | ||||
| 		assert(data[sz] == '\0'); | ||||
| 		while (*data && !regexec(regexp, data, 1, &regmatch, flags)) { | ||||
| 		while (*data && | ||||
| 		       !regexec_buf(regexp, data, sz, 1, &regmatch, flags)) { | ||||
| 			flags |= REG_NOTBOL; | ||||
| 			data += regmatch.rm_eo; | ||||
| 			if (*data && regmatch.rm_so == regmatch.rm_eo) | ||||
|  |  | |||
							
								
								
									
										14
									
								
								grep.c
								
								
								
								
							
							
						
						
									
										14
									
								
								grep.c
								
								
								
								
							|  | @ -848,17 +848,6 @@ static int fixmatch(struct grep_pat *p, char *line, char *eol, | |||
| 	} | ||||
| } | ||||
|  | ||||
| static int regmatch(const regex_t *preg, char *line, char *eol, | ||||
| 		    regmatch_t *match, int eflags) | ||||
| { | ||||
| #ifdef REG_STARTEND | ||||
| 	match->rm_so = 0; | ||||
| 	match->rm_eo = eol - line; | ||||
| 	eflags |= REG_STARTEND; | ||||
| #endif | ||||
| 	return regexec(preg, line, 1, match, eflags); | ||||
| } | ||||
|  | ||||
| static int patmatch(struct grep_pat *p, char *line, char *eol, | ||||
| 		    regmatch_t *match, int eflags) | ||||
| { | ||||
|  | @ -869,7 +858,8 @@ static int patmatch(struct grep_pat *p, char *line, char *eol, | |||
| 	else if (p->pcre_regexp) | ||||
| 		hit = !pcrematch(p, line, eol, match, eflags); | ||||
| 	else | ||||
| 		hit = !regmatch(&p->regexp, line, eol, match, eflags); | ||||
| 		hit = !regexec_buf(&p->regexp, line, eol - line, 1, match, | ||||
| 				   eflags); | ||||
|  | ||||
| 	return hit; | ||||
| } | ||||
|  |  | |||
|  | @ -14,7 +14,7 @@ test_expect_success setup ' | |||
| 	test_tick && | ||||
| 	git commit -m "A 4k file" | ||||
| ' | ||||
| test_expect_failure '-G matches' ' | ||||
| test_expect_success '-G matches' ' | ||||
| 	git diff --name-only -G "^0{4096}$" HEAD^ >out && | ||||
| 	test 4096-zeroes.txt = "$(cat out)" | ||||
| ' | ||||
|  |  | |||
|  | @ -216,11 +216,10 @@ struct ff_regs { | |||
| static long ff_regexp(const char *line, long len, | ||||
| 		char *buffer, long buffer_size, void *priv) | ||||
| { | ||||
| 	char *line_buffer; | ||||
| 	struct ff_regs *regs = priv; | ||||
| 	regmatch_t pmatch[2]; | ||||
| 	int i; | ||||
| 	int result = -1; | ||||
| 	int result; | ||||
|  | ||||
| 	/* Exclude terminating newline (and cr) from matching */ | ||||
| 	if (len > 0 && line[len-1] == '\n') { | ||||
|  | @ -230,18 +229,16 @@ static long ff_regexp(const char *line, long len, | |||
| 			len--; | ||||
| 	} | ||||
|  | ||||
| 	line_buffer = xstrndup(line, len); /* make NUL terminated */ | ||||
|  | ||||
| 	for (i = 0; i < regs->nr; i++) { | ||||
| 		struct ff_reg *reg = regs->array + i; | ||||
| 		if (!regexec(&reg->re, line_buffer, 2, pmatch, 0)) { | ||||
| 		if (!regexec_buf(&reg->re, line, len, 2, pmatch, 0)) { | ||||
| 			if (reg->negate) | ||||
| 				goto fail; | ||||
| 				return -1; | ||||
| 			break; | ||||
| 		} | ||||
| 	} | ||||
| 	if (regs->nr <= i) | ||||
| 		goto fail; | ||||
| 		return -1; | ||||
| 	i = pmatch[1].rm_so >= 0 ? 1 : 0; | ||||
| 	line += pmatch[i].rm_so; | ||||
| 	result = pmatch[i].rm_eo - pmatch[i].rm_so; | ||||
|  | @ -250,8 +247,6 @@ static long ff_regexp(const char *line, long len, | |||
| 	while (result > 0 && (isspace(line[result - 1]))) | ||||
| 		result--; | ||||
| 	memcpy(buffer, line, result); | ||||
|  fail: | ||||
| 	free(line_buffer); | ||||
| 	return result; | ||||
| } | ||||
|  | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 Johannes Schindelin
						Johannes Schindelin