regex: use regexec_buf()
The new regexec_buf() function operates on buffers with an explicitly specified length, rather than NUL-terminated strings. We need to use this function whenever the buffer we want to pass to regexec(3) may have been mmap(2)ed (and is hence not NUL-terminated). Note: the original motivation for this patch was to fix a bug where `git diff -G <regex>` would crash. This patch converts more callers, though, some of which allocated to construct NUL-terminated strings, or worse, modified buffers to temporarily insert NULs while calling regexec(3). By converting them to use regexec_buf(), the code has become much cleaner. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> maint
							parent
							
								
									2f8952250a
								
							
						
					
					
						commit
						b7d36ffca0
					
				
							
								
								
									
										3
									
								
								diff.c
								
								
								
								
							
							
						
						
									
										3
									
								
								diff.c
								
								
								
								
							|  | @ -941,7 +941,8 @@ static int find_word_boundaries(mmfile_t *buffer, regex_t *word_regex, | ||||||
| { | { | ||||||
| 	if (word_regex && *begin < buffer->size) { | 	if (word_regex && *begin < buffer->size) { | ||||||
| 		regmatch_t match[1]; | 		regmatch_t match[1]; | ||||||
| 		if (!regexec(word_regex, buffer->ptr + *begin, 1, match, 0)) { | 		if (!regexec_buf(word_regex, buffer->ptr + *begin, | ||||||
|  | 				 buffer->size - *begin, 1, match, 0)) { | ||||||
| 			char *p = memchr(buffer->ptr + *begin + match[0].rm_so, | 			char *p = memchr(buffer->ptr + *begin + match[0].rm_so, | ||||||
| 					'\n', match[0].rm_eo - match[0].rm_so); | 					'\n', match[0].rm_eo - match[0].rm_so); | ||||||
| 			*end = p ? p - buffer->ptr : match[0].rm_eo + *begin; | 			*end = p ? p - buffer->ptr : match[0].rm_eo + *begin; | ||||||
|  |  | ||||||
|  | @ -21,7 +21,6 @@ static void diffgrep_consume(void *priv, char *line, unsigned long len) | ||||||
| { | { | ||||||
| 	struct diffgrep_cb *data = priv; | 	struct diffgrep_cb *data = priv; | ||||||
| 	regmatch_t regmatch; | 	regmatch_t regmatch; | ||||||
| 	int hold; |  | ||||||
|  |  | ||||||
| 	if (line[0] != '+' && line[0] != '-') | 	if (line[0] != '+' && line[0] != '-') | ||||||
| 		return; | 		return; | ||||||
|  | @ -31,11 +30,8 @@ static void diffgrep_consume(void *priv, char *line, unsigned long len) | ||||||
| 		 * caller early. | 		 * caller early. | ||||||
| 		 */ | 		 */ | ||||||
| 		return; | 		return; | ||||||
| 	/* Yuck -- line ought to be "const char *"! */ | 	data->hit = !regexec_buf(data->regexp, line + 1, len - 1, 1, | ||||||
| 	hold = line[len]; | 				 &regmatch, 0); | ||||||
| 	line[len] = '\0'; |  | ||||||
| 	data->hit = !regexec(data->regexp, line + 1, 1, &regmatch, 0); |  | ||||||
| 	line[len] = hold; |  | ||||||
| } | } | ||||||
|  |  | ||||||
| static int diff_grep(mmfile_t *one, mmfile_t *two, | static int diff_grep(mmfile_t *one, mmfile_t *two, | ||||||
|  | @ -48,9 +44,11 @@ static int diff_grep(mmfile_t *one, mmfile_t *two, | ||||||
| 	xdemitconf_t xecfg; | 	xdemitconf_t xecfg; | ||||||
|  |  | ||||||
| 	if (!one) | 	if (!one) | ||||||
| 		return !regexec(regexp, two->ptr, 1, &regmatch, 0); | 		return !regexec_buf(regexp, two->ptr, two->size, | ||||||
|  | 				    1, &regmatch, 0); | ||||||
| 	if (!two) | 	if (!two) | ||||||
| 		return !regexec(regexp, one->ptr, 1, &regmatch, 0); | 		return !regexec_buf(regexp, one->ptr, one->size, | ||||||
|  | 				    1, &regmatch, 0); | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| 	 * We have both sides; need to run textual diff and see if | 	 * We have both sides; need to run textual diff and see if | ||||||
|  | @ -81,8 +79,8 @@ static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws) | ||||||
| 		regmatch_t regmatch; | 		regmatch_t regmatch; | ||||||
| 		int flags = 0; | 		int flags = 0; | ||||||
|  |  | ||||||
| 		assert(data[sz] == '\0'); | 		while (*data && | ||||||
| 		while (*data && !regexec(regexp, data, 1, &regmatch, flags)) { | 		       !regexec_buf(regexp, data, sz, 1, &regmatch, flags)) { | ||||||
| 			flags |= REG_NOTBOL; | 			flags |= REG_NOTBOL; | ||||||
| 			data += regmatch.rm_eo; | 			data += regmatch.rm_eo; | ||||||
| 			if (*data && regmatch.rm_so == regmatch.rm_eo) | 			if (*data && regmatch.rm_so == regmatch.rm_eo) | ||||||
|  |  | ||||||
							
								
								
									
										14
									
								
								grep.c
								
								
								
								
							
							
						
						
									
										14
									
								
								grep.c
								
								
								
								
							|  | @ -848,17 +848,6 @@ static int fixmatch(struct grep_pat *p, char *line, char *eol, | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
| static int regmatch(const regex_t *preg, char *line, char *eol, |  | ||||||
| 		    regmatch_t *match, int eflags) |  | ||||||
| { |  | ||||||
| #ifdef REG_STARTEND |  | ||||||
| 	match->rm_so = 0; |  | ||||||
| 	match->rm_eo = eol - line; |  | ||||||
| 	eflags |= REG_STARTEND; |  | ||||||
| #endif |  | ||||||
| 	return regexec(preg, line, 1, match, eflags); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static int patmatch(struct grep_pat *p, char *line, char *eol, | static int patmatch(struct grep_pat *p, char *line, char *eol, | ||||||
| 		    regmatch_t *match, int eflags) | 		    regmatch_t *match, int eflags) | ||||||
| { | { | ||||||
|  | @ -869,7 +858,8 @@ static int patmatch(struct grep_pat *p, char *line, char *eol, | ||||||
| 	else if (p->pcre_regexp) | 	else if (p->pcre_regexp) | ||||||
| 		hit = !pcrematch(p, line, eol, match, eflags); | 		hit = !pcrematch(p, line, eol, match, eflags); | ||||||
| 	else | 	else | ||||||
| 		hit = !regmatch(&p->regexp, line, eol, match, eflags); | 		hit = !regexec_buf(&p->regexp, line, eol - line, 1, match, | ||||||
|  | 				   eflags); | ||||||
|  |  | ||||||
| 	return hit; | 	return hit; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -14,7 +14,7 @@ test_expect_success setup ' | ||||||
| 	test_tick && | 	test_tick && | ||||||
| 	git commit -m "A 4k file" | 	git commit -m "A 4k file" | ||||||
| ' | ' | ||||||
| test_expect_failure '-G matches' ' | test_expect_success '-G matches' ' | ||||||
| 	git diff --name-only -G "^0{4096}$" HEAD^ >out && | 	git diff --name-only -G "^0{4096}$" HEAD^ >out && | ||||||
| 	test 4096-zeroes.txt = "$(cat out)" | 	test 4096-zeroes.txt = "$(cat out)" | ||||||
| ' | ' | ||||||
|  |  | ||||||
|  | @ -216,11 +216,10 @@ struct ff_regs { | ||||||
| static long ff_regexp(const char *line, long len, | static long ff_regexp(const char *line, long len, | ||||||
| 		char *buffer, long buffer_size, void *priv) | 		char *buffer, long buffer_size, void *priv) | ||||||
| { | { | ||||||
| 	char *line_buffer; |  | ||||||
| 	struct ff_regs *regs = priv; | 	struct ff_regs *regs = priv; | ||||||
| 	regmatch_t pmatch[2]; | 	regmatch_t pmatch[2]; | ||||||
| 	int i; | 	int i; | ||||||
| 	int result = -1; | 	int result; | ||||||
|  |  | ||||||
| 	/* Exclude terminating newline (and cr) from matching */ | 	/* Exclude terminating newline (and cr) from matching */ | ||||||
| 	if (len > 0 && line[len-1] == '\n') { | 	if (len > 0 && line[len-1] == '\n') { | ||||||
|  | @ -230,18 +229,16 @@ static long ff_regexp(const char *line, long len, | ||||||
| 			len--; | 			len--; | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	line_buffer = xstrndup(line, len); /* make NUL terminated */ |  | ||||||
|  |  | ||||||
| 	for (i = 0; i < regs->nr; i++) { | 	for (i = 0; i < regs->nr; i++) { | ||||||
| 		struct ff_reg *reg = regs->array + i; | 		struct ff_reg *reg = regs->array + i; | ||||||
| 		if (!regexec(&reg->re, line_buffer, 2, pmatch, 0)) { | 		if (!regexec_buf(&reg->re, line, len, 2, pmatch, 0)) { | ||||||
| 			if (reg->negate) | 			if (reg->negate) | ||||||
| 				goto fail; | 				return -1; | ||||||
| 			break; | 			break; | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 	if (regs->nr <= i) | 	if (regs->nr <= i) | ||||||
| 		goto fail; | 		return -1; | ||||||
| 	i = pmatch[1].rm_so >= 0 ? 1 : 0; | 	i = pmatch[1].rm_so >= 0 ? 1 : 0; | ||||||
| 	line += pmatch[i].rm_so; | 	line += pmatch[i].rm_so; | ||||||
| 	result = pmatch[i].rm_eo - pmatch[i].rm_so; | 	result = pmatch[i].rm_eo - pmatch[i].rm_so; | ||||||
|  | @ -250,8 +247,6 @@ static long ff_regexp(const char *line, long len, | ||||||
| 	while (result > 0 && (isspace(line[result - 1]))) | 	while (result > 0 && (isspace(line[result - 1]))) | ||||||
| 		result--; | 		result--; | ||||||
| 	memcpy(buffer, line, result); | 	memcpy(buffer, line, result); | ||||||
|  fail: |  | ||||||
| 	free(line_buffer); |  | ||||||
| 	return result; | 	return result; | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	 Johannes Schindelin
						Johannes Schindelin