tree_entry_interesting: do basedir compare on wildcard patterns when possible
Currently we treat "*.c" and "path/to/*.c" the same way, which means we check all possible paths in the repo against "path/to/*.c". One could see that "path/elsewhere/foo.c" obviously cannot match "path/to/*.c" and we only need to check all paths _inside_ "path/to/" against that pattern.

This patch checks the leading fixed part of a pathspec against the base directory and exits early if possible. We could even optimize further in the "path/to/something*.c" case (i.e. check the fixed part against name_entry as well) but that's more complicated and probably does not gain us much.

-O2 build on linux-2.6, without and with this patch respectively:

$ time git rev-list --quiet HEAD -- 'drivers/*.c'
real 1m9.484s
user 1m9.128s
sys 0m0.181s

$ time ~/w/git/git rev-list --quiet HEAD -- 'drivers/*.c'
real 0m15.710s
user 0m15.564s
sys 0m0.107s

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

maint
							parent
							
								
									8c6abbcd27
								
							
						
					
					
						commit
						c904cd89e4
					
				
							
								
								
									
										65
									
								
								tree-walk.c
								
								
								
								
							
							
						
						
									
										65
									
								
								tree-walk.c
								
								
								
								
							|  | @ -572,6 +572,54 @@ static int match_dir_prefix(const char *base, | |||
| 	return 0; | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * Perform matching on the leading non-wildcard part of | ||||
|  * pathspec. item->nowildcard_len must be greater than zero. | ||||
|  * | ||||
|  * item:    pathspec item; only item->match and | ||||
|  *          item->nowildcard_len are read here. | ||||
|  * base:    directory prefix to compare, baselen bytes long. | ||||
|  * matched: out parameter. Set to nowildcard_len when base is at | ||||
|  *          least that long (whether or not it matches), to | ||||
|  *          baselen when a shorter base matches, and to 0 when | ||||
|  *          baselen is 0. Left untouched when a shorter base | ||||
|  *          fails to match. | ||||
|  * | ||||
|  * Return 0 when base is found not to match the non-wildcard | ||||
|  * part. Otherwise return 1 (base at least nowildcard_len long) | ||||
|  * or entry_interesting (shorter or empty base), which is | ||||
|  * expected to be non-zero. | ||||
|  */ | ||||
| static int match_wildcard_base(const struct pathspec_item *item, | ||||
| 			       const char *base, int baselen, | ||||
| 			       int *matched) | ||||
| { | ||||
| 	const char *match = item->match; | ||||
| 	/* the wildcard part is not considered in this function */ | ||||
| 	int matchlen = item->nowildcard_len; | ||||
|  | ||||
| 	if (baselen) { | ||||
| 		int dirlen; | ||||
| 		/* | ||||
| 		 * base is at least as long as the non-wildcard | ||||
| 		 * part: comparing the first matchlen bytes fully | ||||
| 		 * decides the result, so return right away | ||||
| 		 * whether or not they match. | ||||
| 		 */ | ||||
| 		if (baselen >= matchlen) { | ||||
| 			*matched = matchlen; | ||||
| 			return !strncmp(base, match, matchlen); | ||||
| 		} | ||||
|  | ||||
| 		dirlen = matchlen; | ||||
| 		while (dirlen && match[dirlen - 1] != '/') | ||||
| 			dirlen--; | ||||
|  | ||||
| 		/* | ||||
| 		 * Return early if base is shorter than the | ||||
| 		 * non-wildcard part but it does not match. Note that | ||||
| 		 * base ends with '/' so we are sure it really matches | ||||
| 		 * directory. | ||||
| 		 * | ||||
| 		 * NOTE(review): dirlen, computed just above as the | ||||
| 		 * length of match up to and including the last '/' | ||||
| 		 * of its non-wildcard part (0 if there is none), is | ||||
| 		 * never read in this function. | ||||
| 		 */ | ||||
| 		if (strncmp(base, match, baselen)) | ||||
| 			return 0; | ||||
| 		*matched = baselen; | ||||
| 	} else | ||||
| 		*matched = 0; | ||||
| 	/* | ||||
| 	 * We could have checked entry against the non-wildcard part | ||||
| 	 * that is not in base and done a similar never_interesting | ||||
| 	 * optimization as in match_entry. For now just be happy with | ||||
| 	 * base comparison. | ||||
| 	 */ | ||||
| 	return entry_interesting; | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * Is a tree entry interesting given the pathspec we have? | ||||
|  * | ||||
|  | @ -602,7 +650,7 @@ enum interesting tree_entry_interesting(const struct name_entry *entry, | |||
| 		const struct pathspec_item *item = ps->items+i; | ||||
| 		const char *match = item->match; | ||||
| 		const char *base_str = base->buf + base_offset; | ||||
| 		int matchlen = item->len; | ||||
| 		int matchlen = item->len, matched = 0; | ||||
|  | ||||
| 		if (baselen >= matchlen) { | ||||
| 			/* If it doesn't match, move along... */ | ||||
|  | @ -647,9 +695,24 @@ match_wildcards: | |||
| 		if (item->nowildcard_len == item->len) | ||||
| 			continue; | ||||
|  | ||||
| 		if (item->nowildcard_len && | ||||
| 		    !match_wildcard_base(item, base_str, baselen, &matched)) | ||||
| 			return entry_not_interesting; | ||||
|  | ||||
| 		/* | ||||
| 		 * Concatenate base and entry->path into one and do | ||||
| 		 * fnmatch() on it. | ||||
| 		 * | ||||
| 		 * While we could avoid concatenation in certain cases | ||||
| 		 * [1], which saves a memcpy and potentially a | ||||
| 		 * realloc, it turns out not worth it. Measurement on | ||||
| 		 * linux-2.6 does not show any clear improvements, | ||||
| 		 * partly because of the nowildcard_len optimization | ||||
| 		 * in git_fnmatch(). Avoid micro-optimizations here. | ||||
| 		 * | ||||
| 		 * [1] if match_wildcard_base() says the base | ||||
| 		 * directory is already matched, we only need to match | ||||
| 		 * the rest, which is shorter so _in theory_ faster. | ||||
| 		 */ | ||||
|  | ||||
| 		strbuf_add(base, entry->path, pathlen); | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 Nguyễn Thái Ngọc Duy
						Nguyễn Thái Ngọc Duy