Fix error-prone fill_directory() API; make it only return matches
Traditionally, the expected calling convention for the dir.c API was:
    fill_directory(&dir, ..., pathspec)
    foreach entry in dir->entries:
        if (dir_path_match(entry, pathspec))
            process_or_display(entry)
This may have made sense once upon a time, because the fill_directory() call
could use cheap checks to avoid doing full pathspec matching, and an external
caller may have wanted to do other post-processing of the results anyway.
However:
    * this structure makes it easy for users of the API to get it wrong
    * this structure actually makes it harder to understand
      fill_directory() and the functions it uses internally.  It has
      tripped me up several times while trying to fix bugs and
      restructure things.
    * relying on post-filtering was already found to produce wrong
      results; pathspec matching had to be added internally for multiple
      cases in order to get the right results (see commits 404ebceda0
      (dir: also check directories for matching pathspecs, 2019-09-17)
      and 89a1f4aaf7 (dir: if our pathspec might match files under a
      dir, recurse into it, 2019-09-17))
    * it's bad for performance: fill_directory() already has to do lots
      of checks and knows the subset of cases where it still needs to do
      more checks.  Forcing external callers to do full pathspec
      matching means they must re-check _every_ path.
So, add the pathspec matching within the fill_directory() internals, and
remove it from external callers.
Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
			
			
				maint
			
			
		
							parent
							
								
									7f45ab2dca
								
							
						
					
					
						commit
						95c11ecc73
					
				|  | @ -989,12 +989,6 @@ int cmd_clean(int argc, const char **argv, const char *prefix) | ||||||
| 		if (!cache_name_is_other(ent->name, ent->len)) | 		if (!cache_name_is_other(ent->name, ent->len)) | ||||||
| 			continue; | 			continue; | ||||||
|  |  | ||||||
| 		if (pathspec.nr) |  | ||||||
| 			matches = dir_path_match(&the_index, ent, &pathspec, 0, NULL); |  | ||||||
|  |  | ||||||
| 		if (pathspec.nr && !matches) |  | ||||||
| 			continue; |  | ||||||
|  |  | ||||||
| 		if (lstat(ent->name, &st)) | 		if (lstat(ent->name, &st)) | ||||||
| 			die_errno("Cannot lstat '%s'", ent->name); | 			die_errno("Cannot lstat '%s'", ent->name); | ||||||
|  |  | ||||||
|  |  | ||||||
|  | @ -691,8 +691,6 @@ static int grep_directory(struct grep_opt *opt, const struct pathspec *pathspec, | ||||||
|  |  | ||||||
| 	fill_directory(&dir, opt->repo->index, pathspec); | 	fill_directory(&dir, opt->repo->index, pathspec); | ||||||
| 	for (i = 0; i < dir.nr; i++) { | 	for (i = 0; i < dir.nr; i++) { | ||||||
| 		if (!dir_path_match(opt->repo->index, dir.entries[i], pathspec, 0, NULL)) |  | ||||||
| 			continue; |  | ||||||
| 		hit |= grep_file(opt, dir.entries[i]->name); | 		hit |= grep_file(opt, dir.entries[i]->name); | ||||||
| 		if (hit && opt->status_only) | 		if (hit && opt->status_only) | ||||||
| 			break; | 			break; | ||||||
|  |  | ||||||
|  | @ -128,8 +128,9 @@ static void show_dir_entry(const struct index_state *istate, | ||||||
| 	if (len > ent->len) | 	if (len > ent->len) | ||||||
| 		die("git ls-files: internal error - directory entry not superset of prefix"); | 		die("git ls-files: internal error - directory entry not superset of prefix"); | ||||||
|  |  | ||||||
| 	if (!dir_path_match(istate, ent, &pathspec, len, ps_matched)) | 	/* If ps_matches is non-NULL, figure out which pathspec(s) match. */ | ||||||
| 		return; | 	if (ps_matched) | ||||||
|  | 		dir_path_match(istate, ent, &pathspec, len, ps_matched); | ||||||
|  |  | ||||||
| 	fputs(tag, stdout); | 	fputs(tag, stdout); | ||||||
| 	write_eolinfo(istate, NULL, ent->name); | 	write_eolinfo(istate, NULL, ent->name); | ||||||
|  |  | ||||||
|  | @ -856,30 +856,23 @@ static int get_untracked_files(const struct pathspec *ps, int include_untracked, | ||||||
| 			       struct strbuf *untracked_files) | 			       struct strbuf *untracked_files) | ||||||
| { | { | ||||||
| 	int i; | 	int i; | ||||||
| 	int max_len; |  | ||||||
| 	int found = 0; | 	int found = 0; | ||||||
| 	char *seen; |  | ||||||
| 	struct dir_struct dir; | 	struct dir_struct dir; | ||||||
|  |  | ||||||
| 	memset(&dir, 0, sizeof(dir)); | 	memset(&dir, 0, sizeof(dir)); | ||||||
| 	if (include_untracked != INCLUDE_ALL_FILES) | 	if (include_untracked != INCLUDE_ALL_FILES) | ||||||
| 		setup_standard_excludes(&dir); | 		setup_standard_excludes(&dir); | ||||||
|  |  | ||||||
| 	seen = xcalloc(ps->nr, 1); | 	fill_directory(&dir, the_repository->index, ps); | ||||||
|  |  | ||||||
| 	max_len = fill_directory(&dir, the_repository->index, ps); |  | ||||||
| 	for (i = 0; i < dir.nr; i++) { | 	for (i = 0; i < dir.nr; i++) { | ||||||
| 		struct dir_entry *ent = dir.entries[i]; | 		struct dir_entry *ent = dir.entries[i]; | ||||||
| 		if (dir_path_match(&the_index, ent, ps, max_len, seen)) { | 		found++; | ||||||
| 			found++; | 		strbuf_addstr(untracked_files, ent->name); | ||||||
| 			strbuf_addstr(untracked_files, ent->name); | 		/* NUL-terminate: will be fed to update-index -z */ | ||||||
| 			/* NUL-terminate: will be fed to update-index -z */ | 		strbuf_addch(untracked_files, '\0'); | ||||||
| 			strbuf_addch(untracked_files, '\0'); |  | ||||||
| 		} |  | ||||||
| 		free(ent); | 		free(ent); | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	free(seen); |  | ||||||
| 	free(dir.entries); | 	free(dir.entries); | ||||||
| 	free(dir.ignored); | 	free(dir.ignored); | ||||||
| 	clear_directory(&dir); | 	clear_directory(&dir); | ||||||
|  |  | ||||||
							
								
								
									
										9
									
								
								dir.c
								
								
								
								
							
							
						
						
									
										9
									
								
								dir.c
								
								
								
								
							|  | @ -2117,7 +2117,14 @@ static enum path_treatment treat_path(struct dir_struct *dir, | ||||||
| 				       baselen, excluded, pathspec); | 				       baselen, excluded, pathspec); | ||||||
| 	case DT_REG: | 	case DT_REG: | ||||||
| 	case DT_LNK: | 	case DT_LNK: | ||||||
| 		return excluded ? path_excluded : path_untracked; | 		if (excluded) | ||||||
|  | 			return path_excluded; | ||||||
|  | 		if (pathspec && | ||||||
|  | 		    !do_match_pathspec(istate, pathspec, path->buf, path->len, | ||||||
|  | 				       0 /* prefix */, NULL /* seen */, | ||||||
|  | 				       0 /* flags */)) | ||||||
|  | 			return path_none; | ||||||
|  | 		return path_untracked; | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | @ -722,16 +722,14 @@ static void wt_status_collect_untracked(struct wt_status *s) | ||||||
|  |  | ||||||
| 	for (i = 0; i < dir.nr; i++) { | 	for (i = 0; i < dir.nr; i++) { | ||||||
| 		struct dir_entry *ent = dir.entries[i]; | 		struct dir_entry *ent = dir.entries[i]; | ||||||
| 		if (index_name_is_other(istate, ent->name, ent->len) && | 		if (index_name_is_other(istate, ent->name, ent->len)) | ||||||
| 		    dir_path_match(istate, ent, &s->pathspec, 0, NULL)) |  | ||||||
| 			string_list_insert(&s->untracked, ent->name); | 			string_list_insert(&s->untracked, ent->name); | ||||||
| 		free(ent); | 		free(ent); | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	for (i = 0; i < dir.ignored_nr; i++) { | 	for (i = 0; i < dir.ignored_nr; i++) { | ||||||
| 		struct dir_entry *ent = dir.ignored[i]; | 		struct dir_entry *ent = dir.ignored[i]; | ||||||
| 		if (index_name_is_other(istate, ent->name, ent->len) && | 		if (index_name_is_other(istate, ent->name, ent->len)) | ||||||
| 		    dir_path_match(istate, ent, &s->pathspec, 0, NULL)) |  | ||||||
| 			string_list_insert(&s->ignored, ent->name); | 			string_list_insert(&s->ignored, ent->name); | ||||||
| 		free(ent); | 		free(ent); | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	 Elijah Newren
						Elijah Newren