From 7f9dd87922c9065c8adcea1469c3caf3b3af2afa Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Wed, 12 May 2021 17:28:14 +0000 Subject: [PATCH 1/9] dir: convert trace calls to trace2 equivalents Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- dir.c | 50 ++++++-- t/t7063-status-untracked-cache.sh | 205 ++++++++++++++++++------------ t/t7519-status-fsmonitor.sh | 8 +- 3 files changed, 162 insertions(+), 101 deletions(-) diff --git a/dir.c b/dir.c index 19c2fa239b..b8dfe2e413 100644 --- a/dir.c +++ b/dir.c @@ -2751,15 +2751,46 @@ static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *d return root; } +static void emit_traversal_statistics(struct dir_struct *dir, + struct repository *repo, + const char *path, + int path_len) +{ + if (!trace2_is_enabled()) + return; + + if (!path_len) { + trace2_data_string("read_directory", repo, "path", ""); + } else { + struct strbuf tmp = STRBUF_INIT; + strbuf_add(&tmp, path, path_len); + trace2_data_string("read_directory", repo, "path", tmp.buf); + strbuf_release(&tmp); + } + + if (!dir->untracked) + return; + trace2_data_intmax("read_directory", repo, + "node-creation", dir->untracked->dir_created); + trace2_data_intmax("read_directory", repo, + "gitignore-invalidation", + dir->untracked->gitignore_invalidated); + trace2_data_intmax("read_directory", repo, + "directory-invalidation", + dir->untracked->dir_invalidated); + trace2_data_intmax("read_directory", repo, + "opendir", dir->untracked->dir_opened); +} + int read_directory(struct dir_struct *dir, struct index_state *istate, const char *path, int len, const struct pathspec *pathspec) { struct untracked_cache_dir *untracked; - trace_performance_enter(); + trace2_region_enter("dir", "read_directory", istate->repo); if (has_symlink_leading_path(path, len)) { - trace_performance_leave("read directory %.*s", len, path); + trace2_region_leave("dir", "read_directory", istate->repo); return dir->nr; } @@ -2775,23 +2806,15 @@ int read_directory(struct dir_struct *dir, struct index_state *istate, QSORT(dir->entries, dir->nr, cmp_dir_entry); QSORT(dir->ignored, dir->ignored_nr, cmp_dir_entry); - trace_performance_leave("read directory %.*s", len, path); + emit_traversal_statistics(dir, istate->repo, path, len); + + trace2_region_leave("dir", "read_directory", istate->repo); if (dir->untracked) { static int force_untracked_cache = -1; - static struct trace_key trace_untracked_stats = TRACE_KEY_INIT(UNTRACKED_STATS); if (force_untracked_cache < 0) force_untracked_cache = git_env_bool("GIT_FORCE_UNTRACKED_CACHE", 0); - trace_printf_key(&trace_untracked_stats, - "node creation: %u\n" - "gitignore invalidation: %u\n" - "directory invalidation: %u\n" - "opendir: %u\n", - dir->untracked->dir_created, - dir->untracked->gitignore_invalidated, - dir->untracked->dir_invalidated, - dir->untracked->dir_opened); if (force_untracked_cache && dir->untracked == istate->untracked && (dir->untracked->dir_opened || @@ -2802,6 +2825,7 @@ int read_directory(struct dir_struct *dir, struct index_state *istate, FREE_AND_NULL(dir->untracked); } } + return dir->nr; } diff --git a/t/t7063-status-untracked-cache.sh b/t/t7063-status-untracked-cache.sh index accefde72f..9710d33b3c 100755 --- a/t/t7063-status-untracked-cache.sh +++ b/t/t7063-status-untracked-cache.sh @@ -57,6 +57,19 @@ iuc () { return $ret } +get_relevant_traces () { + # From the GIT_TRACE2_PERF data of the form + # $TIME $FILE:$LINE | d0 | main | data | r1 | ? | ? | read_directo | $RELEVANT_STAT + # extract the $RELEVANT_STAT fields. We don't care about region_enter + # or region_leave, or stats for things outside read_directory. + INPUT_FILE=$1 + OUTPUT_FILE=$2 + grep data.*read_directo $INPUT_FILE | + cut -d "|" -f 9 \ + >"$OUTPUT_FILE" +} + + test_lazy_prereq UNTRACKED_CACHE ' { git update-index --test-untracked-cache; ret=$?; } && test $ret -ne 1 @@ -129,19 +142,21 @@ EOF test_expect_success 'status first time (empty cache)' ' avoid_racy && - : >../trace && - GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \ + : >../trace.output && + GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace.output" \ git status --porcelain >../actual && iuc status --porcelain >../status.iuc && test_cmp ../status.expect ../status.iuc && test_cmp ../status.expect ../actual && + get_relevant_traces ../trace.output ../trace.relevant && cat >../trace.expect <../trace && - GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \ + : >../trace.output && + GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace.output" \ git status --porcelain >../actual && iuc status --porcelain >../status.iuc && test_cmp ../status.expect ../status.iuc && test_cmp ../status.expect ../actual && + get_relevant_traces ../trace.output ../trace.relevant && cat >../trace.expect <four && - : >../trace && - GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \ + : >../trace.output && + GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace.output" \ git status --porcelain >../actual && iuc status --porcelain >../status.iuc && cat >../status.expect <../trace.expect <.gitignore && - : >../trace && - GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \ + : >../trace.output && + GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace.output" \ git status --porcelain >../actual && iuc status --porcelain >../status.iuc && cat >../status.expect <../trace.expect <>.git/info/exclude && - : >../trace && - GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \ + : >../trace.output && + GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace.output" \ git status --porcelain >../actual && iuc status --porcelain >../status.iuc && cat >../status.expect <../trace.expect <../trace && - GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \ + : >../trace.output && + GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace.output" \ git status --porcelain >../actual && iuc status --porcelain >../status.iuc && cat >../status.expect <../trace.expect <../trace && - GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \ + : >../trace.output && + GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace.output" \ git status --porcelain >../actual && iuc status --porcelain >../status.iuc && cat >../status.expect <../trace.expect <../trace && - GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \ + : >../trace.output && + GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace.output" \ git status --porcelain >../actual && iuc status --porcelain >../status.iuc && cat >../status.expect <../trace.expect <../trace && + : >../trace.output && avoid_racy && - GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \ + GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace.output" \ git status --porcelain >../status.actual && iuc status --porcelain >../status.iuc && cat >../status.expect <../trace.expect <../trace && - GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \ + : >../trace.output && + GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace.output" \ git status --porcelain >../status.actual && iuc status --porcelain >../status.iuc && cat >../status.expect <../trace.expect <../trace && - GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \ + : >../trace.output && + GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace.output" \ git status --porcelain >../status.actual && iuc status --porcelain >../status.iuc && cat >../status.expect <../trace.expect <../trace && - GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \ + : >../trace.output && + GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace.output" \ git status --porcelain >../status.actual && iuc status --porcelain >../status.iuc && test_cmp ../status.expect ../status.iuc && test_cmp ../status.expect ../status.actual && + get_relevant_traces ../trace.output ../trace.relevant && cat >../trace.expect <../before ) && @@ -346,12 +346,12 @@ test_expect_success UNTRACKED_CACHE 'ignore .git changes when invalidating UNTR' EOF ( cd dot-git && - GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace-after" \ + GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace-after" \ git status && test-tool dump-untracked-cache >../after ) && - grep "directory invalidation" trace-before >>before && - grep "directory invalidation" trace-after >>after && + grep "directory-invalidation" trace-before | cut -d"|" -f 9 >>before && + grep "directory-invalidation" trace-after | cut -d"|" -f 9 >>after && # UNTR extension unchanged, dir invalidation count unchanged test_cmp before after ' From 7fe1ffdafa56b8453a47a40b866d029f24a56d76 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Wed, 12 May 2021 17:28:15 +0000 Subject: [PATCH 2/9] dir: report number of visited directories and paths with trace2 Provide more statistics in trace2 output that include the number of directories and total paths visited by the directory traversal logic. Subsequent patches will take advantage of this to ensure we do not unnecessarily traverse into ignored directories. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- dir.c | 9 +++++++++ dir.h | 4 ++++ t/t7063-status-untracked-cache.sh | 3 ++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/dir.c b/dir.c index b8dfe2e413..151cc37e23 100644 --- a/dir.c +++ b/dir.c @@ -2431,6 +2431,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir, if (open_cached_dir(&cdir, dir, untracked, istate, &path, check_only)) goto out; + dir->visited_directories++; if (untracked) untracked->check_only = !!check_only; @@ -2439,6 +2440,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir, /* check how the file or directory should be treated */ state = treat_path(dir, untracked, &cdir, istate, &path, baselen, pathspec); + dir->visited_paths++; if (state > dir_state) dir_state = state; @@ -2768,6 +2770,11 @@ static void emit_traversal_statistics(struct dir_struct *dir, strbuf_release(&tmp); } + trace2_data_intmax("read_directory", repo, + "directories-visited", dir->visited_directories); + trace2_data_intmax("read_directory", repo, + "paths-visited", dir->visited_paths); + if (!dir->untracked) return; trace2_data_intmax("read_directory", repo, @@ -2788,6 +2795,8 @@ int read_directory(struct dir_struct *dir, struct index_state *istate, struct untracked_cache_dir *untracked; trace2_region_enter("dir", "read_directory", istate->repo); + dir->visited_paths = 0; + dir->visited_directories = 0; if (has_symlink_leading_path(path, len)) { trace2_region_leave("dir", "read_directory", istate->repo); diff --git a/dir.h b/dir.h index facfae4740..70c750e305 100644 --- a/dir.h +++ b/dir.h @@ -336,6 +336,10 @@ struct dir_struct { struct oid_stat ss_info_exclude; struct oid_stat ss_excludes_file; unsigned unmanaged_exclude_files; + + /* Stats about the traversal */ + unsigned visited_paths; + unsigned visited_directories; }; /*Count the number of slashes for string s*/ diff --git a/t/t7063-status-untracked-cache.sh b/t/t7063-status-untracked-cache.sh index 9710d33b3c..a0c123b0a7 100755 --- a/t/t7063-status-untracked-cache.sh +++ b/t/t7063-status-untracked-cache.sh @@ -65,7 +65,8 @@ get_relevant_traces () { INPUT_FILE=$1 OUTPUT_FILE=$2 grep data.*read_directo $INPUT_FILE | - cut -d "|" -f 9 \ + cut -d "|" -f 9 | + grep -v visited \ >"$OUTPUT_FILE" } From b338e9f668737e08201c990450b8c3d744f63162 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Wed, 12 May 2021 17:28:16 +0000 Subject: [PATCH 3/9] ls-files: error out on -i unless -o or -c are specified ls-files --ignored can be used together with either --others or --cached. After being perplexed for a bit and digging in to the code, I assumed that ls-files -i was just broken and not printing anything and I had a nice patch ready to submit when I finally realized that -i can be used with --cached to find tracked ignores. While that was a mistake on my part, and a careful reading of the documentation could have made this more clear, I suspect this is an error others are likely to make as well. In fact, of two uses in our testsuite, I believe one of the two did make this error. In t1306.13, there are NO tracked files, and all the excludes built up and used in that test and in previous tests thus have to be about untracked files. However, since they were looking for an empty result, the mistake went unnoticed as their erroneous command also just happened to give an empty answer. -i will most the time be used with -o, which would suggest we could just make -i imply -o in the absence of either a -o or -c, but that would be a backward incompatible break. Instead, let's just flag -i without either a -o or -c as an error, and update the two relevant testcases to specify their intent. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- builtin/ls-files.c | 3 +++ t/t1306-xdg-files.sh | 2 +- t/t3003-ls-files-exclude.sh | 4 ++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/builtin/ls-files.c b/builtin/ls-files.c index f6f9e483b2..cd01404417 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -675,6 +675,9 @@ int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix) if (pathspec.nr && error_unmatch) ps_matched = xcalloc(pathspec.nr, 1); + if ((dir.flags & DIR_SHOW_IGNORED) && !show_others && !show_cached) + die("ls-files -i must be used with either -o or -c"); + if ((dir.flags & DIR_SHOW_IGNORED) && !exc_given) die("ls-files --ignored needs some exclude pattern"); diff --git a/t/t1306-xdg-files.sh b/t/t1306-xdg-files.sh index dd87b43be1..40d3c42618 100755 --- a/t/t1306-xdg-files.sh +++ b/t/t1306-xdg-files.sh @@ -116,7 +116,7 @@ test_expect_success 'Exclusion in a non-XDG global ignore file' ' test_expect_success 'Checking XDG ignore file when HOME is unset' ' (sane_unset HOME && git config --unset core.excludesfile && - git ls-files --exclude-standard --ignored >actual) && + git ls-files --exclude-standard --ignored --others >actual) && test_must_be_empty actual ' diff --git a/t/t3003-ls-files-exclude.sh b/t/t3003-ls-files-exclude.sh index d5ec333131..c41c4f046a 100755 --- a/t/t3003-ls-files-exclude.sh +++ b/t/t3003-ls-files-exclude.sh @@ -29,11 +29,11 @@ test_expect_success 'add file to gitignore' ' ' check_all_output -test_expect_success 'ls-files -i lists only tracked-but-ignored files' ' +test_expect_success 'ls-files -i -c lists only tracked-but-ignored files' ' echo content >other-file && git add other-file && echo file >expect && - git ls-files -i --exclude-standard >output && + git ls-files -i -c --exclude-standard >output && test_cmp expect output ' From 2e4e43a6910393d681d095f515d41232c2372966 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Wed, 12 May 2021 17:28:17 +0000 Subject: [PATCH 4/9] t7300: add testcase showing unnecessary traversal into ignored directory The PNPM package manager is apparently creating deeply nested (but ignored) directory structures; traversing them is costly performance-wise, unnecessary, and in some cases is even throwing warnings/errors because the paths are too long to handle on various platforms. Add a testcase that checks for such unnecessary directory traversal. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- t/t7300-clean.sh | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/t/t7300-clean.sh b/t/t7300-clean.sh index a74816ca8b..07e8ba2d4b 100755 --- a/t/t7300-clean.sh +++ b/t/t7300-clean.sh @@ -746,4 +746,27 @@ test_expect_success 'clean untracked paths by pathspec' ' test_must_be_empty actual ' +test_expect_failure 'avoid traversing into ignored directories' ' + test_when_finished rm -f output error trace.* && + test_create_repo avoid-traversing-deep-hierarchy && + ( + cd avoid-traversing-deep-hierarchy && + + mkdir -p untracked/subdir/with/a && + >untracked/subdir/with/a/random-file.txt && + + GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace.output" \ + git clean -ffdxn -e untracked + ) && + + # Make sure we only visited into the top-level directory, and did + # not traverse into the "untracked" subdirectory since it was excluded + grep data.*read_directo.*directories-visited trace.output | + cut -d "|" -f 9 >trace.relevant && + cat >trace.expect <<-EOF && + ..directories-visited:1 + EOF + test_cmp trace.expect trace.relevant +' + test_done From a97c7a8bc4f2068b2e8c23ce8d17f7db8333ded0 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Wed, 12 May 2021 17:28:18 +0000 Subject: [PATCH 5/9] t3001, t7300: add testcase showcasing missed directory traversal In the last commit, we added a testcase showing that the directory traversal machinery sometimes traverses into directories unnecessarily. Here we show that there are cases where it does the opposite: it does not traverse into directories, despite those directories having important files that need to be flagged. Add a testcase showing that `git ls-files -o -i --directory` can omit some of the files it should be listing, and another showing that `git clean -fX` can fail to clean out some of the expected files. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- t/t3001-ls-files-others-exclude.sh | 5 +++++ t/t7300-clean.sh | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/t/t3001-ls-files-others-exclude.sh b/t/t3001-ls-files-others-exclude.sh index 1ec7cb57c7..ac05d1a179 100755 --- a/t/t3001-ls-files-others-exclude.sh +++ b/t/t3001-ls-files-others-exclude.sh @@ -292,6 +292,11 @@ EOF test_cmp expect actual ' +test_expect_failure 'ls-files with "**" patterns and --directory' ' + # Expectation same as previous test + git ls-files --directory -o -i --exclude "**/a.1" >actual && + test_cmp expect actual +' test_expect_success 'ls-files with "**" patterns and no slashes' ' git ls-files -o -i --exclude "one**a.1" >actual && diff --git a/t/t7300-clean.sh b/t/t7300-clean.sh index 07e8ba2d4b..34c08c3254 100755 --- a/t/t7300-clean.sh +++ b/t/t7300-clean.sh @@ -769,4 +769,23 @@ test_expect_failure 'avoid traversing into ignored directories' ' test_cmp trace.expect trace.relevant ' +test_expect_failure 'traverse into directories that may have ignored entries' ' + test_when_finished rm -f output && + test_create_repo need-to-traverse-into-hierarchy && + ( + cd need-to-traverse-into-hierarchy && + mkdir -p modules/foobar/src/generated && + > modules/foobar/src/generated/code.c && + > modules/foobar/Makefile && + echo "/modules/**/src/generated/" >.gitignore && + + git clean -fX modules/foobar >../output && + + grep Removing ../output && + + test_path_is_missing modules/foobar/src/generated/code.c && + test_path_is_file modules/foobar/Makefile + ) +' + test_done From aa6e1b21e5de8fae7121b8c6543e1482144c1ed3 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Wed, 12 May 2021 17:28:19 +0000 Subject: [PATCH 6/9] dir: avoid unnecessary traversal into ignored directory The show_other_directories case in treat_directory() tried to handle both excludes and untracked files with the same logic, and mishandled both the excludes and the untracked files in the process, in different ways. Split that logic apart, and then focus on the logic for the excludes; a subsequent commit will address the logic for untracked files. For show_other_directories, an excluded directory means that every path underneath that directory will also be excluded. Given that the calling code requested to just show directories when everything under a directory had the same state (that's what the "DIR_SHOW_OTHER_DIRECTORIES" flag means), we generally do not need to traverse into such directories and can just immediately mark them as ignored (i.e. as path_excluded). The only reason we cannot just immediately return path_excluded is the DIR_HIDE_EMPTY_DIRECTORIES flag and the possibility that the ignored directory is an empty directory. The code previously treated DIR_SHOW_IGNORED_TOO in most cases as an exception as well, which was wrong. It can sometimes reduce the number of cases where we need to recurse (namely if DIR_SHOW_IGNORED_TOO_MODE_MATCHING is also set), but should not be able to increase the number of cases where we need to recurse. Fix the logic accordingly. Some sidenotes about possible confusion with dir.c: * "ignored" often refers to an untracked ignore", i.e. a file which is not tracked which matches one of the ignore/exclusion rules. But you can also have a "tracked ignore", a tracked file that happens to match one of the ignore/exclusion rules and which dir.c has to worry about since "git ls-files -c -i" is supposed to list them. * The dir code often uses "ignored" and "excluded" interchangeably, which you need to keep in mind while reading the code. * "exclude" is used multiple ways in the code: * As noted above, "exclude" is often a synonym for "ignored". * The logic for parsing .gitignore files was re-used in .git/info/sparse-checkout, except there it is used to mark paths that the user wants to *keep*. This was mostly addressed by commit 65edd96aec ("treewide: rename 'exclude' methods to 'pattern'", 2019-09-03), but every once in a while you'll find a comment about "exclude" referring to these patterns that might in fact be in use by the sparse-checkout machinery for inclusion rules. * The word "EXCLUDE" is also used for pathspec negation, as in (pathspec->items[3].magic & PATHSPEC_EXCLUDE) Thus if a user had a .gitignore file containing *~ *.log !settings.log And then ran git add -- 'settings.*' ':^settings.log' Then :^settings.log is a pathspec negation making settings.log not be requested to be added even though all other settings.* files are being added. Also, !settings.log in the gitignore file is a negative exclude pattern meaning that settings.log is normally a file we want to track even though all other *.log files are ignored. Sometimes it feels like dir.c needs its own glossary with its many definitions, including the multiply-defined terms. Reported-by: Jason Gore Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- dir.c | 44 +++++++++++++++++++++++++++++--------------- t/t7300-clean.sh | 2 +- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/dir.c b/dir.c index 151cc37e23..66261cf98b 100644 --- a/dir.c +++ b/dir.c @@ -1835,6 +1835,7 @@ static enum path_treatment treat_directory(struct dir_struct *dir, } /* This is the "show_other_directories" case */ + assert(dir->flags & DIR_SHOW_OTHER_DIRECTORIES); /* * If we have a pathspec which could match something _below_ this @@ -1845,27 +1846,40 @@ static enum path_treatment treat_directory(struct dir_struct *dir, if (matches_how == MATCHED_RECURSIVELY_LEADING_PATHSPEC) return path_recurse; + /* Special cases for where this directory is excluded/ignored */ + if (excluded) { + /* + * In the show_other_directories case, if we're not + * hiding empty directories, there is no need to + * recurse into an ignored directory. + */ + if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES)) + return path_excluded; + + /* + * Even if we are hiding empty directories, we can still avoid + * recursing into ignored directories for DIR_SHOW_IGNORED_TOO + * if DIR_SHOW_IGNORED_TOO_MODE_MATCHING is also set. + */ + if ((dir->flags & DIR_SHOW_IGNORED_TOO) && + (dir->flags & DIR_SHOW_IGNORED_TOO_MODE_MATCHING)) + return path_excluded; + } + /* - * Other than the path_recurse case immediately above, we only need - * to recurse into untracked/ignored directories if either of the - * following bits is set: + * Other than the path_recurse case above, we only need to + * recurse into untracked directories if either of the following + * bits is set: * - DIR_SHOW_IGNORED_TOO (because then we need to determine if * there are ignored entries below) * - DIR_HIDE_EMPTY_DIRECTORIES (because we have to determine if * the directory is empty) */ - if (!(dir->flags & (DIR_SHOW_IGNORED_TOO | DIR_HIDE_EMPTY_DIRECTORIES))) - return excluded ? path_excluded : path_untracked; - - /* - * ...and even if DIR_SHOW_IGNORED_TOO is set, we can still avoid - * recursing into ignored directories if the path is excluded and - * DIR_SHOW_IGNORED_TOO_MODE_MATCHING is also set. - */ - if (excluded && - (dir->flags & DIR_SHOW_IGNORED_TOO) && - (dir->flags & DIR_SHOW_IGNORED_TOO_MODE_MATCHING)) - return path_excluded; + if (!excluded && + !(dir->flags & (DIR_SHOW_IGNORED_TOO | + DIR_HIDE_EMPTY_DIRECTORIES))) { + return path_untracked; + } /* * Even if we don't want to know all the paths under an untracked or diff --git a/t/t7300-clean.sh b/t/t7300-clean.sh index 34c08c3254..21e48b3ba5 100755 --- a/t/t7300-clean.sh +++ b/t/t7300-clean.sh @@ -746,7 +746,7 @@ test_expect_success 'clean untracked paths by pathspec' ' test_must_be_empty actual ' -test_expect_failure 'avoid traversing into ignored directories' ' +test_expect_success 'avoid traversing into ignored directories' ' test_when_finished rm -f output error trace.* && test_create_repo avoid-traversing-deep-hierarchy && ( From dd55fc0df15c338a8dbec6dec6ca6b58edc8acef Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Wed, 12 May 2021 17:28:20 +0000 Subject: [PATCH 7/9] dir: traverse into untracked directories if they may have ignored subfiles A directory that is untracked does not imply that all files under it should be categorized as untracked; in particular, if the caller is interested in ignored files, many files or directories underneath the untracked directory may be ignored. We previously partially handled this right with DIR_SHOW_IGNORED_TOO, but missed DIR_SHOW_IGNORED. It was not obvious, though, because the logic for untracked and excluded files had been fused together making it harder to reason about. The previous commit split that logic out, making it easier to notice that DIR_SHOW_IGNORED was missing. Add it. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- dir.c | 10 ++++++---- t/t3001-ls-files-others-exclude.sh | 2 +- t/t7300-clean.sh | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/dir.c b/dir.c index 66261cf98b..ed68b7e641 100644 --- a/dir.c +++ b/dir.c @@ -1868,15 +1868,17 @@ static enum path_treatment treat_directory(struct dir_struct *dir, /* * Other than the path_recurse case above, we only need to - * recurse into untracked directories if either of the following + * recurse into untracked directories if any of the following * bits is set: - * - DIR_SHOW_IGNORED_TOO (because then we need to determine if - * there are ignored entries below) + * - DIR_SHOW_IGNORED (because then we need to determine if + * there are ignored entries below) + * - DIR_SHOW_IGNORED_TOO (same as above) * - DIR_HIDE_EMPTY_DIRECTORIES (because we have to determine if * the directory is empty) */ if (!excluded && - !(dir->flags & (DIR_SHOW_IGNORED_TOO | + !(dir->flags & (DIR_SHOW_IGNORED | + DIR_SHOW_IGNORED_TOO | DIR_HIDE_EMPTY_DIRECTORIES))) { return path_untracked; } diff --git a/t/t3001-ls-files-others-exclude.sh b/t/t3001-ls-files-others-exclude.sh index ac05d1a179..516c95ea0e 100755 --- a/t/t3001-ls-files-others-exclude.sh +++ b/t/t3001-ls-files-others-exclude.sh @@ -292,7 +292,7 @@ EOF test_cmp expect actual ' -test_expect_failure 'ls-files with "**" patterns and --directory' ' +test_expect_success 'ls-files with "**" patterns and --directory' ' # Expectation same as previous test git ls-files --directory -o -i --exclude "**/a.1" >actual && test_cmp expect actual diff --git a/t/t7300-clean.sh b/t/t7300-clean.sh index 21e48b3ba5..0399701e62 100755 --- a/t/t7300-clean.sh +++ b/t/t7300-clean.sh @@ -769,7 +769,7 @@ test_expect_success 'avoid traversing into ignored directories' ' test_cmp trace.expect trace.relevant ' -test_expect_failure 'traverse into directories that may have ignored entries' ' +test_expect_success 'traverse into directories that may have ignored entries' ' test_when_finished rm -f output && test_create_repo need-to-traverse-into-hierarchy && ( From 4e689d81718eb6e939cace317ea3e33cb994dcbb Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 12 May 2021 17:28:21 +0000 Subject: [PATCH 8/9] dir: update stale description of treat_directory() The documentation comment for treat_directory() was originally written in 095952 (Teach directory traversal about subprojects, 2007-04-11) which was before the 'struct dir_struct' split its bitfield of named options into a 'flags' enum in 7c4c97c0 (Turn the flags in struct dir_struct into a single variable, 2009-02-16). When those flags changed, the comment became stale, since members like 'show_other_directories' transitioned into flags like DIR_SHOW_OTHER_DIRECTORIES. Update the comments for treat_directory() to use these flag names rather than the old member names. Signed-off-by: Derrick Stolee Reviewed-by: Elijah Newren Signed-off-by: Junio C Hamano --- dir.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/dir.c b/dir.c index ed68b7e641..ff004b298b 100644 --- a/dir.c +++ b/dir.c @@ -1740,13 +1740,13 @@ static enum exist_status directory_exists_in_index(struct index_state *istate, * Case 3: if we didn't have it in the index previously, we * have a few sub-cases: * - * (a) if "show_other_directories" is true, we show it as - * just a directory, unless "hide_empty_directories" is + * (a) if DIR_SHOW_OTHER_DIRECTORIES flag is set, we show it as + * just a directory, unless DIR_HIDE_EMPTY_DIRECTORIES is * also true, in which case we need to check if it contains any * untracked and / or ignored files. - * (b) if it looks like a git directory, and we don't have - * 'no_gitlinks' set we treat it as a gitlink, and show it - * as a directory. + * (b) if it looks like a git directory and we don't have the + * DIR_NO_GITLINKS flag, then we treat it as a gitlink, and + * show it as a directory. * (c) otherwise, we recurse into it. */ static enum path_treatment treat_directory(struct dir_struct *dir, @@ -1834,7 +1834,6 @@ static enum path_treatment treat_directory(struct dir_struct *dir, return path_recurse; } - /* This is the "show_other_directories" case */ assert(dir->flags & DIR_SHOW_OTHER_DIRECTORIES); /* @@ -1849,7 +1848,7 @@ static enum path_treatment treat_directory(struct dir_struct *dir, /* Special cases for where this directory is excluded/ignored */ if (excluded) { /* - * In the show_other_directories case, if we're not + * If DIR_SHOW_OTHER_DIRECTORIES is set and we're not * hiding empty directories, there is no need to * recurse into an ignored directory. */ From b548f0f1568f6b01e55ca69c24d3cb19489f92aa Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Wed, 12 May 2021 17:28:22 +0000 Subject: [PATCH 9/9] dir: introduce readdir_skip_dot_and_dotdot() helper Many places in the code were doing while ((d = readdir(dir)) != NULL) { if (is_dot_or_dotdot(d->d_name)) continue; ...process d... } Introduce a readdir_skip_dot_and_dotdot() helper to make that a one-liner: while ((d = readdir_skip_dot_and_dotdot(dir)) != NULL) { ...process d... } This helper particularly simplifies checks for empty directories. Also use this helper in read_cached_dir() so that our statistics are consistent across platforms. (In other words, read_cached_dir() should have been using is_dot_or_dotdot() and skipping such entries, but did not and left it to treat_path() to detect and mark such entries as path_none.) Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- builtin/clean.c | 4 +--- builtin/worktree.c | 4 +--- diff-no-index.c | 5 ++--- dir.c | 26 +++++++++++++++++--------- dir.h | 2 ++ entry.c | 5 +---- notes-merge.c | 5 +---- object-file.c | 4 +--- packfile.c | 5 +---- rerere.c | 4 +--- worktree.c | 12 +++--------- 11 files changed, 31 insertions(+), 45 deletions(-) diff --git a/builtin/clean.c b/builtin/clean.c index 995053b791..a1a5747615 100644 --- a/builtin/clean.c +++ b/builtin/clean.c @@ -189,10 +189,8 @@ static int remove_dirs(struct strbuf *path, const char *prefix, int force_flag, strbuf_complete(path, '/'); len = path->len; - while ((e = readdir(dir)) != NULL) { + while ((e = readdir_skip_dot_and_dotdot(dir)) != NULL) { struct stat st; - if (is_dot_or_dotdot(e->d_name)) - continue; strbuf_setlen(path, len); strbuf_addstr(path, e->d_name); diff --git a/builtin/worktree.c b/builtin/worktree.c index 1cd5c2016e..e081ca9bef 100644 --- a/builtin/worktree.c +++ b/builtin/worktree.c @@ -118,10 +118,8 @@ static void prune_worktrees(void) struct dirent *d; if (!dir) return; - while ((d = readdir(dir)) != NULL) { + while ((d = readdir_skip_dot_and_dotdot(dir)) != NULL) { char *path; - if (is_dot_or_dotdot(d->d_name)) - continue; strbuf_reset(&reason); if (should_prune_worktree(d->d_name, &reason, &path, expire)) prune_worktree(d->d_name, reason.buf); diff --git a/diff-no-index.c b/diff-no-index.c index 7814eabfe0..e5cc878371 100644 --- a/diff-no-index.c +++ b/diff-no-index.c @@ -26,9 +26,8 @@ static int read_directory_contents(const char *path, struct string_list *list) if (!(dir = opendir(path))) return error("Could not open directory %s", path); - while ((e = readdir(dir))) - if (!is_dot_or_dotdot(e->d_name)) - string_list_insert(list, e->d_name); + while ((e = readdir_skip_dot_and_dotdot(dir))) + string_list_insert(list, e->d_name); closedir(dir); return 0; diff --git a/dir.c b/dir.c index ff004b298b..e47b4c507f 100644 --- a/dir.c +++ b/dir.c @@ -59,6 +59,18 @@ void dir_init(struct dir_struct *dir) memset(dir, 0, sizeof(*dir)); } +struct dirent * +readdir_skip_dot_and_dotdot(DIR *dirp) +{ + struct dirent *e; + + while ((e = readdir(dirp)) != NULL) { + if (!is_dot_or_dotdot(e->d_name)) + break; + } + return e; +} + int count_slashes(const char *s) { int cnt = 0; @@ -2332,7 +2344,7 @@ static int read_cached_dir(struct cached_dir *cdir) struct dirent *de; if (cdir->fdir) { - de = readdir(cdir->fdir); + de = readdir_skip_dot_and_dotdot(cdir->fdir); if (!de) { cdir->d_name = NULL; cdir->d_type = DT_UNKNOWN; @@ -2931,11 +2943,9 @@ int is_empty_dir(const char *path) if (!dir) return 0; - while ((e = readdir(dir)) != NULL) - if (!is_dot_or_dotdot(e->d_name)) { - ret = 0; - break; - } + e = readdir_skip_dot_and_dotdot(dir); + if (e) + ret = 0; closedir(dir); return ret; @@ -2975,10 +2985,8 @@ static int remove_dir_recurse(struct strbuf *path, int flag, int *kept_up) strbuf_complete(path, '/'); len = path->len; - while ((e = readdir(dir)) != NULL) { + while ((e = readdir_skip_dot_and_dotdot(dir)) != NULL) { struct stat st; - if (is_dot_or_dotdot(e->d_name)) - continue; strbuf_setlen(path, len); strbuf_addstr(path, e->d_name); diff --git a/dir.h b/dir.h index 70c750e305..6b3fac0829 100644 --- a/dir.h +++ b/dir.h @@ -342,6 +342,8 @@ struct dir_struct { unsigned visited_directories; }; +struct dirent *readdir_skip_dot_and_dotdot(DIR *dirp); + /*Count the number of slashes for string s*/ int count_slashes(const char *s); diff --git a/entry.c b/entry.c index 7b9f43716f..e3d3add300 100644 --- a/entry.c +++ b/entry.c @@ -56,12 +56,9 @@ static void remove_subtree(struct strbuf *path) if (!dir) die_errno("cannot opendir '%s'", path->buf); - while ((de = readdir(dir)) != NULL) { + while ((de = readdir_skip_dot_and_dotdot(dir)) != NULL) { struct stat st; - if (is_dot_or_dotdot(de->d_name)) - continue; - strbuf_addch(path, '/'); strbuf_addstr(path, de->d_name); if (lstat(path->buf, &st)) diff --git a/notes-merge.c b/notes-merge.c index d2771fa3d4..e9d6f86d34 100644 --- a/notes-merge.c +++ b/notes-merge.c @@ -695,13 +695,10 @@ int notes_merge_commit(struct notes_merge_options *o, strbuf_addch(&path, '/'); baselen = path.len; - while ((e = readdir(dir)) != NULL) { + while ((e = readdir_skip_dot_and_dotdot(dir)) != NULL) { struct stat st; struct object_id obj_oid, blob_oid; - if (is_dot_or_dotdot(e->d_name)) - continue; - if (get_oid_hex(e->d_name, &obj_oid)) { if (o->verbosity >= 3) printf("Skipping non-SHA1 entry '%s%s'\n", diff --git a/object-file.c b/object-file.c index 624af408cd..77bdcfd21b 100644 --- a/object-file.c +++ b/object-file.c @@ -2304,10 +2304,8 @@ int for_each_file_in_obj_subdir(unsigned int subdir_nr, strbuf_addch(path, '/'); baselen = path->len; - while ((de = readdir(dir))) { + while ((de = readdir_skip_dot_and_dotdot(dir))) { size_t namelen; - if (is_dot_or_dotdot(de->d_name)) - continue; namelen = strlen(de->d_name); strbuf_setlen(path, baselen); diff --git a/packfile.c b/packfile.c index ea29f4ba77..463d61c877 100644 --- a/packfile.c +++ b/packfile.c @@ -813,10 +813,7 @@ void for_each_file_in_pack_dir(const char *objdir, } strbuf_addch(&path, '/'); dirnamelen = path.len; - while ((de = readdir(dir)) != NULL) { - if (is_dot_or_dotdot(de->d_name)) - continue; - + while ((de = readdir_skip_dot_and_dotdot(dir)) != NULL) { strbuf_setlen(&path, dirnamelen); strbuf_addstr(&path, de->d_name); diff --git a/rerere.c b/rerere.c index dee60dc6df..d83d58df4f 100644 --- a/rerere.c +++ b/rerere.c @@ -1190,13 +1190,11 @@ void rerere_gc(struct repository *r, struct string_list *rr) if (!dir) die_errno(_("unable to open rr-cache directory")); /* Collect stale conflict IDs ... */ - while ((e = readdir(dir))) { + while ((e = readdir_skip_dot_and_dotdot(dir))) { struct rerere_dir *rr_dir; struct rerere_id id; int now_empty; - if (is_dot_or_dotdot(e->d_name)) - continue; if (!is_rr_cache_dirname(e->d_name)) continue; /* or should we remove e->d_name? */ diff --git a/worktree.c b/worktree.c index f35ac40a84..237517baee 100644 --- a/worktree.c +++ b/worktree.c @@ -128,10 +128,8 @@ struct worktree **get_worktrees(void) dir = opendir(path.buf); strbuf_release(&path); if (dir) { - while ((d = readdir(dir)) != NULL) { + while ((d = readdir_skip_dot_and_dotdot(dir)) != NULL) { struct worktree *linked = NULL; - if (is_dot_or_dotdot(d->d_name)) - continue; if ((linked = get_linked_worktree(d->d_name))) { ALLOC_GROW(list, counter + 1, alloc); @@ -486,13 +484,9 @@ int submodule_uses_worktrees(const char *path) if (!dir) return 0; - while ((d = readdir(dir)) != NULL) { - if (is_dot_or_dotdot(d->d_name)) - continue; - + d = readdir_skip_dot_and_dotdot(dir); + if (d != NULL) ret = 1; - break; - } closedir(dir); return ret; }