From ffd036b1286687589f5ce5513ee69c06ee53691e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 22 Jan 2016 17:27:33 -0500 Subject: [PATCH 1/2] clean: make is_git_repository a public function We have always had is_git_directory(), for looking at a specific directory to see if it contains a git repo. In 0179ca7 (clean: improve performance when removing lots of directories, 2015-06-15), we added is_git_repository() which checks for a non-bare repository by looking at its ".git" entry. However, the fix in 0179ca7 needs to be applied other places, too. Let's make this new helper globally available. We need to give it a better name, though, to avoid confusion with is_git_directory(). This patch does that, documents both functions with a comment to reduce confusion, and removes the clean-specific references in the comments. Based-on-a-patch-by: Andreas Krey Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/clean.c | 26 +------------------------- cache.h | 20 +++++++++++++++++++- setup.c | 17 +++++++++++++++++ 3 files changed, 37 insertions(+), 26 deletions(-) diff --git a/builtin/clean.c b/builtin/clean.c index d7acb94a95..919157bc2f 100644 --- a/builtin/clean.c +++ b/builtin/clean.c @@ -147,30 +147,6 @@ static int exclude_cb(const struct option *opt, const char *arg, int unset) return 0; } -/* - * Return 1 if the given path is the root of a git repository or - * submodule else 0. Will not return 1 for bare repositories with the - * exception of creating a bare repository in "foo/.git" and calling - * is_git_repository("foo"). - */ -static int is_git_repository(struct strbuf *path) -{ - int ret = 0; - int gitfile_error; - size_t orig_path_len = path->len; - assert(orig_path_len != 0); - strbuf_complete(path, '/'); - strbuf_addstr(path, ".git"); - if (read_gitfile_gently(path->buf, &gitfile_error) || is_git_directory(path->buf)) - ret = 1; - if (gitfile_error == READ_GITFILE_ERR_OPEN_FAILED || - gitfile_error == READ_GITFILE_ERR_READ_FAILED) - ret = 1; /* This could be a real .git file, take the - * safe option and avoid cleaning */ - strbuf_setlen(path, orig_path_len); - return ret; -} - static int remove_dirs(struct strbuf *path, const char *prefix, int force_flag, int dry_run, int quiet, int *dir_gone) { @@ -182,7 +158,7 @@ static int remove_dirs(struct strbuf *path, const char *prefix, int force_flag, *dir_gone = 1; - if ((force_flag & REMOVE_DIR_KEEP_NESTED_GIT) && is_git_repository(path)) { + if ((force_flag & REMOVE_DIR_KEEP_NESTED_GIT) && is_nonbare_repository_dir(path)) { if (!quiet) { quote_path_relative(path->buf, prefix, "ed); printf(dry_run ? _(msg_would_skip_git_dir) : _(msg_skip_git_dir), diff --git a/cache.h b/cache.h index c63fcc113a..67141a46e4 100644 --- a/cache.h +++ b/cache.h @@ -456,7 +456,6 @@ extern char *git_work_tree_cfg; extern int is_inside_work_tree(void); extern const char *get_git_dir(void); extern const char *get_git_common_dir(void); -extern int is_git_directory(const char *path); extern char *get_object_directory(void); extern char *get_index_file(void); extern char *get_graft_file(void); @@ -467,6 +466,25 @@ extern const char *get_git_namespace(void); extern const char *strip_namespace(const char *namespaced_ref); extern const char *get_git_work_tree(void); +/* + * Return true if the given path is a git directory; note that this _just_ + * looks at the directory itself. If you want to know whether "foo/.git" + * is a repository, you must feed that path, not just "foo". + */ +extern int is_git_directory(const char *path); + +/* + * Return 1 if the given path is the root of a git repository or + * submodule, else 0. Will not return 1 for bare repositories with the + * exception of creating a bare repository in "foo/.git" and calling + * is_git_repository("foo"). + * + * If we run into read errors, we err on the side of saying "yes, it is", + * as we usually consider sub-repos precious, and would prefer to err on the + * side of not disrupting or deleting them. + */ +extern int is_nonbare_repository_dir(struct strbuf *path); + #define READ_GITFILE_ERR_STAT_FAILED 1 #define READ_GITFILE_ERR_NOT_A_FILE 2 #define READ_GITFILE_ERR_OPEN_FAILED 3 diff --git a/setup.c b/setup.c index d34372520b..2c4b22c845 100644 --- a/setup.c +++ b/setup.c @@ -312,6 +312,23 @@ done: return ret; } +int is_nonbare_repository_dir(struct strbuf *path) +{ + int ret = 0; + int gitfile_error; + size_t orig_path_len = path->len; + assert(orig_path_len != 0); + strbuf_complete(path, '/'); + strbuf_addstr(path, ".git"); + if (read_gitfile_gently(path->buf, &gitfile_error) || is_git_directory(path->buf)) + ret = 1; + if (gitfile_error == READ_GITFILE_ERR_OPEN_FAILED || + gitfile_error == READ_GITFILE_ERR_READ_FAILED) + ret = 1; + strbuf_setlen(path, orig_path_len); + return ret; +} + int is_inside_git_dir(void) { if (inside_git_dir < 0) From a2d5156c2b0e6dbffc216b4a673156487a2f8b65 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 22 Jan 2016 17:29:30 -0500 Subject: [PATCH 2/2] resolve_gitlink_ref: ignore non-repository paths When we want to look up a submodule ref, we use get_ref_cache(path) to find or auto-create its ref cache. But if we feed a path that isn't actually a git repository, we blindly create the ref cache, and then may die deeper in the code when we try to access it. This is a problem because many callers speculatively feed us a path that looks vaguely like a repository, and expect us to tell them when it is not. This patch teaches resolve_gitlink_ref to reject non-repository paths without creating a ref_cache. This avoids the die(), and also performs better if you have a large number of these faux-submodule directories (because the ref_cache lookup is linear, under the assumption that there won't be a large number of submodules). To accomplish this, we also break get_ref_cache into two pieces: the lookup and auto-creation (the latter is lumped into create_ref_cache). This lets us first cheaply ask our cache "is it a submodule we know about?" If so, we can avoid repeating our filesystem lookup. So lookups of real submodules are not penalized; they examine the submodule's .git directory only once. The test in t3000 demonstrates a case where this improves correctness (we used to just die). The new perf case in p7300 shows off the speed improvement in an admittedly pathological repository: Test HEAD^ HEAD ---------------------------------------------------------------- 7300.4: ls-files -o 66.97(66.15+0.87) 0.33(0.08+0.24) -99.5% Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- refs/files-backend.c | 50 ++++++++++++++++++++++++++------------ t/perf/p7300-clean.sh | 4 +++ t/t3000-ls-files-others.sh | 7 ++++++ 3 files changed, 45 insertions(+), 16 deletions(-) diff --git a/refs/files-backend.c b/refs/files-backend.c index c648b5e853..3a27f27585 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -933,6 +933,10 @@ static void clear_loose_ref_cache(struct ref_cache *refs) } } +/* + * Create a new submodule ref cache and add it to the internal + * set of caches. + */ static struct ref_cache *create_ref_cache(const char *submodule) { int len; @@ -942,9 +946,24 @@ static struct ref_cache *create_ref_cache(const char *submodule) len = strlen(submodule) + 1; refs = xcalloc(1, sizeof(struct ref_cache) + len); memcpy(refs->name, submodule, len); + refs->next = submodule_ref_caches; + submodule_ref_caches = refs; return refs; } +static struct ref_cache *lookup_ref_cache(const char *submodule) +{ + struct ref_cache *refs; + + if (!submodule || !*submodule) + return &ref_cache; + + for (refs = submodule_ref_caches; refs; refs = refs->next) + if (!strcmp(submodule, refs->name)) + return refs; + return NULL; +} + /* * Return a pointer to a ref_cache for the specified submodule. For * the main repository, use submodule==NULL. The returned structure @@ -953,18 +972,9 @@ static struct ref_cache *create_ref_cache(const char *submodule) */ static struct ref_cache *get_ref_cache(const char *submodule) { - struct ref_cache *refs; - - if (!submodule || !*submodule) - return &ref_cache; - - for (refs = submodule_ref_caches; refs; refs = refs->next) - if (!strcmp(submodule, refs->name)) - return refs; - - refs = create_ref_cache(submodule); - refs->next = submodule_ref_caches; - submodule_ref_caches = refs; + struct ref_cache *refs = lookup_ref_cache(submodule); + if (!refs) + refs = create_ref_cache(submodule); return refs; } @@ -1336,16 +1346,24 @@ static int resolve_gitlink_ref_recursive(struct ref_cache *refs, int resolve_gitlink_ref(const char *path, const char *refname, unsigned char *sha1) { int len = strlen(path), retval; - char *submodule; + struct strbuf submodule = STRBUF_INIT; struct ref_cache *refs; while (len && path[len-1] == '/') len--; if (!len) return -1; - submodule = xstrndup(path, len); - refs = get_ref_cache(submodule); - free(submodule); + + strbuf_add(&submodule, path, len); + refs = lookup_ref_cache(submodule.buf); + if (!refs) { + if (!is_nonbare_repository_dir(&submodule)) { + strbuf_release(&submodule); + return -1; + } + refs = create_ref_cache(submodule.buf); + } + strbuf_release(&submodule); retval = resolve_gitlink_ref_recursive(refs, refname, sha1, 0); return retval; diff --git a/t/perf/p7300-clean.sh b/t/perf/p7300-clean.sh index ec94cdd657..7c1888a27e 100755 --- a/t/perf/p7300-clean.sh +++ b/t/perf/p7300-clean.sh @@ -28,4 +28,8 @@ test_perf 'clean many untracked sub dirs, ignore nested git' ' git clean -n -q -f -f -d 100000_sub_dirs/ ' +test_perf 'ls-files -o' ' + git ls-files -o +' + test_done diff --git a/t/t3000-ls-files-others.sh b/t/t3000-ls-files-others.sh index 88be904c09..c525656b2c 100755 --- a/t/t3000-ls-files-others.sh +++ b/t/t3000-ls-files-others.sh @@ -65,6 +65,13 @@ test_expect_success '--no-empty-directory hides empty directory' ' test_cmp expected3 output ' +test_expect_success 'ls-files --others handles non-submodule .git' ' + mkdir not-a-submodule && + echo foo >not-a-submodule/.git && + git ls-files -o >output && + test_cmp expected1 output +' + test_expect_success SYMLINKS 'ls-files --others with symlinked submodule' ' git init super && git init sub &&