From 49ff3cb90fee9d0591b59a4c40ac2330163cab87 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Mon, 8 Aug 2022 19:07:49 +0000 Subject: [PATCH 1/4] checkout: fix nested sparse directory diff in sparse index Add the 'recursive' diff flag to the local changes reporting done by 'git checkout' in 'show_local_changes()'. Without the flag enabled, unexpanded sparse directories will not be recursed into to report the diff of each file's contents, resulting in the reported local changes including "modified" sparse directories. The same issue was found and fixed for 'git status' in 2c521b0e49 (status: fix nested sparse directory diff in sparse index, 2022-03-01) Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- builtin/checkout.c | 1 + t/t1092-sparse-checkout-compatibility.sh | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/builtin/checkout.c b/builtin/checkout.c index 2eefda81d8..1144c569c6 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -626,6 +626,7 @@ static void show_local_changes(struct object *head, repo_init_revisions(the_repository, &rev, NULL); rev.diffopt.flags = opts->flags; rev.diffopt.output_format |= DIFF_FORMAT_NAME_STATUS; + rev.diffopt.flags.recursive = 1; diff_setup_done(&rev.diffopt); add_pending_object(&rev, head, NULL); run_diff_index(&rev, 0); diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index f9f8c988bb..83458b092c 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -372,6 +372,14 @@ test_expect_success 'deep changes during checkout' ' test_all_match git checkout base ' +test_expect_success 'checkout with modified sparse directory' ' + init_repos && + + test_all_match git checkout rename-in-to-out -- . && + test_sparse_match git sparse-checkout reapply && + test_all_match git checkout base +' + test_expect_success 'add outside sparse cone' ' init_repos && From 56d8a27124b9bdfcaece7728649104c9121141a6 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Mon, 8 Aug 2022 19:07:50 +0000 Subject: [PATCH 2/4] oneway_diff: handle removed sparse directories Update 'do_oneway_diff()' to perform a 'diff_tree_oid()' on removed sparse directories, as it does for added or modified sparse directories (see 9eb00af562 (diff-lib: handle index diffs with sparse dirs, 2021-07-14)). At the moment, this update is unreachable code because 'unpack_trees()' (currently the only way 'oneway_diff()' can be called, via 'diff_cache()') will always traverse trees down to the individual removed files of a deleted sparse directory. A subsequent patch will change this to better preserve a sparse index in other uses of 'unpack_tree()', e.g. 'git reset --hard'. However, making that change without this patch would result in (among other issues) 'git status' printing only the name of a deleted sparse directory, not its contents. To avoid introducing that bug, 'do_oneway_diff()' is updated before modifying 'unpack_trees()'. Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- diff-lib.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/diff-lib.c b/diff-lib.c index 7eb66a417a..2edea41a23 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -466,6 +466,11 @@ static void do_oneway_diff(struct unpack_trees_options *o, * Something removed from the tree? */ if (!idx) { + if (S_ISSPARSEDIR(tree->ce_mode)) { + diff_tree_oid(&tree->oid, NULL, tree->name, &revs->diffopt); + return; + } + diff_index_show_file(revs, "-", tree, &tree->oid, 1, tree->ce_mode, 0); return; From 9553aa0f6c844085b3fbf05da5440dd7096dd764 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Mon, 8 Aug 2022 19:07:51 +0000 Subject: [PATCH 3/4] cache.h: create 'index_name_pos_sparse()' Add 'index_name_pos_sparse()', which behaves the same as 'index_name_pos()', except that it does not expand a sparse index to search for an entry inside a sparse directory. 'index_entry_exists()' was originally implemented in 20ec2d034c (reset: make sparse-aware (except --mixed), 2021-11-29) as an alternative to 'index_name_pos()' to allow callers to search for an index entry without expanding a sparse index. However, that particular use case only required knowing whether the requested entry existed, so 'index_entry_exists()' does not return the index positioning information provided by 'index_name_pos()'. This patch implements 'index_name_pos_sparse()' to accommodate callers that need the positioning information of 'index_name_pos()', but do not want to expand the index. Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- cache.h | 9 +++++++++ read-cache.c | 5 +++++ 2 files changed, 14 insertions(+) diff --git a/cache.h b/cache.h index ac5ab4ef9d..33fc8337c6 100644 --- a/cache.h +++ b/cache.h @@ -830,6 +830,15 @@ struct cache_entry *index_file_exists(struct index_state *istate, const char *na */ int index_name_pos(struct index_state *, const char *name, int namelen); +/* + * Like index_name_pos, returns the position of an entry of the given name in + * the index if one exists, otherwise returns a negative value where the negated + * value minus 1 is the position where the index entry would be inserted. Unlike + * index_name_pos, however, a sparse index is not expanded to find an entry + * inside a sparse directory. + */ +int index_name_pos_sparse(struct index_state *, const char *name, int namelen); + /* * Determines whether an entry with the given name exists within the * given index. The return value is 1 if an exact match is found, otherwise diff --git a/read-cache.c b/read-cache.c index 76f372ff91..06e5b06e63 100644 --- a/read-cache.c +++ b/read-cache.c @@ -620,6 +620,11 @@ int index_name_pos(struct index_state *istate, const char *name, int namelen) return index_name_stage_pos(istate, name, namelen, 0, EXPAND_SPARSE); } +int index_name_pos_sparse(struct index_state *istate, const char *name, int namelen) +{ + return index_name_stage_pos(istate, name, namelen, 0, NO_EXPAND_SPARSE); +} + int index_entry_exists(struct index_state *istate, const char *name, int namelen) { return index_name_stage_pos(istate, name, namelen, 0, NO_EXPAND_SPARSE) >= 0; From b15207b8cf1a1930fe5eb076c08c6ddc92d9282d Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Mon, 8 Aug 2022 19:07:52 +0000 Subject: [PATCH 4/4] unpack-trees: unpack new trees as sparse directories If 'unpack_single_entry()' is unpacking a new directory tree (that is, one not already present in the index) into a sparse index, unpack the tree as a sparse directory rather than traversing its contents and unpacking each file individually. This helps keep the sparse index as collapsed as possible in cases such as 'git reset --hard' restoring a outside-of-cone directory removed with 'git rm -r --sparse'. Without this patch, 'unpack_single_entry()' will only unpack a directory into the index as a sparse directory (rather than traversing into it and unpacking its files one-by-one) if an entry with the same name already exists in the index. This patch allows sparse directory unpacking without a matching index entry when the following conditions are met: 1. the directory's path is outside the sparse cone, and 2. there are no children of the directory in the index If a directory meets these requirements (as determined by 'is_new_sparse_dir()'), 'unpack_single_entry()' unpacks the sparse directory index entry and propagates the decision back up to 'unpack_callback()' to prevent unnecessary tree traversal into the unpacked directory. Reported-by: Shaoxuan Yuan Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- t/t1092-sparse-checkout-compatibility.sh | 17 ++++ unpack-trees.c | 106 ++++++++++++++++++++--- 2 files changed, 113 insertions(+), 10 deletions(-) diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index 83458b092c..5dced39889 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -695,6 +695,23 @@ test_expect_success 'reset with wildcard pathspec' ' test_all_match git ls-files -s -- folder1 ' +test_expect_success 'reset hard with removed sparse dir' ' + init_repos && + + run_on_all git rm -r --sparse folder1 && + test_all_match git status --porcelain=v2 && + + test_all_match git reset --hard && + test_all_match git status --porcelain=v2 && + + cat >expect <<-\EOF && + folder1/ + EOF + + git -C sparse-index ls-files --sparse folder1 >out && + test_cmp expect out +' + test_expect_success 'update-index modify outside sparse definition' ' init_repos && diff --git a/unpack-trees.c b/unpack-trees.c index d561ca01ed..9ac7783e2e 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1069,6 +1069,67 @@ static struct cache_entry *create_ce_entry(const struct traverse_info *info, return ce; } +/* + * Determine whether the path specified by 'p' should be unpacked as a new + * sparse directory in a sparse index. A new sparse directory 'A/': + * - must be outside the sparse cone. + * - must not already be in the index (i.e., no index entry with name 'A/' + * exists). + * - must not have any child entries in the index (i.e., no index entry + * 'A/' exists). + * If 'p' meets the above requirements, return 1; otherwise, return 0. + */ +static int entry_is_new_sparse_dir(const struct traverse_info *info, + const struct name_entry *p) +{ + int res, pos; + struct strbuf dirpath = STRBUF_INIT; + struct unpack_trees_options *o = info->data; + + if (!S_ISDIR(p->mode)) + return 0; + + /* + * If the path is inside the sparse cone, it can't be a sparse directory. + */ + strbuf_add(&dirpath, info->traverse_path, info->pathlen); + strbuf_add(&dirpath, p->path, p->pathlen); + strbuf_addch(&dirpath, '/'); + if (path_in_cone_mode_sparse_checkout(dirpath.buf, o->src_index)) { + res = 0; + goto cleanup; + } + + pos = index_name_pos_sparse(o->src_index, dirpath.buf, dirpath.len); + if (pos >= 0) { + /* Path is already in the index, not a new sparse dir */ + res = 0; + goto cleanup; + } + + /* Where would this sparse dir be inserted into the index? */ + pos = -pos - 1; + if (pos >= o->src_index->cache_nr) { + /* + * Sparse dir would be inserted at the end of the index, so we + * know it has no child entries. + */ + res = 1; + goto cleanup; + } + + /* + * If the dir has child entries in the index, the first would be at the + * position the sparse directory would be inserted. If the entry at this + * position is inside the dir, not a new sparse dir. + */ + res = strncmp(o->src_index->cache[pos]->name, dirpath.buf, dirpath.len); + +cleanup: + strbuf_release(&dirpath); + return res; +} + /* * Note that traverse_by_cache_tree() duplicates some logic in this function * without actually calling it. If you change the logic here you may need to @@ -1078,21 +1139,44 @@ static int unpack_single_entry(int n, unsigned long mask, unsigned long dirmask, struct cache_entry **src, const struct name_entry *names, - const struct traverse_info *info) + const struct traverse_info *info, + int *is_new_sparse_dir) { int i; struct unpack_trees_options *o = info->data; unsigned long conflicts = info->df_conflicts | dirmask; + const struct name_entry *p = names; - if (mask == dirmask && !src[0]) - return 0; + *is_new_sparse_dir = 0; + if (mask == dirmask && !src[0]) { + /* + * If we're not in a sparse index, we can't unpack a directory + * without recursing into it, so we return. + */ + if (!o->src_index->sparse_index) + return 0; + + /* Find first entry with a real name (we could use "mask" too) */ + while (!p->mode) + p++; + + /* + * If the directory is completely missing from the index but + * would otherwise be a sparse directory, we should unpack it. + * If not, we'll return and continue recursively traversing the + * tree. + */ + *is_new_sparse_dir = entry_is_new_sparse_dir(info, p); + if (!*is_new_sparse_dir) + return 0; + } /* - * When we have a sparse directory entry for src[0], - * then this isn't necessarily a directory-file conflict. + * When we are unpacking a sparse directory, then this isn't necessarily + * a directory-file conflict. */ - if (mask == dirmask && src[0] && - S_ISSPARSEDIR(src[0]->ce_mode)) + if (mask == dirmask && + (*is_new_sparse_dir || (src[0] && S_ISSPARSEDIR(src[0]->ce_mode)))) conflicts = 0; /* @@ -1352,7 +1436,7 @@ static int unpack_sparse_callback(int n, unsigned long mask, unsigned long dirma { struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, }; struct unpack_trees_options *o = info->data; - int ret; + int ret, is_new_sparse_dir; assert(o->merge); @@ -1376,7 +1460,7 @@ static int unpack_sparse_callback(int n, unsigned long mask, unsigned long dirma * "index" tree (i.e., names[0]) and adjust 'names', 'n', 'mask', and * 'dirmask' accordingly. */ - ret = unpack_single_entry(n - 1, mask >> 1, dirmask >> 1, src, names + 1, info); + ret = unpack_single_entry(n - 1, mask >> 1, dirmask >> 1, src, names + 1, info, &is_new_sparse_dir); if (src[0]) discard_cache_entry(src[0]); @@ -1394,6 +1478,7 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, }; struct unpack_trees_options *o = info->data; const struct name_entry *p = names; + int is_new_sparse_dir; /* Find first entry with a real name (we could use "mask" too) */ while (!p->mode) @@ -1440,7 +1525,7 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str } } - if (unpack_single_entry(n, mask, dirmask, src, names, info) < 0) + if (unpack_single_entry(n, mask, dirmask, src, names, info, &is_new_sparse_dir)) return -1; if (o->merge && src[0]) { @@ -1478,6 +1563,7 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str } if (!is_sparse_directory_entry(src[0], names, info) && + !is_new_sparse_dir && traverse_trees_recursive(n, dirmask, mask & ~dirmask, names, info) < 0) { return -1;