Merge branch 'ds/path-walk-repack-fix' into maint-2.51

"git repack --path-walk" lost objects in some corner cases, which
has been corrected.
cf. <CABPp-BHFxxGrqKc0m==TjQNjDGdO=H5Rf6EFsf2nfE1=TuraOQ@mail.gmail.com>

* ds/path-walk-repack-fix:
  path-walk: create initializer for path lists
  path-walk: fix setup of pending objects
maint
Junio C Hamano 2025-10-15 10:29:29 -07:00
commit 03a3c40c2e
2 changed files with 88 additions and 30 deletions

View File

@ -105,6 +105,24 @@ static void push_to_stack(struct path_walk_context *ctx,
prio_queue_put(&ctx->path_stack, xstrdup(path));
}

/*
 * Record 'oid' under 'path' in ctx->paths_to_lists.
 *
 * If no list is registered for 'path' yet, a new one is allocated,
 * given the object 'type', and stored in the map. An already
 * registered list keeps whatever type it was created with; 'type'
 * is ignored in that case.
 *
 * 'interesting' is OR-ed into the list's maybe_interesting flag, so
 * one interesting object is enough to mark the whole path.
 */
static void add_path_to_list(struct path_walk_context *ctx,
			     const char *path,
			     enum object_type type,
			     struct object_id *oid,
			     int interesting)
{
	struct type_and_oid_list *entry;

	entry = strmap_get(&ctx->paths_to_lists, path);
	if (!entry) {
		/* First object seen at this path: create and register its list. */
		CALLOC_ARRAY(entry, 1);
		entry->type = type;
		strmap_put(&ctx->paths_to_lists, path, entry);
	}

	oid_array_append(&entry->oids, oid);
	entry->maybe_interesting |= interesting;
}

static int add_tree_entries(struct path_walk_context *ctx,
const char *base_path,
struct object_id *oid)
@ -129,7 +147,6 @@ static int add_tree_entries(struct path_walk_context *ctx,

init_tree_desc(&desc, &tree->object.oid, tree->buffer, tree->size);
while (tree_entry(&desc, &entry)) {
struct type_and_oid_list *list;
struct object *o;
/* Not actually true, but we will ignore submodules later. */
enum object_type type = S_ISDIR(entry.mode) ? OBJ_TREE : OBJ_BLOB;
@ -190,17 +207,10 @@ static int add_tree_entries(struct path_walk_context *ctx,
continue;
}

if (!(list = strmap_get(&ctx->paths_to_lists, path.buf))) {
CALLOC_ARRAY(list, 1);
list->type = type;
strmap_put(&ctx->paths_to_lists, path.buf, list);
}
add_path_to_list(ctx, path.buf, type, &entry.oid,
!(o->flags & UNINTERESTING));

push_to_stack(ctx, path.buf);

if (!(o->flags & UNINTERESTING))
list->maybe_interesting = 1;

oid_array_append(&list->oids, &entry.oid);
}

free_tree_buffer(tree);
@ -377,15 +387,9 @@ static int setup_pending_objects(struct path_walk_info *info,
if (!info->trees)
continue;
if (pending->path) {
struct type_and_oid_list *list;
char *path = *pending->path ? xstrfmt("%s/", pending->path)
: xstrdup("");
if (!(list = strmap_get(&ctx->paths_to_lists, path))) {
CALLOC_ARRAY(list, 1);
list->type = OBJ_TREE;
strmap_put(&ctx->paths_to_lists, path, list);
}
oid_array_append(&list->oids, &obj->oid);
add_path_to_list(ctx, path, OBJ_TREE, &obj->oid, 1);
free(path);
} else {
/* assume a root tree, such as a lightweight tag. */
@ -396,19 +400,10 @@ static int setup_pending_objects(struct path_walk_info *info,
case OBJ_BLOB:
if (!info->blobs)
continue;
if (pending->path) {
struct type_and_oid_list *list;
char *path = pending->path;
if (!(list = strmap_get(&ctx->paths_to_lists, path))) {
CALLOC_ARRAY(list, 1);
list->type = OBJ_BLOB;
strmap_put(&ctx->paths_to_lists, path, list);
}
oid_array_append(&list->oids, &obj->oid);
} else {
/* assume a root tree, such as a lightweight tag. */
if (pending->path)
add_path_to_list(ctx, pending->path, OBJ_BLOB, &obj->oid, 1);
else
oid_array_append(&tagged_blobs->oids, &obj->oid);
}
break;

case OBJ_COMMIT:

View File

@ -838,4 +838,67 @@ test_expect_success '-n overrides repack.updateServerInfo=true' '
test_server_info_missing
'

# Checks that objects reachable only from pending state survive a repack.
#
# The test builds a scratch repository "pending" (removed when the test
# finishes), makes two commits, switches to a sparse checkout with a
# sparse index covering "a" and "x", and stages new content for a/d, a/e
# and a/i. It then stages x/y, records its blob id, and unstages it again
# with "git rm --cached".
#
# The expected "git fsck" output is exactly one line, "dangling blob <id>",
# for the x/y blob. That output is compared twice: once after a plain
# "git repack -adf" and once after "git repack -adf --path-walk" run with
# pack.useSparse=true.
test_expect_success 'pending objects are repacked appropriately' '
test_when_finished rm -rf pending &&
git init pending &&

(
cd pending &&

# Commit file, a/b/c and never change them.
mkdir -p a/b &&
echo singleton >file &&
echo stuff >a/b/c &&
echo more >a/d &&
git add file a &&
git commit -m "single blobs" &&

# Files a/d and a/e will not be singletons.
echo d >a/d &&
echo e >a/e &&
git add a &&
git commit -m "more blobs" &&

# This use of a sparse index helps to force
# test that the cache-tree is walked, too.
git sparse-checkout set --sparse-index a x &&

# Create staged changes:
# * a/e now has multiple versions.
# * a/i now has only one version.
echo f >a/d &&
echo h >a/e &&
echo i >a/i &&
git add a &&

# Stage and unstage a change to make use of
# resolve-undo cache and how that impacts fsck.
mkdir x &&
echo y >x/y &&
git add x &&
xy=$(git rev-parse :x/y) &&
git rm --cached x/y &&

# The blob for x/y must persist through repacks,
# but fsck currently ignores the REUC extension
# for finding links to the blob.
cat >expect <<-EOF &&
dangling blob $xy
EOF

# Bring the loose objects into a packfile to avoid
# leftovers in next test. Without this, the loose
# objects persist and the test succeeds for other
# reasons.
git repack -adf &&
git fsck >out &&
test_cmp expect out &&

# Test path walk version with pack.useSparse.
git -c pack.useSparse=true repack -adf --path-walk &&
git fsck >out &&
test_cmp expect out
)
'

test_done