path-walk: always emit directly-requested objects

We are preparing to integrate the path-walk API with some --filter options
in 'git pack-objects', but there is a subtle issue that is revealed when
those are put together and the test suite is run with
GIT_TEST_PACK_PATH_WALK=1.

When a filter reduces the set of requested objects, it also filters out
directly-requested objects, which breaks cases such as the download of
needed blobs in a blobless partial clone.

The root cause is that the scan of pending objects in the path-walk API
respects the filters set in the path_walk_info instead of overriding them
for pending objects.

We can tell that a path is part of the directly-referenced objects if its
path name starts with '/' (other paths, including root trees, never have this
starting character). Create a path_is_for_direct_objects() helper to make this
meaning clear, especially as we add more references in the future as we
integrate the path-walk API with partial clone filter options.

Signed-off-by: Derrick Stolee <stolee@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
main
Derrick Stolee 2026-05-22 18:24:28 +00:00 committed by Junio C Hamano
parent 5406b62b21
commit 7a7070eebc
3 changed files with 39 additions and 15 deletions

View File

@ -48,6 +48,13 @@ commits.
applications could disable some options to make it simpler to walk
the objects or to have fewer calls to `path_fn`.
+
Note that objects directly requested as pending objects (such as targets
of lightweight tags or other ref tips) are always emitted to `path_fn`,
even when the corresponding type flag is disabled. Only objects
discovered during the tree walk are subject to these type filters. This
ensures that objects specifically requested through the revision input
are never silently dropped.
+
While it is possible to walk only commits in this way, consumers would be
better off using the revision walk API instead.


View File

@ -248,6 +248,17 @@ static int add_tree_entries(struct path_walk_context *ctx,
return 0;
}

/*
 * Tell whether 'path' names a list of directly-requested objects.
 *
 * Lists whose path begins with '/' (for example "/tags" or
 * "/tagged-blobs") hold objects that came from 'pending' objects instead
 * of being found while traversing trees.
 *
 * 'path' must not be NULL; this is checked with ASSERT(). Returns 1 when
 * the first character of 'path' is '/', and 0 otherwise (including for
 * the empty string).
 */
static int path_is_for_direct_objects(const char *path)
{
	ASSERT(path);
	return *path == '/';
}

/*
* For each path in paths_to_explore, walk the trees another level
* and add any found blobs to the batch (but only if they exist and
@ -306,14 +317,19 @@ static int walk_path(struct path_walk_context *ctx,

if (list->type == OBJ_BLOB &&
ctx->revs->prune_data.nr &&
!path_is_for_direct_objects(path) &&
!match_pathspec(ctx->repo->index, &ctx->revs->prune_data,
path, strlen(path), 0,
NULL, 0))
return 0;

/* Evaluate function pointer on this data, if requested. */
if ((list->type == OBJ_TREE && ctx->info->trees) ||
(list->type == OBJ_BLOB && ctx->info->blobs) ||
/*
* Evaluate function pointer on this data, if requested.
* Ignore object type filters for tagged objects (path starts
* with `/`).
*/
if ((list->type == OBJ_TREE && (ctx->info->trees || path_is_for_direct_objects(path))) ||
(list->type == OBJ_BLOB && (ctx->info->blobs || path_is_for_direct_objects(path))) ||
(list->type == OBJ_TAG && ctx->info->tags))
ret = ctx->info->path_fn(path, &list->oids, list->type,
ctx->info->path_fn_data);
@ -374,10 +390,8 @@ static int setup_pending_objects(struct path_walk_info *info,

if (info->tags)
CALLOC_ARRAY(tags, 1);
if (info->blobs)
CALLOC_ARRAY(tagged_blobs, 1);
if (info->trees)
root_tree_list = strmap_get(&ctx->paths_to_lists, root_path);
CALLOC_ARRAY(tagged_blobs, 1);
root_tree_list = strmap_get(&ctx->paths_to_lists, root_path);

/*
* Pending objects include:
@ -421,8 +435,6 @@ static int setup_pending_objects(struct path_walk_info *info,

switch (obj->type) {
case OBJ_TREE:
if (!info->trees)
continue;
if (pending->path) {
char *path = *pending->path ? xstrfmt("%s/", pending->path)
: xstrdup("");
@ -435,8 +447,6 @@ static int setup_pending_objects(struct path_walk_info *info,
break;

case OBJ_BLOB:
if (!info->blobs)
continue;
if (pending->path)
add_path_to_list(ctx, pending->path, OBJ_BLOB, &obj->oid, 1);
else
@ -532,15 +542,17 @@ int walk_objects_by_path(struct path_walk_info *info)
push_to_stack(&ctx, root_path);

/*
* Set these values before preparing the walk to catch
* lightweight tags pointing to non-commits and indexed objects.
* Ensure that prepare_revision_walk() keeps all pending objects
* even through an object type filter.
*/
info->revs->blob_objects = info->blobs;
info->revs->tree_objects = info->trees;
info->revs->blob_objects = info->revs->tree_objects = 1;

if (prepare_revision_walk(info->revs))
die(_("failed to setup revision walk"));

info->revs->blob_objects = info->blobs;
info->revs->tree_objects = info->trees;

/*
* Walk trees to mark them as UNINTERESTING.
* This is particularly important when 'edge_aggressive' is set.

View File

@ -36,6 +36,11 @@ struct path_walk_info {
/**
* Initialize which object types the path_fn should be called on. This
* could also limit the walk to skip blobs if not set.
*
* Note: even when 'blobs' or 'trees' is disabled, objects that are
* directly requested as pending objects will still be emitted to
* path_fn. Only objects discovered during the tree walk are filtered by
* these flags.
*/
int commits;
int trees;