Merge branch 'ds/path-walk-filters'

The "git pack-objects --path-walk" traversal has been integrated
with several object filters, including blobless and sparse filters.

* ds/path-walk-filters:
  path-walk: support `combine` filter
  path-walk: support `object:type` filter
  path-walk: support `tree:0` filter
  t6601: tag otherwise-unreachable trees
  pack-objects: support sparse:oid filter with path-walk
  path-walk: add pl_sparse_trees to control tree pruning
  path-walk: support blob size limit filter
  backfill: die on incompatible filter options
  path-walk: support blobless filter
  path-walk: always emit directly-requested objects
  t/perf: add pack-objects filter and path-walk benchmark
  pack-objects: pass --objects with --path-walk
  t5620: make test work with path-walk var
main
Junio C Hamano 2026-06-02 16:15:29 +09:00
commit ffaa2eddd0
12 changed files with 1124 additions and 78 deletions

View File

@ -80,6 +80,10 @@ OPTIONS
+
You may also use commit-limiting options understood by
linkgit:git-rev-list[1] such as `--first-parent`, `--since`, or pathspecs.
+
Most `--filter=<spec>` options are incompatible with the purpose of
`git backfill`, but the `sparse:oid=<blob-ish>` filter is supported and
provides a focused set of paths to download, distinct from the `--sparse` option.

SEE ALSO
--------

View File

@ -402,9 +402,11 @@ will be automatically changed to version `1`.
of filenames that cause collisions in Git's default name-hash
algorithm.
+
Incompatible with `--delta-islands`, `--shallow`, or `--filter`. The
`--use-bitmap-index` option will be ignored in the presence of
`--path-walk.`
Incompatible with `--delta-islands`. The `--use-bitmap-index` option is
ignored in the presence of `--path-walk`. The `--path-walk` option
supports the `--filter=<spec>` forms `blob:none`, `blob:limit=<n>`,
`tree:0`, `object:type=<type>`, and `sparse:oid=<blob-ish>`. These supported filter
types can be combined with the `combine:<spec>+<spec>` form.


DELTA ISLANDS

View File

@ -48,6 +48,13 @@ commits.
applications could disable some options to make it simpler to walk
the objects or to have fewer calls to `path_fn`.
+
Note that objects directly requested as pending objects (such as targets
of lightweight tags or other ref tips) are always emitted to `path_fn`,
even when the corresponding type flag is disabled. Only objects
discovered during the tree walk are subject to these type filters. This
ensures that objects specifically requested through the revision input
are never silently dropped.
+
While it is possible to walk only commits in this way, consumers would be
better off using the revision walk API instead.


View File

@ -96,9 +96,10 @@ static void reject_unsupported_rev_list_options(struct rev_info *revs)
if (revs->explicit_diff_merges)
die(_("'%s' cannot be used with 'git backfill'"),
"--diff-merges");
if (revs->filter.choice)
die(_("'%s' cannot be used with 'git backfill'"),
"--filter");
if (!path_walk_filter_compatible(&revs->filter))
die(_("cannot backfill with these filter options"));
if (revs->filter.blob_limit_value)
die(_("cannot backfill with blob size limits"));
}

static int do_backfill(struct backfill_context *ctx)
@ -108,6 +109,7 @@ static int do_backfill(struct backfill_context *ctx)

if (ctx->sparse) {
CALLOC_ARRAY(info.pl, 1);
info.pl_sparse_trees = 1;
if (get_sparse_checkout_patterns(info.pl)) {
path_walk_info_clear(&info);
return error(_("problem loading sparse-checkout"));

View File

@ -4764,7 +4764,7 @@ static int add_objects_by_path(const char *path,
return 0;
}

static void get_object_list_path_walk(struct rev_info *revs)
static int get_object_list_path_walk(struct rev_info *revs)
{
struct path_walk_info info = PATH_WALK_INFO_INIT;
unsigned int processed = 0;
@ -4787,8 +4787,9 @@ static void get_object_list_path_walk(struct rev_info *revs)
result = walk_objects_by_path(&info);
trace2_region_leave("pack-objects", "path-walk", revs->repo);

if (result)
die(_("failed to pack objects via path-walk"));
path_walk_info_clear(&info);

return result;
}

static void get_object_list(struct rev_info *revs, struct strvec *argv)
@ -4851,8 +4852,13 @@ static void get_object_list(struct rev_info *revs, struct strvec *argv)
fn_show_object = show_object;

if (path_walk) {
get_object_list_path_walk(revs);
} else {
if (get_object_list_path_walk(revs)) {
warning(_("failed to pack objects via path-walk"));
path_walk = 0;
}
}

if (!path_walk) {
if (prepare_revision_walk(revs))
die(_("revision walk setup failed"));
mark_edges_uninteresting(revs, show_edge, sparse);
@ -5187,7 +5193,7 @@ int cmd_pack_objects(int argc,

if (path_walk) {
const char *option = NULL;
if (filter_options.choice)
if (!path_walk_filter_compatible(&filter_options))
option = "--filter";
else if (use_delta_islands)
option = "--delta-islands";
@ -5200,10 +5206,7 @@ int cmd_pack_objects(int argc,
}
if (path_walk) {
strvec_push(&rp, "--boundary");
/*
* We must disable the bitmaps because we are removing
* the --objects / --objects-edge[-aggressive] options.
*/
strvec_push(&rp, "--objects");
use_bitmap_index = 0;
} else if (thin) {
use_internal_rev_list = 1;

View File

@ -9,6 +9,9 @@
#include "hashmap.h"
#include "hex.h"
#include "list-objects.h"
#include "list-objects-filter-options.h"
#include "object-name.h"
#include "odb.h"
#include "object.h"
#include "oid-array.h"
#include "path.h"
@ -178,11 +181,6 @@ static int add_tree_entries(struct path_walk_context *ctx,
return -1;
}

/* Skip this object if already seen. */
if (o->flags & SEEN)
continue;
o->flags |= SEEN;

strbuf_setlen(&path, base_len);
strbuf_add(&path, entry.path, entry.pathlen);

@ -193,6 +191,40 @@ static int add_tree_entries(struct path_walk_context *ctx,
if (type == OBJ_TREE)
strbuf_addch(&path, '/');

if (o->flags & SEEN) {
/*
* A tree with a shared OID may appear at multiple
* paths. Even though we already added this tree to
* the output at some other path, we still need to
* walk into it at this in-cone path to discover
* blobs that were not found at the earlier
* out-of-cone path.
*
* Only do this for paths not yet in our map, to
* avoid duplicate entries when the same tree OID
* appears at the same path across multiple commits.
*/
if (type == OBJ_TREE && ctx->info->pl &&
ctx->info->pl->use_cone_patterns &&
!ctx->info->pl_sparse_trees &&
!strmap_contains(&ctx->paths_to_lists, path.buf)) {
int dtype;
enum pattern_match_result m;
m = path_matches_pattern_list(path.buf, path.len,
path.buf + base_len,
&dtype,
ctx->info->pl,
ctx->repo->index);
if (m != NOT_MATCHED) {
add_path_to_list(ctx, path.buf, type,
&entry.oid,
!(o->flags & UNINTERESTING));
push_to_stack(ctx, path.buf);
}
}
continue;
}

if (ctx->info->pl) {
int dtype;
enum pattern_match_result match;
@ -202,7 +234,8 @@ static int add_tree_entries(struct path_walk_context *ctx,
ctx->repo->index);

if (ctx->info->pl->use_cone_patterns &&
match == NOT_MATCHED)
match == NOT_MATCHED &&
(type == OBJ_BLOB || ctx->info->pl_sparse_trees))
continue;
else if (!ctx->info->pl->use_cone_patterns &&
type == OBJ_BLOB &&
@ -237,6 +270,7 @@ static int add_tree_entries(struct path_walk_context *ctx,
continue;
}

o->flags |= SEEN;
add_path_to_list(ctx, path.buf, type, &entry.oid,
!(o->flags & UNINTERESTING));

@ -248,6 +282,17 @@ static int add_tree_entries(struct path_walk_context *ctx,
return 0;
}

/*
 * Tell whether 'path' is one of the special lists (for example "/tags" or
 * "/tagged-blobs") that collect objects requested directly through the
 * 'pending' objects, as opposed to objects found while traversing trees.
 * Such lists are recognized by a leading '/' in their path.
 *
 * 'path' must not be NULL. Returns nonzero for a direct-object path and
 * zero otherwise.
 */
static int path_is_for_direct_objects(const char *path)
{
	ASSERT(path);
	return *path == '/';
}

/*
* For each path in paths_to_explore, walk the trees another level
* and add any found blobs to the batch (but only if they exist and
@ -306,23 +351,57 @@ static int walk_path(struct path_walk_context *ctx,

if (list->type == OBJ_BLOB &&
ctx->revs->prune_data.nr &&
!path_is_for_direct_objects(path) &&
!match_pathspec(ctx->repo->index, &ctx->revs->prune_data,
path, strlen(path), 0,
NULL, 0))
return 0;

/* Evaluate function pointer on this data, if requested. */
if ((list->type == OBJ_TREE && ctx->info->trees) ||
(list->type == OBJ_BLOB && ctx->info->blobs) ||
(list->type == OBJ_TAG && ctx->info->tags))
/*
* Evaluate function pointer on this data, if requested.
* Ignore object type filters for tagged objects (path starts
* with `/`), first for blobs and then other types.
*/
if (list->type == OBJ_BLOB &&
ctx->info->blob_limit &&
!path_is_for_direct_objects(path)) {
struct oid_array filtered = OID_ARRAY_INIT;

for (size_t i = 0; i < list->oids.nr; i++) {
unsigned long size;

if (odb_read_object_info(ctx->repo->objects,
&list->oids.oid[i],
&size) != OBJ_BLOB ||
size < ctx->info->blob_limit)
oid_array_append(&filtered,
&list->oids.oid[i]);
}

if (filtered.nr)
ret = ctx->info->path_fn(path, &filtered, list->type,
ctx->info->path_fn_data);
oid_array_clear(&filtered);
} else if ((!ctx->info->strict_types && path_is_for_direct_objects(path)) ||
(list->type == OBJ_TREE && ctx->info->trees) ||
(list->type == OBJ_BLOB && ctx->info->blobs) ||
(list->type == OBJ_TAG && ctx->info->tags)) {
ret = ctx->info->path_fn(path, &list->oids, list->type,
ctx->info->path_fn_data);
}

/* Expand data for children. */
if (list->type == OBJ_TREE) {
/*
* Expand tree children, except when the set is directly requested
* _and_ we are otherwise filtering out trees.
*/
if (list->type == OBJ_TREE &&
(!path_is_for_direct_objects(path) || ctx->info->trees)) {
/* Use root path if expanding from tagged/direct trees. */
const char *expand_path = !strcmp(path, "/tagged-trees")
? root_path : path;
for (size_t i = 0; i < list->oids.nr; i++) {
ret |= add_tree_entries(ctx,
path,
expand_path,
&list->oids.oid[i]);
}
}
@ -370,14 +449,12 @@ static int setup_pending_objects(struct path_walk_info *info,
{
struct type_and_oid_list *tags = NULL;
struct type_and_oid_list *tagged_blobs = NULL;
struct type_and_oid_list *root_tree_list = NULL;
struct type_and_oid_list *tagged_trees = NULL;

if (info->tags)
CALLOC_ARRAY(tags, 1);
if (info->blobs)
CALLOC_ARRAY(tagged_blobs, 1);
if (info->trees)
root_tree_list = strmap_get(&ctx->paths_to_lists, root_path);
CALLOC_ARRAY(tagged_blobs, 1);
CALLOC_ARRAY(tagged_trees, 1);

/*
* Pending objects include:
@ -421,22 +498,19 @@ static int setup_pending_objects(struct path_walk_info *info,

switch (obj->type) {
case OBJ_TREE:
if (!info->trees)
continue;
if (pending->path) {
char *path = *pending->path ? xstrfmt("%s/", pending->path)
: xstrdup("");
if (pending->path && *pending->path) {
char *path = xstrfmt("%s/", pending->path);
add_path_to_list(ctx, path, OBJ_TREE, &obj->oid, 1);
free(path);
} else if (!pending->path || !info->trees) {
oid_array_append(&tagged_trees->oids, &obj->oid);
} else {
/* assume a root tree, such as a lightweight tag. */
oid_array_append(&root_tree_list->oids, &obj->oid);
add_path_to_list(ctx, root_path, OBJ_TREE,
&obj->oid, 1);
}
break;

case OBJ_BLOB:
if (!info->blobs)
continue;
if (pending->path)
add_path_to_list(ctx, pending->path, OBJ_BLOB, &obj->oid, 1);
else
@ -469,6 +543,18 @@ static int setup_pending_objects(struct path_walk_info *info,
free(tagged_blobs);
}
}
if (tagged_trees) {
if (tagged_trees->oids.nr) {
const char *tagged_tree_path = "/tagged-trees";
tagged_trees->type = OBJ_TREE;
tagged_trees->maybe_interesting = 1;
strmap_put(&ctx->paths_to_lists, tagged_tree_path, tagged_trees);
push_to_stack(ctx, tagged_tree_path);
} else {
oid_array_clear(&tagged_trees->oids);
free(tagged_trees);
}
}
if (tags) {
if (tags->oids.nr) {
const char *tag_path = "/tags";
@ -485,6 +571,123 @@ static int setup_pending_objects(struct path_walk_info *info,
return 0;
}

/*
 * Translate a single object filter into the equivalent settings on 'info'.
 *
 * When 'info' is NULL nothing is modified: the call only reports whether
 * the filter choice (and, for `combine`, every sub-filter) is one that the
 * path-walk API handles. Checks that need a repository, such as loading the
 * sparse:oid blob, are only performed when 'info' is non-NULL.
 *
 * Returns 1 when the filter is supported (and applied, if 'info' was
 * given) and 0 otherwise, after reporting an error or warning.
 *
 * This helper deliberately never releases 'options'. The caller releases
 * the whole filter once every part of it has been applied. Releasing
 * sub-filters one by one would leave a `combine` filter half-consumed if a
 * later sub-filter fails, and a caller that falls back to another traversal
 * after such a failure would then see a damaged filter.
 */
static int prepare_filters_one(struct path_walk_info *info,
			       struct list_objects_filter_options *options)
{
	switch (options->choice) {
	case LOFC_DISABLED:
		return 1;

	case LOFC_BLOB_NONE:
		if (info)
			info->blobs = 0;
		return 1;

	case LOFC_BLOB_LIMIT:
		if (info) {
			/* A limit of zero excludes every blob. */
			if (!options->blob_limit_value)
				info->blobs = 0;
			/* Otherwise keep the smallest limit seen so far. */
			else if (!info->blob_limit ||
				 info->blob_limit > options->blob_limit_value)
				info->blob_limit = options->blob_limit_value;
		}
		return 1;

	case LOFC_TREE_DEPTH:
		/* Only tree:0 can be expressed; deeper limits are rejected. */
		if (options->tree_exclude_depth) {
			error(_("tree:%lu filter not supported by the path-walk API"),
			      options->tree_exclude_depth);
			return 0;
		}
		if (info) {
			info->trees = 0;
			info->blobs = 0;
		}
		return 1;

	case LOFC_OBJECT_TYPE:
		if (info) {
			/* Narrow each type flag; never re-enable a disabled one. */
			info->commits &= options->object_type == OBJ_COMMIT;
			info->tags &= options->object_type == OBJ_TAG;
			info->trees &= options->object_type == OBJ_TREE;
			info->blobs &= options->object_type == OBJ_BLOB;
			info->strict_types = 1;
		}
		return 1;

	case LOFC_SPARSE_OID:
		if (info) {
			struct object_id sparse_oid;
			struct repository *repo = info->revs->repo;

			if (info->pl) {
				warning(_("sparse filter cannot be combined with existing sparse patterns"));
				return 0;
			}

			if (repo_get_oid_with_flags(repo,
						    options->sparse_oid_name,
						    &sparse_oid,
						    GET_OID_BLOB)) {
				error(_("unable to access sparse blob in '%s'"),
				      options->sparse_oid_name);
				return 0;
			}

			CALLOC_ARRAY(info->pl, 1);
			/*
			 * Start out in cone mode and re-check the flag after
			 * parsing; presumably the parser clears it when the
			 * patterns are not cone-compatible (confirm in dir.c).
			 */
			info->pl->use_cone_patterns = 1;

			if (add_patterns_from_blob_to_list(&sparse_oid, "", 0,
							   info->pl) < 0) {
				clear_pattern_list(info->pl);
				FREE_AND_NULL(info->pl);
				error(_("unable to parse sparse filter data in '%s'"),
				      oid_to_hex(&sparse_oid));
				return 0;
			}

			if (!info->pl->use_cone_patterns) {
				clear_pattern_list(info->pl);
				FREE_AND_NULL(info->pl);
				warning(_("sparse filter is not cone-mode compatible"));
				return 0;
			}
		}
		return 1;

	case LOFC_COMBINE:
		/* Every sub-filter must be supported for the whole to be. */
		for (size_t i = 0; i < options->sub_nr; i++) {
			if (!prepare_filters_one(info, &options->sub[i]))
				return 0;
		}
		return 1;

	default:
		error(_("object filter '%s' not supported by the path-walk API"),
		      list_objects_filter_spec(options));
		return 0;
	}
}

/*
 * Apply the filter in 'options' to 'info', or merely check it for
 * compatibility when 'info' is NULL.
 *
 * Returns 1 if the filter is supported and 0 if it is not. On success with
 * a non-NULL 'info', the filter is released because its effect has been
 * transferred into 'info'.
 */
static int prepare_filters(struct path_walk_info *info,
			   struct list_objects_filter_options *options)
{
	int supported = prepare_filters_one(info, options);

	if (supported && info)
		list_objects_filter_release(options);

	return supported ? 1 : 0;
}

/*
 * Report whether 'options' can be handled by the path-walk API. This runs
 * the filter preparation with no path_walk_info, so it only inspects the
 * filter. Returns 1 when compatible and 0 otherwise.
 */
int path_walk_filter_compatible(struct list_objects_filter_options *options)
{
	struct path_walk_info *no_info = NULL;

	return prepare_filters(no_info, options);
}

/**
* Given the configuration of 'info', walk the commits based on 'info->revs' and
* call 'info->path_fn' on each discovered path.
@ -512,6 +715,9 @@ int walk_objects_by_path(struct path_walk_info *info)

trace2_region_enter("path-walk", "commit-walk", info->revs->repo);

if (!prepare_filters(info, &info->revs->filter))
return -1;

CALLOC_ARRAY(commit_list, 1);
commit_list->type = OBJ_COMMIT;

@ -532,15 +738,17 @@ int walk_objects_by_path(struct path_walk_info *info)
push_to_stack(&ctx, root_path);

/*
* Set these values before preparing the walk to catch
* lightweight tags pointing to non-commits and indexed objects.
* Ensure that prepare_revision_walk() keeps all pending objects
* even through an object type filter.
*/
info->revs->blob_objects = info->blobs;
info->revs->tree_objects = info->trees;
info->revs->blob_objects = info->revs->tree_objects = 1;

if (prepare_revision_walk(info->revs))
die(_("failed to setup revision walk"));

info->revs->blob_objects = info->blobs;
info->revs->tree_objects = info->trees;

/*
* Walk trees to mark them as UNINTERESTING.
* This is particularly important when 'edge_aggressive' is set.

View File

@ -36,12 +36,30 @@ struct path_walk_info {
/**
* Initialize which object types the path_fn should be called on. This
* could also limit the walk to skip blobs if not set.
*
* Note: even when 'blobs' or 'trees' is disabled, objects that are
* directly requested as pending objects will still be emitted to
* path_fn. Only objects discovered during the tree walk are filtered by
* these flags.
*/
int commits;
int trees;
int blobs;
int tags;

/**
* If 'strict_types' is nonzero, then direct object requests will no
* longer override the object type restrictions. When it is zero,
* directly requested objects are emitted regardless of their type.
*/
int strict_types;

/**
* If non-zero, specifies a maximum blob size. Blobs with a
* size equal to or greater than this limit will not be
* emitted unless included in 'pending'.
*/
unsigned long blob_limit;

/**
* When 'prune_all_uninteresting' is set and a path has all objects
* marked as UNINTERESTING, then the path-walk will not visit those
@ -64,8 +82,14 @@ struct path_walk_info {
* of the cone. If not in cone mode, then all tree paths will be
* explored but the path_fn will only be called when the path matches
* the sparse-checkout patterns.
*
* When 'pl_sparse_trees' is zero, the sparse patterns only restrict
* blobs and all trees are included in the walk output. This matches
* the behavior of the sparse:oid object filter. When nonzero, trees
* are also pruned by the sparse patterns (as used by backfill).
*/
struct pattern_list *pl;
int pl_sparse_trees;
};

#define PATH_WALK_INFO_INIT { \
@ -85,3 +109,10 @@ void path_walk_info_clear(struct path_walk_info *info);
* Returns nonzero on an error.
*/
int walk_objects_by_path(struct path_walk_info *info);

struct list_objects_filter_options;
/**
* Given a set of options for filtering objects, return 1 if the options
* are compatible with the path-walk API and 0 otherwise.
*/
int path_walk_filter_compatible(struct list_objects_filter_options *options);

View File

@ -4,6 +4,7 @@
#include "dir.h"
#include "environment.h"
#include "hex.h"
#include "list-objects-filter-options.h"
#include "object-name.h"
#include "object.h"
#include "pretty.h"
@ -67,10 +68,12 @@ static int emit_block(const char *path, struct oid_array *oids,

int cmd__path_walk(int argc, const char **argv)
{
int res, stdin_pl = 0;
int res, stdin_pl = 0, pl_sparse_trees = -1;
struct rev_info revs = REV_INFO_INIT;
struct path_walk_info info = PATH_WALK_INFO_INIT;
struct path_walk_test_data data = { 0 };
struct list_objects_filter_options filter_options =
LIST_OBJECTS_FILTER_INIT;
struct option options[] = {
OPT_BOOL(0, "blobs", &info.blobs,
N_("toggle inclusion of blob objects")),
@ -86,11 +89,14 @@ int cmd__path_walk(int argc, const char **argv)
N_("toggle aggressive edge walk")),
OPT_BOOL(0, "stdin-pl", &stdin_pl,
N_("read a pattern list over stdin")),
OPT_BOOL(0, "pl-sparse-trees", &pl_sparse_trees,
N_("toggle pruning of trees by sparse patterns")),
OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
OPT_END(),
};

setup_git_directory(the_repository);
revs.repo = the_repository;
repo_init_revisions(the_repository, &revs, NULL);

argc = parse_options(argc, argv, NULL,
options, path_walk_usage,
@ -101,6 +107,10 @@ int cmd__path_walk(int argc, const char **argv)
else
usage(path_walk_usage[0]);

/* Apply the filter after setup_revisions to avoid the --objects check. */
if (filter_options.choice)
list_objects_filter_copy(&revs.filter, &filter_options);

info.revs = &revs;
info.path_fn = emit_block;
info.path_fn_data = &data;
@ -108,6 +118,8 @@ int cmd__path_walk(int argc, const char **argv)
if (stdin_pl) {
struct strbuf in = STRBUF_INIT;
CALLOC_ARRAY(info.pl, 1);
info.pl_sparse_trees = (pl_sparse_trees >= 0) ?
pl_sparse_trees : 1;

info.pl->use_cone_patterns = 1;

@ -129,6 +141,7 @@ int cmd__path_walk(int argc, const char **argv)
free(info.pl);
}

list_objects_filter_release(&filter_options);
release_revisions(&revs);
return res;
}

View File

@ -0,0 +1,129 @@
#!/bin/sh

test_description='Tests pack-objects performance with filters and --path-walk'
. ./perf-lib.sh

test_perf_large_repo

test_expect_success 'setup filter inputs' '
# Sample a few depth-2 directories from the test repo to build
# a cone-mode sparse-checkout definition. The sampling picks
# directories at evenly-spaced positions so the choice is stable
# and scales to repos of any shape.

git ls-tree -d HEAD >top-entries &&
grep "^040000" top-entries |
awk "{print \$4;}" >top-dirs &&
top_nr=$(wc -l <top-dirs) &&

while read tdir
do
git ls-tree -d --format="$tdir/%(path)" "HEAD:$tdir" || return 1
done <top-dirs >depth2-dirs &&

d2_nr=$(wc -l <depth2-dirs) &&

if test "$d2_nr" -ge 2
then
# Pick two directories from evenly-spaced positions.
first=$(sed -n "1p" depth2-dirs) &&
mid=$(sed -n "$((d2_nr / 2 + 1))p" depth2-dirs) &&

p1=$(dirname "$first") &&
p2=$(dirname "$mid") &&

# Build cone-mode sparse-checkout patterns.
{
echo "/*" &&
echo "!/*/" &&
echo "/$p1/" &&
echo "!/$p1/*/" &&
if test "$p1" != "$p2"
then
echo "/$p2/" &&
echo "!/$p2/*/"
fi &&
echo "/$first/" &&
if test "$first" != "$mid"
then
echo "/$mid/"
fi
} >sparse-patterns &&

git hash-object -w sparse-patterns >sparse-oid &&
echo "Sparse cone: $first $mid" &&
cat sparse-patterns &&
test_set_prereq SPARSE_OID
elif test "$top_nr" -ge 1
then
# Fallback: use a single top-level directory.
first=$(sed -n "1p" top-dirs) &&
{
echo "/*" &&
echo "!/*/" &&
echo "/$first/"
} >sparse-patterns &&

git hash-object -w sparse-patterns >sparse-oid &&
echo "Sparse cone: $first" &&
cat sparse-patterns &&
test_set_prereq SPARSE_OID
fi
'

test_perf 'repack (no filter)' '
git pack-objects --stdout --no-reuse-delta --revs --all </dev/null >pk
'

test_size 'repack size (no filter)' '
test_file_size pk
'

test_perf 'repack (no filter, --path-walk)' '
git pack-objects --stdout --no-reuse-delta --revs --all --path-walk </dev/null >pk
'

test_size 'repack size (no filter, --path-walk)' '
test_file_size pk
'

test_perf 'repack (blob:none)' '
git pack-objects --stdout --no-reuse-delta --revs --all --filter=blob:none </dev/null >pk
'

test_size 'repack size (blob:none)' '
test_file_size pk
'

test_perf 'repack (blob:none, --path-walk)' '
git pack-objects --stdout --no-reuse-delta --revs --all --path-walk \
--filter=blob:none </dev/null >pk
'

test_size 'repack size (blob:none, --path-walk)' '
test_file_size pk
'

test_perf 'repack (sparse:oid)' \
--prereq SPARSE_OID '
git pack-objects --stdout --no-reuse-delta --revs --all \
--filter=sparse:oid=$(cat sparse-oid) </dev/null >pk
'

test_size 'repack size (sparse:oid)' \
--prereq SPARSE_OID '
test_file_size pk
'

test_perf 'repack (sparse:oid, --path-walk)' \
--prereq SPARSE_OID '
git pack-objects --stdout --no-reuse-delta --revs --all --path-walk \
--filter=sparse:oid=$(cat sparse-oid) </dev/null >pk
'

test_size 'repack size (sparse:oid, --path-walk)' \
--prereq SPARSE_OID '
test_file_size pk
'

test_done

View File

@ -478,4 +478,129 @@ test_expect_success 'verify pack-objects w/ --missing=allow-any' '
EOF
'

# Test that --path-walk produces the same object set as standard traversal
# when using sparse:oid filters with cone-mode patterns.
#
# The sparse:oid filter restricts only blobs, not trees. Both standard
# and path-walk should produce identical sets of blobs, commits, and trees.

test_expect_success 'setup pw_sparse for path-walk comparison' '
git init pw_sparse &&
mkdir -p pw_sparse/inc/sub pw_sparse/exc/sub &&

for n in 1 2
do
echo "inc $n" >pw_sparse/inc/file$n &&
echo "inc sub $n" >pw_sparse/inc/sub/file$n &&
echo "exc $n" >pw_sparse/exc/file$n &&
echo "exc sub $n" >pw_sparse/exc/sub/file$n &&
echo "root $n" >pw_sparse/root$n || return 1
done &&

git -C pw_sparse add . &&
git -C pw_sparse commit -m "first" &&

echo "inc 1 modified" >pw_sparse/inc/file1 &&
echo "exc 1 modified" >pw_sparse/exc/file1 &&
echo "root 1 modified" >pw_sparse/root1 &&
git -C pw_sparse add . &&
git -C pw_sparse commit -m "second" &&

# Cone-mode sparse pattern: include root + inc/
printf "/*\n!/*/\n/inc/\n" |
git -C pw_sparse hash-object -w --stdin >sparse_oid
'

test_expect_success 'sparse:oid with --path-walk produces same blobs' '
oid=$(cat sparse_oid) &&

git -C pw_sparse pack-objects --revs --stdout \
--filter=sparse:oid=$oid >standard.pack <<-EOF &&
HEAD
EOF
git -C pw_sparse index-pack ../standard.pack &&
git -C pw_sparse verify-pack -v ../standard.pack >standard_verify &&

git -C pw_sparse pack-objects --revs --stdout \
--path-walk --filter=sparse:oid=$oid >pathwalk.pack <<-EOF &&
HEAD
EOF
git -C pw_sparse index-pack ../pathwalk.pack &&
git -C pw_sparse verify-pack -v ../pathwalk.pack >pathwalk_verify &&

# Blobs must match exactly
grep -E "^[0-9a-f]{40} blob" standard_verify |
awk "{print \$1}" | sort >standard_blobs &&
grep -E "^[0-9a-f]{40} blob" pathwalk_verify |
awk "{print \$1}" | sort >pathwalk_blobs &&
test_cmp standard_blobs pathwalk_blobs &&

# Commits must match exactly
grep -E "^[0-9a-f]{40} commit" standard_verify |
awk "{print \$1}" | sort >standard_commits &&
grep -E "^[0-9a-f]{40} commit" pathwalk_verify |
awk "{print \$1}" | sort >pathwalk_commits &&
test_cmp standard_commits pathwalk_commits
'

test_expect_success 'sparse:oid with --path-walk includes all trees' '
# The sparse:oid filter restricts only blobs, not trees.
# Both standard and path-walk should include the same trees.
grep -E "^[0-9a-f]{40} tree" standard_verify |
awk "{print \$1}" | sort >standard_trees &&
grep -E "^[0-9a-f]{40} tree" pathwalk_verify |
awk "{print \$1}" | sort >pathwalk_trees &&

test_cmp standard_trees pathwalk_trees
'

# Test the edge case where the same tree/blob OID appears at both an
# in-cone and out-of-cone path. When sibling directories have identical
# contents, they share a tree OID. The path-walk defers marking objects
# SEEN until after checking sparse patterns, so an object at an out-of-cone
# path can still be discovered at an in-cone path.

test_expect_success 'setup pw_shared for shared OID across cone boundary' '
git init pw_shared &&
mkdir pw_shared/aaa pw_shared/zzz &&
echo "shared content" >pw_shared/aaa/file &&
echo "shared content" >pw_shared/zzz/file &&
echo "root file" >pw_shared/rootfile &&
git -C pw_shared add . &&
git -C pw_shared commit -m "aaa and zzz share tree OID" &&

# Verify they share a tree OID
aaa_tree=$(git -C pw_shared rev-parse HEAD:aaa) &&
zzz_tree=$(git -C pw_shared rev-parse HEAD:zzz) &&
test "$aaa_tree" = "$zzz_tree" &&

# Cone pattern: include root + zzz/ (not aaa/)
printf "/*\n!/*/\n/zzz/\n" |
git -C pw_shared hash-object -w --stdin >shared_sparse_oid
'

test_expect_success 'shared tree OID: --path-walk blobs match standard' '
oid=$(cat shared_sparse_oid) &&

git -C pw_shared pack-objects --revs --stdout \
--filter=sparse:oid=$oid >shared_std.pack <<-EOF &&
HEAD
EOF
git -C pw_shared index-pack ../shared_std.pack &&
git -C pw_shared verify-pack -v ../shared_std.pack >shared_std_verify &&

git -C pw_shared pack-objects --revs --stdout \
--path-walk --filter=sparse:oid=$oid >shared_pw.pack <<-EOF &&
HEAD
EOF
git -C pw_shared index-pack ../shared_pw.pack &&
git -C pw_shared verify-pack -v ../shared_pw.pack >shared_pw_verify &&

grep -E "^[0-9a-f]{40} blob" shared_std_verify |
awk "{print \$1}" | sort >shared_std_blobs &&
grep -E "^[0-9a-f]{40} blob" shared_pw_verify |
awk "{print \$1}" | sort >shared_pw_blobs &&
test_cmp shared_std_blobs shared_pw_blobs
'

test_done

View File

@ -15,6 +15,14 @@ test_expect_success 'backfill rejects unexpected arguments' '
test_grep "unrecognized argument: --unexpected-arg" err
'

test_expect_success 'backfill rejects incompatible filter options' '
test_must_fail git backfill --objects --filter=tree:1 2>err &&
test_grep "cannot backfill with these filter options" err &&

test_must_fail git backfill --objects --filter=blob:limit=10m 2>err &&
test_grep "cannot backfill with blob size limits" err
'

# We create objects in the 'src' repo.
test_expect_success 'setup repo for object creation' '
echo "{print \$1}" >print_1.awk &&

View File

@ -7,17 +7,15 @@ test_description='direct path-walk API tests'
test_expect_success 'setup test repository' '
git checkout -b base &&

# Make some objects that will only be reachable
# via non-commit tags.
mkdir child &&
echo file >child/file &&
git add child &&
git commit -m "will abandon" &&
git tag -a -m "tree" tree-tag HEAD^{tree} &&
echo file2 >file2 &&
git add file2 &&
git commit --amend -m "will abandon" &&
git tag tree-tag2 HEAD^{tree} &&
# Create tree objects that are only reachable via tags,
# not from any commit in the history.
child_blob_oid=$(echo "child blob content" | git hash-object -t blob -w --stdin) &&
child_tree_oid=$(printf "100644 blob %s\tfile\n" "$child_blob_oid" | git mktree) &&
tree_tag_oid=$(printf "040000 tree %s\tchild\n" "$child_tree_oid" | git mktree) &&
git tag -a -m "tree" tree-tag "$tree_tag_oid" &&
file2_blob_oid=$(echo "tagged tree file2" | git hash-object -t blob -w --stdin) &&
tree_tag2_oid=$(printf "040000 tree %s\tchild\n100644 blob %s\tfile2\n" "$child_tree_oid" "$file2_blob_oid" | git mktree) &&
git tag tree-tag2 "$tree_tag2_oid" &&

echo blob >file &&
blob_oid=$(git hash-object -t blob -w --stdin <file) &&
@ -26,7 +24,7 @@ test_expect_success 'setup test repository' '
blob2_oid=$(git hash-object -t blob -w --stdin <file2) &&
git tag blob-tag2 "$blob2_oid" &&

rm -fr child file file2 &&
rm -fr file file2 &&

mkdir left &&
mkdir right &&
@ -34,7 +32,7 @@ test_expect_success 'setup test repository' '
echo b >left/b &&
echo c >right/c &&
git add . &&
git commit --amend -m "first" &&
git commit -m "first" &&
git tag -m "first" first HEAD &&

echo d >right/d &&
@ -79,23 +77,23 @@ test_expect_success 'all' '
3:tree::$(git rev-parse base^{tree})
3:tree::$(git rev-parse base~1^{tree})
3:tree::$(git rev-parse base~2^{tree})
3:tree::$(git rev-parse refs/tags/tree-tag^{})
3:tree::$(git rev-parse refs/tags/tree-tag2^{})
4:blob:a:$(git rev-parse base~2:a)
5:blob:file2:$(git rev-parse refs/tags/tree-tag2^{}:file2)
6:tree:a/:$(git rev-parse base:a)
7:tree:child/:$(git rev-parse refs/tags/tree-tag:child)
8:blob:child/file:$(git rev-parse refs/tags/tree-tag:child/file)
9:tree:left/:$(git rev-parse base:left)
9:tree:left/:$(git rev-parse base~2:left)
10:blob:left/b:$(git rev-parse base~2:left/b)
10:blob:left/b:$(git rev-parse base:left/b)
11:tree:right/:$(git rev-parse topic:right)
11:tree:right/:$(git rev-parse base~1:right)
11:tree:right/:$(git rev-parse base~2:right)
12:blob:right/c:$(git rev-parse base~2:right/c)
12:blob:right/c:$(git rev-parse topic:right/c)
13:blob:right/d:$(git rev-parse base~1:right/d)
5:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag^{})
5:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag2^{})
6:blob:file2:$(git rev-parse refs/tags/tree-tag2^{}:file2)
7:tree:a/:$(git rev-parse base:a)
8:tree:child/:$(git rev-parse refs/tags/tree-tag:child)
9:blob:child/file:$(git rev-parse refs/tags/tree-tag:child/file)
10:tree:left/:$(git rev-parse base:left)
10:tree:left/:$(git rev-parse base~2:left)
11:blob:left/b:$(git rev-parse base~2:left/b)
11:blob:left/b:$(git rev-parse base:left/b)
12:tree:right/:$(git rev-parse topic:right)
12:tree:right/:$(git rev-parse base~1:right)
12:tree:right/:$(git rev-parse base~2:right)
13:blob:right/c:$(git rev-parse base~2:right/c)
13:blob:right/c:$(git rev-parse topic:right/c)
14:blob:right/d:$(git rev-parse base~1:right/d)
blobs:10
commits:4
tags:7
@ -206,6 +204,43 @@ test_expect_success 'base & topic, sparse' '
test_cmp_sorted expect out
'

test_expect_success 'base & topic, sparse, no tree pruning' '
cat >patterns <<-EOF &&
/*
!/*/
/left/
EOF

test-tool path-walk --stdin-pl --no-pl-sparse-trees \
-- base topic <patterns >out &&

cat >expect <<-EOF &&
0:commit::$(git rev-parse topic)
0:commit::$(git rev-parse base)
0:commit::$(git rev-parse base~1)
0:commit::$(git rev-parse base~2)
1:tree::$(git rev-parse topic^{tree})
1:tree::$(git rev-parse base^{tree})
1:tree::$(git rev-parse base~1^{tree})
1:tree::$(git rev-parse base~2^{tree})
2:blob:a:$(git rev-parse base~2:a)
3:tree:a/:$(git rev-parse base:a)
4:tree:left/:$(git rev-parse base:left)
4:tree:left/:$(git rev-parse base~2:left)
5:blob:left/b:$(git rev-parse base~2:left/b)
5:blob:left/b:$(git rev-parse base:left/b)
6:tree:right/:$(git rev-parse topic:right)
6:tree:right/:$(git rev-parse base~1:right)
6:tree:right/:$(git rev-parse base~2:right)
blobs:3
commits:4
tags:0
trees:10
EOF

test_cmp_sorted expect out
'

test_expect_success 'topic only' '
test-tool path-walk -- topic >out &&

@ -415,4 +450,483 @@ test_expect_success 'trees are reported exactly once' '
test_line_count = 1 out-filtered
'

# Walk every ref (--all) with --filter=blob:none. The expected output
# keeps all four commits, all seven tags, and all thirteen trees. The
# only blobs listed are the two under /tagged-blobs (blob-tag and
# blob-tag2); no blob found at a path inside a tree is expected.
test_expect_success 'all, blob:none filter' '
test-tool path-walk --filter=blob:none -- --all >out &&

cat >expect <<-EOF &&
0:commit::$(git rev-parse topic)
0:commit::$(git rev-parse base)
0:commit::$(git rev-parse base~1)
0:commit::$(git rev-parse base~2)
1:tag:/tags:$(git rev-parse refs/tags/first)
1:tag:/tags:$(git rev-parse refs/tags/second.1)
1:tag:/tags:$(git rev-parse refs/tags/second.2)
1:tag:/tags:$(git rev-parse refs/tags/third)
1:tag:/tags:$(git rev-parse refs/tags/fourth)
1:tag:/tags:$(git rev-parse refs/tags/tree-tag)
1:tag:/tags:$(git rev-parse refs/tags/blob-tag)
2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{})
2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{})
3:tree::$(git rev-parse topic^{tree})
3:tree::$(git rev-parse base^{tree})
3:tree::$(git rev-parse base~1^{tree})
3:tree::$(git rev-parse base~2^{tree})
4:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag^{})
4:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag2^{})
5:tree:a/:$(git rev-parse base:a)
6:tree:child/:$(git rev-parse refs/tags/tree-tag:child)
7:tree:left/:$(git rev-parse base:left)
7:tree:left/:$(git rev-parse base~2:left)
8:tree:right/:$(git rev-parse topic:right)
8:tree:right/:$(git rev-parse base~1:right)
8:tree:right/:$(git rev-parse base~2:right)
blobs:2
commits:4
tags:7
trees:13
EOF

test_cmp_sorted expect out
'

# Walk only from topic with --filter=blob:none. The expected output has
# the three commits reachable from topic, their root trees, and the
# left/ and right/ subtrees, with zero blobs and zero tags.
test_expect_success 'topic only, blob:none filter' '
test-tool path-walk --filter=blob:none -- topic >out &&

cat >expect <<-EOF &&
0:commit::$(git rev-parse topic)
0:commit::$(git rev-parse base~1)
0:commit::$(git rev-parse base~2)
1:tree::$(git rev-parse topic^{tree})
1:tree::$(git rev-parse base~1^{tree})
1:tree::$(git rev-parse base~2^{tree})
2:tree:left/:$(git rev-parse base~2:left)
3:tree:right/:$(git rev-parse topic:right)
3:tree:right/:$(git rev-parse base~1:right)
3:tree:right/:$(git rev-parse base~2:right)
blobs:0
commits:3
tags:0
trees:7
EOF

test_cmp_sorted expect out
'

# Walk every ref (--all) with --filter=blob:limit=0. The expected list
# is the same one the blob:none test expects: all commits, tags, and
# trees, with the two /tagged-blobs entries as the only blobs.
test_expect_success 'all, blob:limit=0 filter' '
test-tool path-walk --filter=blob:limit=0 -- --all >out &&

cat >expect <<-EOF &&
0:commit::$(git rev-parse topic)
0:commit::$(git rev-parse base)
0:commit::$(git rev-parse base~1)
0:commit::$(git rev-parse base~2)
1:tag:/tags:$(git rev-parse refs/tags/first)
1:tag:/tags:$(git rev-parse refs/tags/second.1)
1:tag:/tags:$(git rev-parse refs/tags/second.2)
1:tag:/tags:$(git rev-parse refs/tags/third)
1:tag:/tags:$(git rev-parse refs/tags/fourth)
1:tag:/tags:$(git rev-parse refs/tags/tree-tag)
1:tag:/tags:$(git rev-parse refs/tags/blob-tag)
2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{})
2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{})
3:tree::$(git rev-parse topic^{tree})
3:tree::$(git rev-parse base^{tree})
3:tree::$(git rev-parse base~1^{tree})
3:tree::$(git rev-parse base~2^{tree})
4:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag^{})
4:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag2^{})
5:tree:a/:$(git rev-parse base:a)
6:tree:child/:$(git rev-parse refs/tags/tree-tag:child)
7:tree:left/:$(git rev-parse base:left)
7:tree:left/:$(git rev-parse base~2:left)
8:tree:right/:$(git rev-parse topic:right)
8:tree:right/:$(git rev-parse base~1:right)
8:tree:right/:$(git rev-parse base~2:right)
blobs:2
commits:4
tags:7
trees:13
EOF

test_cmp_sorted expect out
'

# Walk every ref (--all) with --filter=blob:limit=3. Besides the two
# /tagged-blobs entries, the expected output lists four path blobs:
# a, left/b (base~2 version), right/c (base~2 version), and right/d.
# The base version of left/b and the topic version of right/c are not
# expected.
# NOTE(review): presumably the omitted blobs are at or above the size
# limit; blob sizes are not visible in this test, so confirm against
# the repository setup earlier in the file.
test_expect_success 'all, blob:limit=3 filter' '
test-tool path-walk --filter=blob:limit=3 -- --all >out &&

cat >expect <<-EOF &&
0:commit::$(git rev-parse topic)
0:commit::$(git rev-parse base)
0:commit::$(git rev-parse base~1)
0:commit::$(git rev-parse base~2)
1:tag:/tags:$(git rev-parse refs/tags/first)
1:tag:/tags:$(git rev-parse refs/tags/second.1)
1:tag:/tags:$(git rev-parse refs/tags/second.2)
1:tag:/tags:$(git rev-parse refs/tags/third)
1:tag:/tags:$(git rev-parse refs/tags/fourth)
1:tag:/tags:$(git rev-parse refs/tags/tree-tag)
1:tag:/tags:$(git rev-parse refs/tags/blob-tag)
2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{})
2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{})
3:tree::$(git rev-parse topic^{tree})
3:tree::$(git rev-parse base^{tree})
3:tree::$(git rev-parse base~1^{tree})
3:tree::$(git rev-parse base~2^{tree})
4:blob:a:$(git rev-parse base~2:a)
5:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag^{})
5:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag2^{})
6:tree:a/:$(git rev-parse base:a)
7:tree:child/:$(git rev-parse refs/tags/tree-tag:child)
8:tree:left/:$(git rev-parse base:left)
8:tree:left/:$(git rev-parse base~2:left)
9:blob:left/b:$(git rev-parse base~2:left/b)
10:tree:right/:$(git rev-parse topic:right)
10:tree:right/:$(git rev-parse base~1:right)
10:tree:right/:$(git rev-parse base~2:right)
11:blob:right/c:$(git rev-parse base~2:right/c)
12:blob:right/d:$(git rev-parse base~1:right/d)
blobs:6
commits:4
tags:7
trees:13
EOF

test_cmp_sorted expect out
'

# Walk every ref (--all) with --filter=tree:0. The expected output
# keeps all commits and tags plus the objects that tags point at
# directly: the two /tagged-blobs entries and the two /tagged-trees
# entries. No commit root tree and no subtree is expected.
# The tagged trees are spelled with the ^{} peel syntax, the same way
# the other tests in this file name them; the resolved OIDs are the
# same tree objects either way.
test_expect_success 'all, tree:0 filter' '
test-tool path-walk --filter=tree:0 -- --all >out &&

cat >expect <<-EOF &&
0:commit::$(git rev-parse topic)
0:commit::$(git rev-parse base)
0:commit::$(git rev-parse base~1)
0:commit::$(git rev-parse base~2)
1:tag:/tags:$(git rev-parse refs/tags/first)
1:tag:/tags:$(git rev-parse refs/tags/second.1)
1:tag:/tags:$(git rev-parse refs/tags/second.2)
1:tag:/tags:$(git rev-parse refs/tags/third)
1:tag:/tags:$(git rev-parse refs/tags/fourth)
1:tag:/tags:$(git rev-parse refs/tags/tree-tag)
1:tag:/tags:$(git rev-parse refs/tags/blob-tag)
2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{})
2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{})
3:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag^{})
3:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag2^{})
blobs:2
commits:4
tags:7
trees:2
EOF

test_cmp_sorted expect out
'

# Walk only from topic with --filter=tree:0. The expected output is
# just the three commits reachable from topic: zero trees, zero blobs,
# and zero tags.
test_expect_success 'topic only, tree:0 filter' '
test-tool path-walk --filter=tree:0 -- topic >out &&

cat >expect <<-EOF &&
0:commit::$(git rev-parse topic)
0:commit::$(git rev-parse base~1)
0:commit::$(git rev-parse base~2)
blobs:0
commits:3
tags:0
trees:0
EOF

test_cmp_sorted expect out
'

# A tree filter with depth 1 must make test-tool path-walk exit with a
# failure, and stderr must contain the message naming tree:1 as
# unsupported by the path-walk API.
test_expect_success 'tree:1 filter is rejected' '
test_must_fail test-tool path-walk --filter=tree:1 -- --all 2>err &&
test_grep "tree:1 filter not supported by the path-walk API" err
'

# Walk every ref (--all) with --filter=object:type=commit. Only the
# four commits are expected; tags, trees, and blobs all count zero,
# including the objects that tags point at directly.
test_expect_success 'all, object:type=commit filter' '
test-tool path-walk --filter=object:type=commit -- --all >out &&

cat >expect <<-EOF &&
0:commit::$(git rev-parse topic)
0:commit::$(git rev-parse base)
0:commit::$(git rev-parse base~1)
0:commit::$(git rev-parse base~2)
blobs:0
commits:4
tags:0
trees:0
EOF

test_cmp_sorted expect out
'

# Walk every ref (--all) with --filter=object:type=tag. Only the seven
# tag objects under /tags are expected; commits, trees, and blobs all
# count zero.
test_expect_success 'all, object:type=tag filter' '
test-tool path-walk --filter=object:type=tag -- --all >out &&

cat >expect <<-EOF &&
0:tag:/tags:$(git rev-parse refs/tags/first)
0:tag:/tags:$(git rev-parse refs/tags/second.1)
0:tag:/tags:$(git rev-parse refs/tags/second.2)
0:tag:/tags:$(git rev-parse refs/tags/third)
0:tag:/tags:$(git rev-parse refs/tags/fourth)
0:tag:/tags:$(git rev-parse refs/tags/tree-tag)
0:tag:/tags:$(git rev-parse refs/tags/blob-tag)
blobs:0
commits:0
tags:7
trees:0
EOF

test_cmp_sorted expect out
'

# Walk every ref (--all) with --filter=object:type=tree. The expected
# output is the thirteen trees only: four commit root trees, the two
# /tagged-trees entries, and the a/, child/, left/, and right/
# subtrees. Commits, tags, and blobs all count zero.
test_expect_success 'all, object:type=tree filter' '
test-tool path-walk --filter=object:type=tree -- --all >out &&

cat >expect <<-EOF &&
0:tree::$(git rev-parse topic^{tree})
0:tree::$(git rev-parse base^{tree})
0:tree::$(git rev-parse base~1^{tree})
0:tree::$(git rev-parse base~2^{tree})
1:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag^{})
1:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag2^{})
2:tree:a/:$(git rev-parse base:a)
3:tree:child/:$(git rev-parse refs/tags/tree-tag:child)
4:tree:left/:$(git rev-parse base:left)
4:tree:left/:$(git rev-parse base~2:left)
5:tree:right/:$(git rev-parse topic:right)
5:tree:right/:$(git rev-parse base~1:right)
5:tree:right/:$(git rev-parse base~2:right)
blobs:0
commits:0
tags:0
trees:13
EOF

test_cmp_sorted expect out
'

# Walk every ref (--all) with --filter=object:type=blob. The expected
# output is eight blobs only: the two /tagged-blobs entries plus the
# blobs at a, left/b, right/c, and right/d. Commits, tags, and trees
# all count zero.
# NOTE(review): file2 and child/file (blobs inside the tagged trees)
# are not in this expected list, although other expectations in this
# file list them. Confirm that omitting them here is intended.
test_expect_success 'all, object:type=blob filter' '
test-tool path-walk --filter=object:type=blob -- --all >out &&

cat >expect <<-EOF &&
0:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{})
0:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{})
1:blob:a:$(git rev-parse base~2:a)
2:blob:left/b:$(git rev-parse base:left/b)
2:blob:left/b:$(git rev-parse base~2:left/b)
3:blob:right/c:$(git rev-parse base~2:right/c)
3:blob:right/c:$(git rev-parse topic:right/c)
4:blob:right/d:$(git rev-parse base~1:right/d)
blobs:8
commits:0
tags:0
trees:0
EOF

test_cmp_sorted expect out
'

# Walk every ref (--all) with the combined filter blob:none+tree:0.
# The expected output keeps all commits and tags plus the objects that
# tags point at directly (two /tagged-blobs and two /tagged-trees
# entries); no commit root tree, subtree, or path blob is expected.
# The tagged trees are spelled with the ^{} peel syntax, the same way
# the other tests in this file name them; the resolved OIDs are the
# same tree objects either way.
test_expect_success 'all, combine:blob:none+tree:0 filter' '
test-tool path-walk \
--filter=combine:blob:none+tree:0 -- --all >out &&

cat >expect <<-EOF &&
0:commit::$(git rev-parse topic)
0:commit::$(git rev-parse base)
0:commit::$(git rev-parse base~1)
0:commit::$(git rev-parse base~2)
1:tag:/tags:$(git rev-parse refs/tags/first)
1:tag:/tags:$(git rev-parse refs/tags/second.1)
1:tag:/tags:$(git rev-parse refs/tags/second.2)
1:tag:/tags:$(git rev-parse refs/tags/third)
1:tag:/tags:$(git rev-parse refs/tags/fourth)
1:tag:/tags:$(git rev-parse refs/tags/tree-tag)
1:tag:/tags:$(git rev-parse refs/tags/blob-tag)
2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{})
2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{})
3:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag^{})
3:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag2^{})
blobs:2
commits:4
tags:7
trees:2
EOF

test_cmp_sorted expect out
'

# Walk every ref (--all) with the combined filter
# object:type=blob+blob:limit=3. The expected output is six blobs and
# nothing else: the same six blobs the blob:limit=3 test expects, with
# commits, tags, and trees all counting zero.
test_expect_success 'all, combine:object:type=blob+blob:limit=3 filter' '
test-tool path-walk \
--filter=combine:object:type=blob+blob:limit=3 \
-- --all >out &&

cat >expect <<-EOF &&
0:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{})
0:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{})
1:blob:a:$(git rev-parse base~2:a)
2:blob:left/b:$(git rev-parse base~2:left/b)
3:blob:right/c:$(git rev-parse base~2:right/c)
4:blob:right/d:$(git rev-parse base~1:right/d)
blobs:6
commits:0
tags:0
trees:0
EOF

test_cmp_sorted expect out
'

# Combine two object:type filters that select different types (blob
# and tree). The command is expected to succeed and report no objects
# at all: every summary count is zero.
test_expect_success 'all, combine of disjoint object:types is empty' '
test-tool path-walk \
--filter=combine:object:type=blob+object:type=tree \
-- --all >out &&

cat >expect <<-EOF &&
blobs:0
commits:0
tags:0
trees:0
EOF

test_cmp_sorted expect out
'

# A combine filter that contains tree:1 as one of its parts must make
# test-tool path-walk fail, with the same tree:1 error message on
# stderr that the standalone tree:1 test checks for.
test_expect_success 'combine: rejects unsupported subfilters' '
test_must_fail test-tool path-walk \
--filter=combine:tree:1+blob:none -- --all 2>err &&
test_grep "tree:1 filter not supported by the path-walk API" err
'

# Write the sparse patterns to a file, store that file as a blob in the
# object database (hash-object -w), and keep the blob OID in
# $sparse_oid. The sparse:oid tests that follow pass this OID in their
# --filter argument.
test_expect_success 'setup sparse filter blob' '
# Cone-mode patterns: include root, exclude all dirs, include left/
cat >patterns <<-\EOF &&
/*
!/*/
/left/
EOF
sparse_oid=$(git hash-object -w -t blob patterns)
'

# Walk every ref (--all) with --filter=sparse:oid=$sparse_oid. The
# expected output keeps all commits, tags, and all thirteen trees,
# including a/, child/, and right/. Blobs are limited to the two
# /tagged-blobs entries, the root-level blobs a and file2, and both
# versions of left/b; no blob under child/ or right/ is expected.
test_expect_success 'all, sparse:oid filter' '
test-tool path-walk --filter=sparse:oid=$sparse_oid -- --all >out &&

cat >expect <<-EOF &&
0:commit::$(git rev-parse topic)
0:commit::$(git rev-parse base)
0:commit::$(git rev-parse base~1)
0:commit::$(git rev-parse base~2)
1:tag:/tags:$(git rev-parse refs/tags/first)
1:tag:/tags:$(git rev-parse refs/tags/second.1)
1:tag:/tags:$(git rev-parse refs/tags/second.2)
1:tag:/tags:$(git rev-parse refs/tags/third)
1:tag:/tags:$(git rev-parse refs/tags/fourth)
1:tag:/tags:$(git rev-parse refs/tags/tree-tag)
1:tag:/tags:$(git rev-parse refs/tags/blob-tag)
2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{})
2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{})
3:tree::$(git rev-parse topic^{tree})
3:tree::$(git rev-parse base^{tree})
3:tree::$(git rev-parse base~1^{tree})
3:tree::$(git rev-parse base~2^{tree})
4:blob:a:$(git rev-parse base~2:a)
5:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag^{})
5:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag2^{})
6:blob:file2:$(git rev-parse refs/tags/tree-tag2^{}:file2)
7:tree:a/:$(git rev-parse base:a)
8:tree:child/:$(git rev-parse refs/tags/tree-tag:child)
9:tree:left/:$(git rev-parse base:left)
9:tree:left/:$(git rev-parse base~2:left)
10:blob:left/b:$(git rev-parse base~2:left/b)
10:blob:left/b:$(git rev-parse base:left/b)
11:tree:right/:$(git rev-parse topic:right)
11:tree:right/:$(git rev-parse base~1:right)
11:tree:right/:$(git rev-parse base~2:right)
blobs:6
commits:4
tags:7
trees:13
EOF

test_cmp_sorted expect out
'

# Walk only from topic with --filter=sparse:oid=$sparse_oid. The
# expected output has the three commits reachable from topic, their
# root trees, and the left/ and right/ subtrees. The only blobs are the
# root-level a and left/b; no blob under right/ is expected.
test_expect_success 'topic only, sparse:oid filter' '
test-tool path-walk --filter=sparse:oid=$sparse_oid -- topic >out &&

cat >expect <<-EOF &&
0:commit::$(git rev-parse topic)
0:commit::$(git rev-parse base~1)
0:commit::$(git rev-parse base~2)
1:tree::$(git rev-parse topic^{tree})
1:tree::$(git rev-parse base~1^{tree})
1:tree::$(git rev-parse base~2^{tree})
2:blob:a:$(git rev-parse base~2:a)
3:tree:left/:$(git rev-parse base~2:left)
4:blob:left/b:$(git rev-parse base~2:left/b)
5:tree:right/:$(git rev-parse topic:right)
5:tree:right/:$(git rev-parse base~1:right)
5:tree:right/:$(git rev-parse base~2:right)
blobs:2
commits:3
tags:0
trees:7
EOF

test_cmp_sorted expect out
'

# Demonstrate the SEEN flag ordering issue: when the same tree/blob OID
# appears at two sibling paths where one is in-cone and the other is
# out-of-cone, the path-walk must still discover blobs at the in-cone
# path even when the shared tree OID was first encountered out-of-cone.
# Since sparse:oid includes all trees, the out-of-cone tree (aaa/) is
# walked first, and its blob is skipped. The path-walk then re-walks
# the same tree OID at the in-cone path (zzz/) to find the blob there.

# Create the orphan branch shared-tree with one commit containing
# rootfile plus two directories, aaa/ and zzz/, that each hold a file
# with identical content. The test asserts that both directories
# resolve to the same tree OID. It then stores a pattern blob that
# includes root entries and zzz/ (but not aaa/) and keeps its OID in
# $shared_sparse_oid for the test that follows.
test_expect_success 'setup shared tree OID across cone boundary' '
git checkout --orphan shared-tree &&
git rm -rf . &&
mkdir aaa zzz &&
echo "shared content" >aaa/file &&
echo "shared content" >zzz/file &&
echo "root file" >rootfile &&
git add aaa zzz rootfile &&
git commit -m "aaa and zzz have same tree OID" &&

# Verify they really share a tree OID
aaa_tree=$(git rev-parse HEAD:aaa) &&
zzz_tree=$(git rev-parse HEAD:zzz) &&
test "$aaa_tree" = "$zzz_tree" &&

# Cone pattern: include root + zzz/ (not aaa/)
cat >shared-patterns <<-\EOF &&
/*
!/*/
/zzz/
EOF
shared_sparse_oid=$(git hash-object -w -t blob shared-patterns)
'

# Walk the shared-tree branch with --filter=sparse:oid=$shared_sparse_oid.
# Both the aaa/ and zzz/ trees are expected in the output even though
# they are the same tree object. Of the blobs, only rootfile and
# zzz/file are expected; there is no aaa/file entry.
test_expect_success 'sparse:oid with shared tree OID across cone boundary' '
test-tool path-walk \
--filter=sparse:oid=$shared_sparse_oid \
-- shared-tree >out &&

cat >expect <<-EOF &&
0:commit::$(git rev-parse shared-tree)
1:tree::$(git rev-parse shared-tree^{tree})
2:blob:rootfile:$(git rev-parse shared-tree:rootfile)
3:tree:aaa/:$(git rev-parse shared-tree:aaa)
4:tree:zzz/:$(git rev-parse shared-tree:zzz)
5:blob:zzz/file:$(git rev-parse shared-tree:zzz/file)
blobs:2
commits:1
tags:0
trees:3
EOF

test_cmp_sorted expect out
'

test_done