path-walk: support blobless filter

The 'git pack-objects' command can opt in to using the path-walk API for
scanning the objects. Currently, this option is dynamically disabled if
combined with '--filter=<X>', even when using a simple filter such as
'blob:none' to signal a blobless packfile. This is a common scenario for
repos at scale, so it is worth integrating.

Also, users can opt in to the '--path-walk' option by default through
the pack.usePathWalk=true config option. When using that in a blobless
partial clone, the following warning can appear even though the user did
not specify either option directly:

  warning: cannot use --filter with --path-walk

Teach the path-walk API to handle the 'blob:none' object filter
natively. When revs->filter.choice is LOFC_BLOB_NONE, the path-walk
sets info->blobs to 0 (skipping all blob objects) and clears the
filter from revs so that prepare_revision_walk() does not reject the
configuration.

This check is implemented in the static prepare_filters() method, which
will simultaneously check if the input filters are compatible and will
make the appropriate mutations to the path_walk_info and filters if the
path_walk_info is non-NULL. This allows us to use this logic both in the
API method path_walk_filter_compatible() for use in
builtin/pack-objects.c and as a prep step in walk_objects_by_path().

Update the test helper (test-path-walk) to accept --filter=<spec>
as a test-tool option (before '--'), applying it to revs after
setup_revisions() to avoid the --objects requirement check. We can also
revert recent GIT_TEST_PACK_PATH_WALK overrides in t5620.

Also switch test-path-walk from REV_INFO_INIT with manual repo
assignment to repo_init_revisions(), which properly initializes
the filter_spec strbuf needed for filter parsing.

Add tests for blob:none with --all and with a single branch.

The performance test p5315 shows the impact of this change when using
blobless filters:

Test                                           HEAD~1     HEAD
---------------------------------------------------------------------
5315.6: repack (blob:none)                      13.53   13.87  +2.5%
5315.7: repack size (blob:none)                137.7M  137.8M  +0.1%
5315.8: repack (blob:none, --path-walk)         13.51   23.43 +73.4%
5315.9: repack size (blob:none, --path-walk)   137.7M  115.2M -16.3%

These performance tests were run on the Git repository. The --path-walk
feature shows meaningful space savings (16% smaller for blobless packs)
at the cost of increased computation time due to the two compression
passes. This data demonstrates that the feature is engaged and provides
real compression benefits when --no-reuse-delta forces fresh deltas.

Co-Authored-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Derrick Stolee <stolee@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
main
Derrick Stolee 2026-05-22 18:24:29 +00:00 committed by Junio C Hamano
parent 7a7070eebc
commit 6d87f0e8a3
7 changed files with 113 additions and 14 deletions

View File

@ -402,9 +402,9 @@ will be automatically changed to version `1`.
of filenames that cause collisions in Git's default name-hash
algorithm.
+
Incompatible with `--delta-islands`, `--shallow`, or `--filter`. The
`--use-bitmap-index` option will be ignored in the presence of
`--path-walk.`
Incompatible with `--delta-islands`. The `--use-bitmap-index` option is
ignored in the presence of `--path-walk`. The `--path-walk` option
supports the `--filter=<spec>` form `blob:none`.


DELTA ISLANDS

View File

@ -5177,7 +5177,7 @@ int cmd_pack_objects(int argc,

if (path_walk) {
const char *option = NULL;
if (filter_options.choice)
if (!path_walk_filter_compatible(&filter_options))
option = "--filter";
else if (use_delta_islands)
option = "--delta-islands";

View File

@ -9,6 +9,7 @@
#include "hashmap.h"
#include "hex.h"
#include "list-objects.h"
#include "list-objects-filter-options.h"
#include "object.h"
#include "oid-array.h"
#include "path.h"
@ -495,6 +496,32 @@ static int setup_pending_objects(struct path_walk_info *info,
return 0;
}

/*
 * Decide whether the object filter in 'options' can be honored by the
 * path-walk API. Returns 1 if it can, and 0 after reporting an error if
 * it cannot.
 *
 * When 'info' is non-NULL, a supported filter is also applied to the
 * walk: for 'blob:none', blobs are switched off in 'info' and the filter
 * is released from 'options'. When 'info' is NULL, the supported cases
 * leave both untouched, so the call acts as a compatibility check only.
 */
static int prepare_filters(struct path_walk_info *info,
			   struct list_objects_filter_options *options)
{
	/* No filter requested: nothing to check or apply. */
	if (options->choice == LOFC_DISABLED)
		return 1;

	/* Anything other than 'blob:none' is rejected. */
	if (options->choice != LOFC_BLOB_NONE) {
		error(_("object filter '%s' not supported by the path-walk API"),
		      list_objects_filter_spec(options));
		return 0;
	}

	/* Check-only mode: compatible, but there is no walk to adjust. */
	if (!info)
		return 1;

	/* Express 'blob:none' natively by skipping blobs in the walk. */
	info->blobs = 0;
	list_objects_filter_release(options);
	return 1;
}

/*
 * Check-only entry point: hands prepare_filters() a NULL path_walk_info,
 * so no walk settings are changed, and returns its verdict (1 when the
 * filter is compatible, 0 otherwise).
 */
int path_walk_filter_compatible(struct list_objects_filter_options *options)
{
	struct path_walk_info *check_only = NULL;

	return prepare_filters(check_only, options);
}

/**
* Given the configuration of 'info', walk the commits based on 'info->revs' and
* call 'info->path_fn' on each discovered path.
@ -522,6 +549,9 @@ int walk_objects_by_path(struct path_walk_info *info)

trace2_region_enter("path-walk", "commit-walk", info->revs->repo);

if (!prepare_filters(info, &info->revs->filter))
return -1;

CALLOC_ARRAY(commit_list, 1);
commit_list->type = OBJ_COMMIT;


View File

@ -90,3 +90,10 @@ void path_walk_info_clear(struct path_walk_info *info);
* Returns nonzero on an error.
*/
int walk_objects_by_path(struct path_walk_info *info);

/* Forward declaration: only a pointer to this struct is used below. */
struct list_objects_filter_options;
/**
 * Given a set of options for filtering objects, return 1 if the options
 * are compatible with the path-walk API and 0 otherwise.
 *
 * An unsupported filter is also reported through error() before 0 is
 * returned.
 */
int path_walk_filter_compatible(struct list_objects_filter_options *options);

View File

@ -4,6 +4,7 @@
#include "dir.h"
#include "environment.h"
#include "hex.h"
#include "list-objects-filter-options.h"
#include "object-name.h"
#include "object.h"
#include "pretty.h"
@ -71,6 +72,8 @@ int cmd__path_walk(int argc, const char **argv)
struct rev_info revs = REV_INFO_INIT;
struct path_walk_info info = PATH_WALK_INFO_INIT;
struct path_walk_test_data data = { 0 };
struct list_objects_filter_options filter_options =
LIST_OBJECTS_FILTER_INIT;
struct option options[] = {
OPT_BOOL(0, "blobs", &info.blobs,
N_("toggle inclusion of blob objects")),
@ -86,11 +89,12 @@ int cmd__path_walk(int argc, const char **argv)
N_("toggle aggressive edge walk")),
OPT_BOOL(0, "stdin-pl", &stdin_pl,
N_("read a pattern list over stdin")),
OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
OPT_END(),
};

setup_git_directory();
revs.repo = the_repository;
repo_init_revisions(the_repository, &revs, NULL);

argc = parse_options(argc, argv, NULL,
options, path_walk_usage,
@ -101,6 +105,10 @@ int cmd__path_walk(int argc, const char **argv)
else
usage(path_walk_usage[0]);

/* Apply the filter after setup_revisions to avoid the --objects check. */
if (filter_options.choice)
list_objects_filter_copy(&revs.filter, &filter_options);

info.revs = &revs;
info.path_fn = emit_block;
info.path_fn_data = &data;
@ -129,6 +137,7 @@ int cmd__path_walk(int argc, const char **argv)
free(info.pl);
}

list_objects_filter_release(&filter_options);
release_revisions(&revs);
return res;
}

View File

@ -298,9 +298,6 @@ test_expect_success 'backfill with prefix pathspec' '
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
test_line_count = 48 missing &&

# If we enable --path-walk here, we will get a warning over stderr
# due to incompatibilities with --filter.
GIT_TEST_PACK_PATH_WALK=0 \
git -C backfill-path backfill HEAD -- d/f 2>err &&
test_must_be_empty err &&

@ -318,9 +315,6 @@ test_expect_success 'backfill with multiple pathspecs' '
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
test_line_count = 48 missing &&

# If we enable --path-walk here, we will get a warning over stderr
# due to incompatibilities with --filter.
GIT_TEST_PACK_PATH_WALK=0 \
git -C backfill-path backfill HEAD -- d/f a 2>err &&
test_must_be_empty err &&

@ -338,9 +332,6 @@ test_expect_success 'backfill with wildcard pathspec' '
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
test_line_count = 48 missing &&

# If we enable --path-walk here, we will get a warning over stderr
# due to incompatibilities with --filter.
GIT_TEST_PACK_PATH_WALK=0 \
git -C backfill-path backfill HEAD -- "d/file.*.txt" 2>err &&
test_must_be_empty err &&


View File

@ -415,4 +415,66 @@ test_expect_success 'trees are reported exactly once' '
test_line_count = 1 out-filtered
'

# Walk all refs with the blob:none filter given as a test-tool option
# (before "--"). The expected output lists commits, tags and trees; the
# only blob entries are the two under /tagged-blobs.
# NOTE(review): presumably directly tagged blobs are exempt from the
# filter -- confirm against the path-walk implementation.
test_expect_success 'all, blob:none filter' '
test-tool path-walk --filter=blob:none -- --all >out &&

cat >expect <<-EOF &&
0:commit::$(git rev-parse topic)
0:commit::$(git rev-parse base)
0:commit::$(git rev-parse base~1)
0:commit::$(git rev-parse base~2)
1:tag:/tags:$(git rev-parse refs/tags/first)
1:tag:/tags:$(git rev-parse refs/tags/second.1)
1:tag:/tags:$(git rev-parse refs/tags/second.2)
1:tag:/tags:$(git rev-parse refs/tags/third)
1:tag:/tags:$(git rev-parse refs/tags/fourth)
1:tag:/tags:$(git rev-parse refs/tags/tree-tag)
1:tag:/tags:$(git rev-parse refs/tags/blob-tag)
2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{})
2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{})
3:tree::$(git rev-parse topic^{tree})
3:tree::$(git rev-parse base^{tree})
3:tree::$(git rev-parse base~1^{tree})
3:tree::$(git rev-parse base~2^{tree})
3:tree::$(git rev-parse refs/tags/tree-tag^{})
3:tree::$(git rev-parse refs/tags/tree-tag2^{})
4:tree:a/:$(git rev-parse base:a)
5:tree:child/:$(git rev-parse refs/tags/tree-tag:child)
6:tree:left/:$(git rev-parse base:left)
6:tree:left/:$(git rev-parse base~2:left)
7:tree:right/:$(git rev-parse topic:right)
7:tree:right/:$(git rev-parse base~1:right)
7:tree:right/:$(git rev-parse base~2:right)
blobs:2
commits:4
tags:7
trees:13
EOF

test_cmp_sorted expect out
'

# Walk only 'topic' with the blob:none filter: the expected output has
# commits and trees, with zero blobs and zero tags.
test_expect_success 'topic only, blob:none filter' '
test-tool path-walk --filter=blob:none -- topic >out &&

cat >expect <<-EOF &&
0:commit::$(git rev-parse topic)
0:commit::$(git rev-parse base~1)
0:commit::$(git rev-parse base~2)
1:tree::$(git rev-parse topic^{tree})
1:tree::$(git rev-parse base~1^{tree})
1:tree::$(git rev-parse base~2^{tree})
2:tree:left/:$(git rev-parse base~2:left)
3:tree:right/:$(git rev-parse topic:right)
3:tree:right/:$(git rev-parse base~1:right)
3:tree:right/:$(git rev-parse base~2:right)
blobs:0
commits:3
tags:0
trees:7
EOF

test_cmp_sorted expect out
'

test_done