path-walk: support blob size limit filter
Extend the path-walk API to handle the 'blob:limit=<size>' object filter natively. This filter omits blobs whose size is equal to or greater than the given limit, matching the semantics used by the list-objects-filter machinery. When revs->filter.choice is LOFC_BLOB_LIMIT, prepare_filters() stores the limit value in info->blob_limit and clears the filter from revs. If the limit is zero, the filter degenerates to blob:none (all blobs excluded), so info->blobs is set to 0 instead. During walk_path(), blob batches are filtered before being delivered to the callback: each blob's size is checked via odb_read_object_info(), and only blobs strictly smaller than the limit are included. Blobs whose size cannot be determined (e.g. missing in a partial clone) are conservatively included, matching the existing filter behavior. Batches that are empty after filtering are skipped entirely. The check for inclusion in the path batch looks a little strange at first glance. We use odb_read_object_info() to read the object's size. Given everything established up to this point, that call _should_ return OBJ_BLOB. Since we are focused on the size filter, we use a short-circuiting OR (||) so that the size check is skipped, and the object is kept, whenever the call returns a different object type. Note that this inspection of object sizes requires the content to be present in the repository. The odb_read_object_info() call will download a missing blob on demand. This means that using the path-walk API within 'git backfill' would not work well with this filter type. The intention of that command is to download missing blobs in batches; downloading objects one by one would defeat that purpose. Update the validation in 'git backfill' to add its own compatibility check on top of path_walk_filter_compatible(). Add tests for blob:limit=0 (equivalent to blob:none) and blob:limit=3 (which exercises partial filtering within a batch where some blobs are kept and others are excluded).
Co-authored-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Derrick Stolee <stolee@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> main
parent
bf24de4b7c
commit
f1b5d3da16
|
|
@ -404,7 +404,7 @@ will be automatically changed to version `1`.
|
|||
+
|
||||
Incompatible with `--delta-islands`. The `--use-bitmap-index` option is
|
||||
ignored in the presence of `--path-walk`. The `--path-walk` option
|
||||
supports the `--filter=<spec>` form `blob:none`.
|
||||
supports the `--filter=<spec>` forms `blob:none` and `blob:limit=<n>`.
|
||||
|
||||
|
||||
DELTA ISLANDS
|
||||
|
|
|
|||
|
|
@ -98,6 +98,8 @@ static void reject_unsupported_rev_list_options(struct rev_info *revs)
|
|||
"--diff-merges");
|
||||
if (!path_walk_filter_compatible(&revs->filter))
|
||||
die(_("cannot backfill with these filter options"));
|
||||
if (revs->filter.blob_limit_value)
|
||||
die(_("cannot backfill with blob size limits"));
|
||||
}
|
||||
|
||||
static int do_backfill(struct backfill_context *ctx)
|
||||
|
|
|
|||
41
path-walk.c
41
path-walk.c
|
|
@ -10,6 +10,7 @@
|
|||
#include "hex.h"
|
||||
#include "list-objects.h"
|
||||
#include "list-objects-filter-options.h"
|
||||
#include "odb.h"
|
||||
#include "object.h"
|
||||
#include "oid-array.h"
|
||||
#include "path.h"
|
||||
|
|
@ -327,13 +328,35 @@ static int walk_path(struct path_walk_context *ctx,
|
|||
/*
|
||||
* Evaluate function pointer on this data, if requested.
|
||||
* Ignore object type filters for tagged objects (path starts
|
||||
* with `/`).
|
||||
* with `/`), first for blobs and then other types.
|
||||
*/
|
||||
if ((list->type == OBJ_TREE && (ctx->info->trees || path_is_for_direct_objects(path))) ||
|
||||
(list->type == OBJ_BLOB && (ctx->info->blobs || path_is_for_direct_objects(path))) ||
|
||||
(list->type == OBJ_TAG && ctx->info->tags))
|
||||
if (list->type == OBJ_BLOB &&
|
||||
ctx->info->blob_limit &&
|
||||
!path_is_for_direct_objects(path)) {
|
||||
struct oid_array filtered = OID_ARRAY_INIT;
|
||||
|
||||
for (size_t i = 0; i < list->oids.nr; i++) {
|
||||
unsigned long size;
|
||||
|
||||
if (odb_read_object_info(ctx->repo->objects,
|
||||
&list->oids.oid[i],
|
||||
&size) != OBJ_BLOB ||
|
||||
size < ctx->info->blob_limit)
|
||||
oid_array_append(&filtered,
|
||||
&list->oids.oid[i]);
|
||||
}
|
||||
|
||||
if (filtered.nr)
|
||||
ret = ctx->info->path_fn(path, &filtered, list->type,
|
||||
ctx->info->path_fn_data);
|
||||
oid_array_clear(&filtered);
|
||||
} else if (path_is_for_direct_objects(path) ||
|
||||
(list->type == OBJ_TREE && ctx->info->trees) ||
|
||||
(list->type == OBJ_BLOB && ctx->info->blobs) ||
|
||||
(list->type == OBJ_TAG && ctx->info->tags)) {
|
||||
ret = ctx->info->path_fn(path, &list->oids, list->type,
|
||||
ctx->info->path_fn_data);
|
||||
}
|
||||
|
||||
/* Expand data for children. */
|
||||
if (list->type == OBJ_TREE) {
|
||||
|
|
@ -510,6 +533,16 @@ static int prepare_filters(struct path_walk_info *info,
|
|||
}
|
||||
return 1;
|
||||
|
||||
case LOFC_BLOB_LIMIT:
|
||||
if (info) {
|
||||
if (!options->blob_limit_value)
|
||||
info->blobs = 0;
|
||||
else
|
||||
info->blob_limit = options->blob_limit_value;
|
||||
list_objects_filter_release(options);
|
||||
}
|
||||
return 1;
|
||||
|
||||
default:
|
||||
error(_("object filter '%s' not supported by the path-walk API"),
|
||||
list_objects_filter_spec(options));
|
||||
|
|
|
|||
|
|
@ -47,6 +47,13 @@ struct path_walk_info {
|
|||
int blobs;
|
||||
int tags;
|
||||
|
||||
/**
|
||||
* If non-zero, specifies a maximum blob size. Blobs with a
|
||||
* size equal to or greater than this limit will not be
|
||||
* emitted unless included in 'pending'.
|
||||
*/
|
||||
unsigned long blob_limit;
|
||||
|
||||
/**
|
||||
* When 'prune_all_uninteresting' is set and a path has all objects
|
||||
* marked as UNINTERESTING, then the path-walk will not visit those
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ test_expect_success 'backfill rejects incompatible filter options' '
|
|||
test_grep "cannot backfill with these filter options" err &&
|
||||
|
||||
test_must_fail git backfill --objects --filter=blob:limit=10m 2>err &&
|
||||
test_grep "cannot backfill with these filter options" err
|
||||
test_grep "cannot backfill with blob size limits" err
|
||||
'
|
||||
|
||||
# We create objects in the 'src' repo.
|
||||
|
|
|
|||
|
|
@ -477,4 +477,86 @@ test_expect_success 'topic only, blob:none filter' '
|
|||
test_cmp_sorted expect out
|
||||
'
|
||||
|
||||
test_expect_success 'all, blob:limit=0 filter' '
|
||||
test-tool path-walk --filter=blob:limit=0 -- --all >out &&
|
||||
|
||||
cat >expect <<-EOF &&
|
||||
0:commit::$(git rev-parse topic)
|
||||
0:commit::$(git rev-parse base)
|
||||
0:commit::$(git rev-parse base~1)
|
||||
0:commit::$(git rev-parse base~2)
|
||||
1:tag:/tags:$(git rev-parse refs/tags/first)
|
||||
1:tag:/tags:$(git rev-parse refs/tags/second.1)
|
||||
1:tag:/tags:$(git rev-parse refs/tags/second.2)
|
||||
1:tag:/tags:$(git rev-parse refs/tags/third)
|
||||
1:tag:/tags:$(git rev-parse refs/tags/fourth)
|
||||
1:tag:/tags:$(git rev-parse refs/tags/tree-tag)
|
||||
1:tag:/tags:$(git rev-parse refs/tags/blob-tag)
|
||||
2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{})
|
||||
2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{})
|
||||
3:tree::$(git rev-parse topic^{tree})
|
||||
3:tree::$(git rev-parse base^{tree})
|
||||
3:tree::$(git rev-parse base~1^{tree})
|
||||
3:tree::$(git rev-parse base~2^{tree})
|
||||
3:tree::$(git rev-parse refs/tags/tree-tag^{})
|
||||
3:tree::$(git rev-parse refs/tags/tree-tag2^{})
|
||||
4:tree:a/:$(git rev-parse base:a)
|
||||
5:tree:child/:$(git rev-parse refs/tags/tree-tag:child)
|
||||
6:tree:left/:$(git rev-parse base:left)
|
||||
6:tree:left/:$(git rev-parse base~2:left)
|
||||
7:tree:right/:$(git rev-parse topic:right)
|
||||
7:tree:right/:$(git rev-parse base~1:right)
|
||||
7:tree:right/:$(git rev-parse base~2:right)
|
||||
blobs:2
|
||||
commits:4
|
||||
tags:7
|
||||
trees:13
|
||||
EOF
|
||||
|
||||
test_cmp_sorted expect out
|
||||
'
|
||||
|
||||
test_expect_success 'all, blob:limit=3 filter' '
|
||||
test-tool path-walk --filter=blob:limit=3 -- --all >out &&
|
||||
|
||||
cat >expect <<-EOF &&
|
||||
0:commit::$(git rev-parse topic)
|
||||
0:commit::$(git rev-parse base)
|
||||
0:commit::$(git rev-parse base~1)
|
||||
0:commit::$(git rev-parse base~2)
|
||||
1:tag:/tags:$(git rev-parse refs/tags/first)
|
||||
1:tag:/tags:$(git rev-parse refs/tags/second.1)
|
||||
1:tag:/tags:$(git rev-parse refs/tags/second.2)
|
||||
1:tag:/tags:$(git rev-parse refs/tags/third)
|
||||
1:tag:/tags:$(git rev-parse refs/tags/fourth)
|
||||
1:tag:/tags:$(git rev-parse refs/tags/tree-tag)
|
||||
1:tag:/tags:$(git rev-parse refs/tags/blob-tag)
|
||||
2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{})
|
||||
2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{})
|
||||
3:tree::$(git rev-parse topic^{tree})
|
||||
3:tree::$(git rev-parse base^{tree})
|
||||
3:tree::$(git rev-parse base~1^{tree})
|
||||
3:tree::$(git rev-parse base~2^{tree})
|
||||
3:tree::$(git rev-parse refs/tags/tree-tag^{})
|
||||
3:tree::$(git rev-parse refs/tags/tree-tag2^{})
|
||||
4:blob:a:$(git rev-parse base~2:a)
|
||||
5:tree:a/:$(git rev-parse base:a)
|
||||
6:tree:child/:$(git rev-parse refs/tags/tree-tag:child)
|
||||
7:tree:left/:$(git rev-parse base:left)
|
||||
7:tree:left/:$(git rev-parse base~2:left)
|
||||
8:blob:left/b:$(git rev-parse base~2:left/b)
|
||||
9:tree:right/:$(git rev-parse topic:right)
|
||||
9:tree:right/:$(git rev-parse base~1:right)
|
||||
9:tree:right/:$(git rev-parse base~2:right)
|
||||
10:blob:right/c:$(git rev-parse base~2:right/c)
|
||||
11:blob:right/d:$(git rev-parse base~1:right/d)
|
||||
blobs:6
|
||||
commits:4
|
||||
tags:7
|
||||
trees:13
|
||||
EOF
|
||||
|
||||
test_cmp_sorted expect out
|
||||
'
|
||||
|
||||
test_done
|
||||
|
|
|
|||
Loading…
Reference in New Issue