Merge branch 'ps/cat-file-filter-batch'

"git cat-file --batch" and friends learned to allow "--filter=" to
omit certain objects, just like the transport layer does.

* ps/cat-file-filter-batch:
  builtin/cat-file: use bitmaps to efficiently filter by object type
  builtin/cat-file: deduplicate logic to iterate over all objects
  pack-bitmap: introduce function to check whether a pack is bitmapped
  pack-bitmap: add function to iterate over filtered bitmapped objects
  pack-bitmap: allow passing payloads to `show_reachable_fn()`
  builtin/cat-file: support "object:type=" objects filter
  builtin/cat-file: support "blob:limit=" objects filter
  builtin/cat-file: support "blob:none" objects filter
  builtin/cat-file: wire up an option to filter objects
  builtin/cat-file: introduce function to report object status
  builtin/cat-file: rename variable that tracks usage
main
Junio C Hamano 2025-04-16 13:54:20 -07:00
commit a271b05066
8 changed files with 411 additions and 82 deletions

View File

@ -81,6 +81,25 @@ OPTIONS
end-of-line conversion, etc). In this case, `<object>` has to be of
the form `<tree-ish>:<path>`, or `:<path>`.

--filter=<filter-spec>::
--no-filter::
Omit objects from the list of printed objects. This can only be used in
combination with one of the batched modes. Excluded objects that have
been explicitly requested via any of the batch modes that read objects
via standard input (`--batch`, `--batch-check`) will be reported as
"excluded". Excluded objects in `--batch-all-objects` mode will not be
printed at all. The '<filter-spec>' may be one of the following:
+
The form '--filter=blob:none' omits all blobs.
+
The form '--filter=blob:limit=<n>[kmg]' omits blobs of size at least n
bytes or units. n may be zero. The suffixes k, m, and g can be used to name
units in KiB, MiB, or GiB. For example, 'blob:limit=1k' is the same as
'blob:limit=1024'.
+
The form '--filter=object:type=(tag|commit|tree|blob)' omits all objects which
are not of the requested type.

--path=<path>::
For use with `--textconv` or `--filters`, to allow specifying an object
name and a path separately, e.g. when it is difficult to figure out
@ -340,6 +359,13 @@ the repository, then `cat-file` will ignore any custom format and print:
<object> SP missing LF
------------

If a name is specified on stdin that is filtered out via `--filter=`,
then `cat-file` will ignore any custom format and print:

------------
<object> SP excluded LF
------------

If a name is specified that might refer to more than one object (an ambiguous short sha), then `cat-file` will ignore any custom format and print:

------------

View File

@ -15,11 +15,13 @@
#include "gettext.h"
#include "hex.h"
#include "ident.h"
#include "list-objects-filter-options.h"
#include "parse-options.h"
#include "userdiff.h"
#include "streaming.h"
#include "oid-array.h"
#include "packfile.h"
#include "pack-bitmap.h"
#include "object-file.h"
#include "object-name.h"
#include "object-store-ll.h"
@ -35,6 +37,7 @@ enum batch_mode {
};

struct batch_options {
struct list_objects_filter_options objects_filter;
int enabled;
int follow_symlinks;
enum batch_mode batch_mode;
@ -455,6 +458,16 @@ static void print_default_format(struct strbuf *scratch, struct expand_data *dat
(uintmax_t)data->size, opt->output_delim);
}

/*
 * Print a one-line status report for an object: the name it was requested
 * under (or, when "obj_name" is NULL, the hex form of "oid"), a space, the
 * "status" word and the output delimiter taken from "opt". stdout is flushed
 * right after the line has been written.
 */
static void report_object_status(struct batch_options *opt,
const char *obj_name,
const struct object_id *oid,
const char *status)
{
printf("%s %s%c", obj_name ? obj_name : oid_to_hex(oid),
status, opt->output_delim);
fflush(stdout);
}

/*
* If "pack" is non-NULL, then "offset" is the byte offset within the pack from
* which the object may be accessed (though note that we may also rely on
@ -470,8 +483,13 @@ static void batch_object_write(const char *obj_name,
if (!data->skip_object_info) {
int ret;

if (use_mailmap)
if (use_mailmap ||
opt->objects_filter.choice == LOFC_BLOB_NONE ||
opt->objects_filter.choice == LOFC_BLOB_LIMIT ||
opt->objects_filter.choice == LOFC_OBJECT_TYPE)
data->info.typep = &data->type;
if (opt->objects_filter.choice == LOFC_BLOB_LIMIT)
data->info.sizep = &data->size;

if (pack)
ret = packed_object_info(the_repository, pack, offset,
@ -481,12 +499,42 @@ static void batch_object_write(const char *obj_name,
&data->oid, &data->info,
OBJECT_INFO_LOOKUP_REPLACE);
if (ret < 0) {
printf("%s missing%c",
obj_name ? obj_name : oid_to_hex(&data->oid), opt->output_delim);
fflush(stdout);
report_object_status(opt, obj_name, &data->oid, "missing");
return;
}

switch (opt->objects_filter.choice) {
case LOFC_DISABLED:
break;
case LOFC_BLOB_NONE:
if (data->type == OBJ_BLOB) {
if (!opt->all_objects)
report_object_status(opt, obj_name,
&data->oid, "excluded");
return;
}
break;
case LOFC_BLOB_LIMIT:
if (data->type == OBJ_BLOB &&
data->size >= opt->objects_filter.blob_limit_value) {
if (!opt->all_objects)
report_object_status(opt, obj_name,
&data->oid, "excluded");
return;
}
break;
case LOFC_OBJECT_TYPE:
if (data->type != opt->objects_filter.object_type) {
if (!opt->all_objects)
report_object_status(opt, obj_name,
&data->oid, "excluded");
return;
}
break;
default:
BUG("unsupported objects filter");
}

if (use_mailmap && (data->type == OBJ_COMMIT || data->type == OBJ_TAG)) {
size_t s = data->size;
char *buf = NULL;
@ -535,10 +583,10 @@ static void batch_one_object(const char *obj_name,
if (result != FOUND) {
switch (result) {
case MISSING_OBJECT:
printf("%s missing%c", obj_name, opt->output_delim);
report_object_status(opt, obj_name, &data->oid, "missing");
break;
case SHORT_NAME_AMBIGUOUS:
printf("%s ambiguous%c", obj_name, opt->output_delim);
report_object_status(opt, obj_name, &data->oid, "ambiguous");
break;
case DANGLING_SYMLINK:
printf("dangling %"PRIuMAX"%c%s%c",
@ -595,25 +643,18 @@ static int batch_object_cb(const struct object_id *oid, void *vdata)
return 0;
}

/*
 * Callback handed to for_each_loose_object(): append "oid" to the oid_array
 * passed in via "data". The path of the loose object is not used. Always
 * returns 0.
 */
static int collect_loose_object(const struct object_id *oid,
const char *path UNUSED,
void *data)
{
oid_array_append(data, oid);
return 0;
}

static int collect_packed_object(const struct object_id *oid,
struct packed_git *pack UNUSED,
uint32_t pos UNUSED,
void *data)
static int collect_object(const struct object_id *oid,
struct packed_git *pack UNUSED,
off_t offset UNUSED,
void *data)
{
oid_array_append(data, oid);
return 0;
}

static int batch_unordered_object(const struct object_id *oid,
struct packed_git *pack, off_t offset,
struct packed_git *pack,
off_t offset,
void *vdata)
{
struct object_cb_data *data = vdata;
@ -627,23 +668,6 @@ static int batch_unordered_object(const struct object_id *oid,
return 0;
}

/*
 * Adapter for for_each_loose_object(): forward "oid" and "data" to
 * batch_unordered_object() with no pack (NULL) and an offset of 0.
 */
static int batch_unordered_loose(const struct object_id *oid,
const char *path UNUSED,
void *data)
{
return batch_unordered_object(oid, NULL, 0, data);
}

/*
 * Adapter for for_each_packed_object(): translate the index position "pos"
 * into a pack offset with nth_packed_object_offset() and forward everything
 * to batch_unordered_object().
 */
static int batch_unordered_packed(const struct object_id *oid,
struct packed_git *pack,
uint32_t pos,
void *data)
{
return batch_unordered_object(oid, pack,
nth_packed_object_offset(pack, pos),
data);
}

typedef void (*parse_cmd_fn_t)(struct batch_options *, const char *,
struct strbuf *, struct expand_data *);

@ -776,6 +800,76 @@ static void batch_objects_command(struct batch_options *opt,

#define DEFAULT_FORMAT "%(objectname) %(objecttype) %(objectsize)"

/*
 * Callback signature used by batch_each_object(). The loose-object adapter
 * invokes it with a NULL "pack" and an "offset" of 0; the packed and
 * bitmapped adapters pass the pack and the object's offset within it.
 */
typedef int (*for_each_object_fn)(const struct object_id *oid, struct packed_git *pack,
off_t offset, void *data);

/*
 * Bundles the caller's callback with its opaque payload so that both can be
 * passed through the single `void *` argument of the underlying iterators.
 */
struct for_each_object_payload {
for_each_object_fn callback;
void *payload;
};

/*
 * Loose-object adapter: unwrap the for_each_object_payload in "_payload" and
 * invoke its callback with a NULL pack and offset 0. The callback's return
 * value is passed through unchanged.
 */
static int batch_one_object_loose(const struct object_id *oid,
const char *path UNUSED,
void *_payload)
{
struct for_each_object_payload *payload = _payload;
return payload->callback(oid, NULL, 0, payload->payload);
}

/*
 * Packed-object adapter: unwrap the for_each_object_payload in "_payload",
 * convert the index position "pos" into a pack offset via
 * nth_packed_object_offset() and invoke the wrapped callback. The callback's
 * return value is passed through unchanged.
 */
static int batch_one_object_packed(const struct object_id *oid,
struct packed_git *pack,
uint32_t pos,
void *_payload)
{
struct for_each_object_payload *payload = _payload;
return payload->callback(oid, pack, nth_packed_object_offset(pack, pos),
payload->payload);
}

/*
 * Bitmap adapter: unwrap the for_each_object_payload in "_payload" and invoke
 * the wrapped callback with the object's pack and offset. The object type,
 * flags and name hash supplied by the bitmap iteration are ignored.
 */
static int batch_one_object_bitmapped(const struct object_id *oid,
enum object_type type UNUSED,
int flags UNUSED,
uint32_t hash UNUSED,
struct packed_git *pack,
off_t offset,
void *_payload)
{
struct for_each_object_payload *payload = _payload;
return payload->callback(oid, pack, offset, payload->payload);
}

/*
 * Invoke "callback" with "_payload" for the objects of the repository: first
 * all loose objects, then the packed ones. "flags" is handed on to the pack
 * iteration functions.
 *
 * When a bitmap index is available and for_each_bitmapped_object() succeeds
 * (returns 0) with the objects filter from "opt", bitmapped objects are
 * reported through it and only the packs not covered by the bitmap are walked
 * one by one. Otherwise every pack is walked via for_each_packed_object().
 *
 * Note that only the bitmapped path takes the filter into account here; loose
 * objects and objects from non-bitmapped packs are handed to "callback"
 * without any filtering by this function.
 */
static void batch_each_object(struct batch_options *opt,
for_each_object_fn callback,
unsigned flags,
void *_payload)
{
struct for_each_object_payload payload = {
.callback = callback,
.payload = _payload,
};
struct bitmap_index *bitmap = prepare_bitmap_git(the_repository);

for_each_loose_object(batch_one_object_loose, &payload, 0);

if (bitmap && !for_each_bitmapped_object(bitmap, &opt->objects_filter,
batch_one_object_bitmapped, &payload)) {
struct packed_git *pack;

for (pack = get_all_packs(the_repository); pack; pack = pack->next) {
/*
 * Skip packs whose objects were already reported via the bitmap, as
 * well as packs for which open_pack_index() returns non-zero.
 */
if (bitmap_index_contains_pack(bitmap, pack) ||
open_pack_index(pack))
continue;
for_each_object_in_pack(pack, batch_one_object_packed,
&payload, flags);
}
} else {
/* No bitmap, or the bitmap could not apply the filter: walk all packs. */
for_each_packed_object(the_repository, batch_one_object_packed,
&payload, flags);
}

free_bitmap_index(bitmap);
}

static int batch_objects(struct batch_options *opt)
{
struct strbuf input = STRBUF_INIT;
@ -812,7 +906,8 @@ static int batch_objects(struct batch_options *opt)
struct object_cb_data cb;
struct object_info empty = OBJECT_INFO_INIT;

if (!memcmp(&data.info, &empty, sizeof(empty)))
if (!memcmp(&data.info, &empty, sizeof(empty)) &&
opt->objects_filter.choice == LOFC_DISABLED)
data.skip_object_info = 1;

if (repo_has_promisor_remote(the_repository))
@ -829,18 +924,14 @@ static int batch_objects(struct batch_options *opt)

cb.seen = &seen;

for_each_loose_object(batch_unordered_loose, &cb, 0);
for_each_packed_object(the_repository, batch_unordered_packed,
&cb, FOR_EACH_OBJECT_PACK_ORDER);
batch_each_object(opt, batch_unordered_object,
FOR_EACH_OBJECT_PACK_ORDER, &cb);

oidset_clear(&seen);
} else {
struct oid_array sa = OID_ARRAY_INIT;

for_each_loose_object(collect_loose_object, &sa, 0);
for_each_packed_object(the_repository, collect_packed_object,
&sa, 0);

batch_each_object(opt, collect_object, 0, &sa);
oid_array_for_each_unique(&sa, batch_object_cb, &cb);

oid_array_clear(&sa);
@ -936,12 +1027,15 @@ int cmd_cat_file(int argc,
int opt_cw = 0;
int opt_epts = 0;
const char *exp_type = NULL, *obj_name = NULL;
struct batch_options batch = {0};
struct batch_options batch = {
.objects_filter = LIST_OBJECTS_FILTER_INIT,
};
int unknown_type = 0;
int input_nul_terminated = 0;
int nul_terminated = 0;
int ret;

const char * const usage[] = {
const char * const builtin_catfile_usage[] = {
N_("git cat-file <type> <object>"),
N_("git cat-file (-e | -p) <object>"),
N_("git cat-file (-t | -s) [--allow-unknown-type] <object>"),
@ -1000,6 +1094,7 @@ int cmd_cat_file(int argc,
N_("run filters on object's content"), 'w'),
OPT_STRING(0, "path", &force_path, N_("blob|tree"),
N_("use a <path> for (--textconv | --filters); Not with 'batch'")),
OPT_PARSE_LIST_OBJECTS_FILTER(&batch.objects_filter),
OPT_END()
};

@ -1007,13 +1102,27 @@ int cmd_cat_file(int argc,

batch.buffer_output = -1;

argc = parse_options(argc, argv, prefix, options, usage, 0);
argc = parse_options(argc, argv, prefix, options, builtin_catfile_usage, 0);
opt_cw = (opt == 'c' || opt == 'w');
opt_epts = (opt == 'e' || opt == 'p' || opt == 't' || opt == 's');

if (use_mailmap)
read_mailmap(&mailmap);

switch (batch.objects_filter.choice) {
case LOFC_DISABLED:
break;
case LOFC_BLOB_NONE:
case LOFC_BLOB_LIMIT:
case LOFC_OBJECT_TYPE:
if (!batch.enabled)
usage(_("objects filter only supported in batch mode"));
break;
default:
usagef(_("objects filter not supported: '%s'"),
list_object_filter_config_name(batch.objects_filter.choice));
}

/* --batch-all-objects? */
if (opt == 'b')
batch.all_objects = 1;
@ -1021,7 +1130,7 @@ int cmd_cat_file(int argc,
/* Option compatibility */
if (force_path && !opt_cw)
usage_msg_optf(_("'%s=<%s>' needs '%s' or '%s'"),
usage, options,
builtin_catfile_usage, options,
"--path", _("path|tree-ish"), "--filters",
"--textconv");

@ -1029,20 +1138,20 @@ int cmd_cat_file(int argc,
if (batch.enabled)
;
else if (batch.follow_symlinks)
usage_msg_optf(_("'%s' requires a batch mode"), usage, options,
"--follow-symlinks");
usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage,
options, "--follow-symlinks");
else if (batch.buffer_output >= 0)
usage_msg_optf(_("'%s' requires a batch mode"), usage, options,
"--buffer");
usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage,
options, "--buffer");
else if (batch.all_objects)
usage_msg_optf(_("'%s' requires a batch mode"), usage, options,
"--batch-all-objects");
usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage,
options, "--batch-all-objects");
else if (input_nul_terminated)
usage_msg_optf(_("'%s' requires a batch mode"), usage, options,
"-z");
usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage,
options, "-z");
else if (nul_terminated)
usage_msg_optf(_("'%s' requires a batch mode"), usage, options,
"-Z");
usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage,
options, "-Z");

batch.input_delim = batch.output_delim = '\n';
if (input_nul_terminated)
@ -1063,33 +1172,37 @@ int cmd_cat_file(int argc,
batch.transform_mode = opt;
else if (opt && opt != 'b')
usage_msg_optf(_("'-%c' is incompatible with batch mode"),
usage, options, opt);
builtin_catfile_usage, options, opt);
else if (argc)
usage_msg_opt(_("batch modes take no arguments"), usage,
options);
usage_msg_opt(_("batch modes take no arguments"),
builtin_catfile_usage, options);

return batch_objects(&batch);
ret = batch_objects(&batch);
goto out;
}

if (opt) {
if (!argc && opt == 'c')
usage_msg_optf(_("<rev> required with '%s'"),
usage, options, "--textconv");
builtin_catfile_usage, options,
"--textconv");
else if (!argc && opt == 'w')
usage_msg_optf(_("<rev> required with '%s'"),
usage, options, "--filters");
builtin_catfile_usage, options,
"--filters");
else if (!argc && opt_epts)
usage_msg_optf(_("<object> required with '-%c'"),
usage, options, opt);
builtin_catfile_usage, options, opt);
else if (argc == 1)
obj_name = argv[0];
else
usage_msg_opt(_("too many arguments"), usage, options);
usage_msg_opt(_("too many arguments"), builtin_catfile_usage,
options);
} else if (!argc) {
usage_with_options(usage, options);
usage_with_options(builtin_catfile_usage, options);
} else if (argc != 2) {
usage_msg_optf(_("only two arguments allowed in <type> <object> mode, not %d"),
usage, options, argc);
builtin_catfile_usage, options, argc);
} else if (argc) {
exp_type = argv[0];
obj_name = argv[1];
@ -1097,5 +1210,10 @@ int cmd_cat_file(int argc,

if (unknown_type && opt != 't' && opt != 's')
die("git cat-file --allow-unknown-type: use with -s or -t");
return cat_one_file(opt, exp_type, obj_name, unknown_type);

ret = cat_one_file(opt, exp_type, obj_name, unknown_type);

out:
list_objects_filter_release(&batch.objects_filter);
return ret;
}

View File

@ -1820,7 +1820,8 @@ static int add_object_entry(const struct object_id *oid, enum object_type type,
static int add_object_entry_from_bitmap(const struct object_id *oid,
enum object_type type,
int flags UNUSED, uint32_t name_hash,
struct packed_git *pack, off_t offset)
struct packed_git *pack, off_t offset,
void *payload UNUSED)
{
display_progress(progress_state, ++nr_seen);


View File

@ -461,7 +461,8 @@ static int show_object_fast(
int exclude UNUSED,
uint32_t name_hash UNUSED,
struct packed_git *found_pack UNUSED,
off_t found_offset UNUSED)
off_t found_offset UNUSED,
void *payload UNUSED)
{
fprintf(stdout, "%s\n", oid_to_hex(oid));
return 1;

View File

@ -745,6 +745,21 @@ struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx)
return NULL;
}

/*
 * Return 1 if "pack" is covered by "bitmap" or by any bitmap index reachable
 * through its "base" chain, and 0 otherwise (including when "bitmap" is
 * NULL). For a multi-pack-index bitmap the pack is searched by pointer
 * identity in the MIDX's pack list; otherwise it is compared against the
 * single pack the bitmap belongs to.
 */
int bitmap_index_contains_pack(struct bitmap_index *bitmap, struct packed_git *pack)
{
for (; bitmap; bitmap = bitmap->base) {
if (bitmap_is_midx(bitmap)) {
for (size_t i = 0; i < bitmap->midx->num_packs; i++)
if (bitmap->midx->packs[i] == pack)
return 1;
} else if (bitmap->pack == pack) {
return 1;
}
}

return 0;
}

struct include_data {
struct bitmap_index *bitmap_git;
struct bitmap *base;
@ -1625,7 +1640,7 @@ static void show_extended_objects(struct bitmap_index *bitmap_git,
(obj->type == OBJ_TAG && !revs->tag_objects))
continue;

show_reach(&obj->oid, obj->type, 0, eindex->hashes[i], NULL, 0);
show_reach(&obj->oid, obj->type, 0, eindex->hashes[i], NULL, 0, NULL);
}
}

@ -1662,8 +1677,10 @@ static void init_type_iterator(struct ewah_or_iterator *it,

static void show_objects_for_type(
struct bitmap_index *bitmap_git,
struct bitmap *objects,
enum object_type object_type,
show_reachable_fn show_reach)
show_reachable_fn show_reach,
void *payload)
{
size_t i = 0;
uint32_t offset;
@ -1671,8 +1688,6 @@ static void show_objects_for_type(
struct ewah_or_iterator it;
eword_t filter;

struct bitmap *objects = bitmap_git->result;

init_type_iterator(&it, bitmap_git, object_type);

for (i = 0; i < objects->word_alloc &&
@ -1715,7 +1730,7 @@ static void show_objects_for_type(
if (bitmap_git->hashes)
hash = get_be32(bitmap_git->hashes + index_pos);

show_reach(&oid, object_type, 0, hash, pack, ofs);
show_reach(&oid, object_type, 0, hash, pack, ofs, payload);
}
}

@ -2024,6 +2039,50 @@ static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
}
}

/*
 * Run "show_reach" with "payload" on every object of "bitmap_git" that is
 * left after applying "filter", visiting commits, trees, blobs and tags in
 * that order.
 *
 * Returns 0 on success. Returns -1 when can_filter_bitmap() rejects the
 * filter or filter_bitmap() fails; in both cases "show_reach" is never
 * invoked.
 */
int for_each_bitmapped_object(struct bitmap_index *bitmap_git,
struct list_objects_filter_options *filter,
show_reachable_fn show_reach,
void *payload)
{
struct bitmap *filtered_bitmap = NULL;
uint32_t objects_nr;
size_t full_word_count;
int ret;

if (!can_filter_bitmap(filter)) {
ret = -1;
goto out;
}

objects_nr = bitmap_num_objects(bitmap_git);
full_word_count = objects_nr / BITS_IN_EWORD;

/* We start from the all-1 bitmap and then filter down from there. */
filtered_bitmap = bitmap_word_alloc(full_word_count + !!(objects_nr % BITS_IN_EWORD));
memset(filtered_bitmap->words, 0xff, full_word_count * sizeof(*filtered_bitmap->words));
/*
 * The memset() above only covers completely used words; set the bits of
 * the trailing, partially used word one at a time so that no bit beyond
 * "objects_nr" gets set.
 */
for (size_t i = full_word_count * BITS_IN_EWORD; i < objects_nr; i++)
bitmap_set(filtered_bitmap, i);

if (filter_bitmap(bitmap_git, NULL, filtered_bitmap, filter) < 0) {
ret = -1;
goto out;
}

show_objects_for_type(bitmap_git, filtered_bitmap,
OBJ_COMMIT, show_reach, payload);
show_objects_for_type(bitmap_git, filtered_bitmap,
OBJ_TREE, show_reach, payload);
show_objects_for_type(bitmap_git, filtered_bitmap,
OBJ_BLOB, show_reach, payload);
show_objects_for_type(bitmap_git, filtered_bitmap,
OBJ_TAG, show_reach, payload);

ret = 0;
out:
/* "filtered_bitmap" is still NULL here when the filter was rejected up front. */
bitmap_free(filtered_bitmap);
return ret;
}

struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
int filter_provided_objects)
{
@ -2518,13 +2577,17 @@ void traverse_bitmap_commit_list(struct bitmap_index *bitmap_git,
{
assert(bitmap_git->result);

show_objects_for_type(bitmap_git, OBJ_COMMIT, show_reachable);
show_objects_for_type(bitmap_git, bitmap_git->result,
OBJ_COMMIT, show_reachable, NULL);
if (revs->tree_objects)
show_objects_for_type(bitmap_git, OBJ_TREE, show_reachable);
show_objects_for_type(bitmap_git, bitmap_git->result,
OBJ_TREE, show_reachable, NULL);
if (revs->blob_objects)
show_objects_for_type(bitmap_git, OBJ_BLOB, show_reachable);
show_objects_for_type(bitmap_git, bitmap_git->result,
OBJ_BLOB, show_reachable, NULL);
if (revs->tag_objects)
show_objects_for_type(bitmap_git, OBJ_TAG, show_reachable);
show_objects_for_type(bitmap_git, bitmap_git->result,
OBJ_TAG, show_reachable, NULL);

show_extended_objects(bitmap_git, revs, show_reachable);
}

View File

@ -50,7 +50,8 @@ typedef int (*show_reachable_fn)(
int flags,
uint32_t hash,
struct packed_git *found_pack,
off_t found_offset);
off_t found_offset,
void *payload);

struct bitmap_index;

@ -66,6 +67,13 @@ struct bitmapped_pack {

struct bitmap_index *prepare_bitmap_git(struct repository *r);
struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx);

/*
* Given a bitmap index, determine whether it contains the pack either directly
* or via the multi-pack-index.
*/
int bitmap_index_contains_pack(struct bitmap_index *bitmap, struct packed_git *pack);

void count_bitmap_commit_list(struct bitmap_index *, uint32_t *commits,
uint32_t *trees, uint32_t *blobs, uint32_t *tags);
void traverse_bitmap_commit_list(struct bitmap_index *,
@ -78,6 +86,18 @@ int test_bitmap_pseudo_merges(struct repository *r);
int test_bitmap_pseudo_merge_commits(struct repository *r, uint32_t n);
int test_bitmap_pseudo_merge_objects(struct repository *r, uint32_t n);

struct list_objects_filter_options;

/*
* Filter bitmapped objects and iterate through all resulting objects,
* executing `show_reach` for each of them. Returns `-1` in case the filter is
* not supported, `0` otherwise.
*/
int for_each_bitmapped_object(struct bitmap_index *bitmap_git,
struct list_objects_filter_options *filter,
show_reachable_fn show_reach,
void *payload);

#define GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL \
"GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL"


View File

@ -341,7 +341,8 @@ static int mark_object_seen(const struct object_id *oid,
int exclude UNUSED,
uint32_t name_hash UNUSED,
struct packed_git *found_pack UNUSED,
off_t found_offset UNUSED)
off_t found_offset UNUSED,
void *payload UNUSED)
{
struct object *obj = lookup_object_by_type(the_repository, oid, type);
if (!obj)

View File

@ -1410,4 +1410,103 @@ test_expect_success PERL_IPC_OPEN2 '--batch-command info is unbuffered by defaul
perl -e "$perl_script" -- --batch-command $hello_oid "$expect" "info "
'

test_expect_success 'setup for objects filter' '
git init repo &&
(
# Seed the repository with four different sets of objects:
#
# - The first set is fully packed and has a bitmap.
# - The second set is packed, but has no bitmap.
# - The third set is loose.
# - The fourth set is loose and contains big objects.
#
# This ensures that we cover all these types as expected.
cd repo &&
test_commit first &&
git repack -Adb &&
test_commit second &&
git repack -d &&
test_commit third &&

for n in 1000 10000
do
printf "%"$n"s" X >large.$n || return 1
done &&
git add large.* &&
git commit -m fourth
)
'

test_expect_success 'objects filter with unknown option' '
cat >expect <<-EOF &&
fatal: invalid filter-spec ${SQ}unknown${SQ}
EOF
test_must_fail git -C repo cat-file --filter=unknown 2>err &&
test_cmp expect err
'

for option in sparse:oid=1234 tree:1 sparse:path=x
do
test_expect_success "objects filter with unsupported option $option" '
case "$option" in
tree:1)
echo "usage: objects filter not supported: ${SQ}tree${SQ}" >expect
;;
sparse:path=x)
echo "fatal: sparse:path filters support has been dropped" >expect
;;
*)
option_name=$(echo "$option" | cut -d= -f1) &&
printf "usage: objects filter not supported: ${SQ}%s${SQ}\n" "$option_name" >expect
;;
esac &&
test_must_fail git -C repo cat-file --filter=$option 2>err &&
test_cmp expect err
'
done

test_expect_success 'objects filter: disabled' '
git -C repo cat-file --batch-check="%(objectname)" --batch-all-objects --no-filter >actual &&
sort actual >actual.sorted &&
git -C repo rev-list --objects --no-object-names --all >expect &&
sort expect >expect.sorted &&
test_cmp expect.sorted actual.sorted
'

# Register two test cases for the objects filter given as "$1":
#
#   - `cat-file --batch-all-objects --filter=<filter>` must print the same set
#     of object names as `rev-list --objects --all --filter=<filter>
#     --filter-provided-objects`.
#   - every object that the filter drops, when requested explicitly on stdin,
#     must show up in the output on a line matching "excluded".
#
# Both outputs are sorted before comparison, so ordering is not checked.
test_objects_filter () {
filter="$1"

test_expect_success "objects filter: $filter" '
git -C repo cat-file --batch-check="%(objectname)" --batch-all-objects --filter="$filter" >actual &&
sort actual >actual.sorted &&
git -C repo rev-list --objects --no-object-names --all --filter="$filter" --filter-provided-objects >expect &&
sort expect >expect.sorted &&
test_cmp expect.sorted actual.sorted
'

test_expect_success "objects filter prints excluded objects: $filter" '
# Find all objects that would be excluded by the current filter.
git -C repo rev-list --objects --no-object-names --all >all &&
git -C repo rev-list --objects --no-object-names --all --filter="$filter" --filter-provided-objects >filtered &&
sort all >all.sorted &&
sort filtered >filtered.sorted &&
comm -23 all.sorted filtered.sorted >expected.excluded &&
test_line_count -gt 0 expected.excluded &&

git -C repo cat-file --batch-check="%(objectname)" --filter="$filter" <expected.excluded >actual &&
awk "/excluded/{ print \$1 }" actual | sort >actual.excluded &&
test_cmp expected.excluded actual.excluded
'
}

test_objects_filter "blob:none"
test_objects_filter "blob:limit=1"
test_objects_filter "blob:limit=500"
test_objects_filter "blob:limit=1000"
test_objects_filter "blob:limit=1k"
test_objects_filter "object:type=blob"
test_objects_filter "object:type=commit"
test_objects_filter "object:type=tag"
test_objects_filter "object:type=tree"

test_done