Merge branch 'tb/midx-avoid-cruft-packs' into ps/object-store-midx

* tb/midx-avoid-cruft-packs:
  repack: exclude cruft pack(s) from the MIDX where possible
  pack-objects: introduce '--stdin-packs=follow'
  pack-objects: swap 'show_{object,commit}_pack_hint'
  pack-objects: fix typo in 'show_object_pack_hint()'
  pack-objects: perform name-hash traversal for unpacked objects
  pack-objects: declare 'rev_info' for '--stdin-packs' earlier
  pack-objects: factor out handling '--stdin-packs'
  pack-objects: limit scope in 'add_object_entry_from_pack()'
  pack-objects: use standard option incompatibility functions
maint
Junio C Hamano 2025-07-15 12:06:57 -07:00
commit c29998d1d4
6 changed files with 571 additions and 89 deletions

View File

@ -39,3 +39,10 @@ repack.cruftThreads::
a cruft pack and the respective parameters are not given over
the command line. See similarly named `pack.*` configuration
variables for defaults and meaning.

repack.midxMustContainCruft::
When set to true, linkgit:git-repack[1] will unconditionally include
cruft pack(s), if any, in the multi-pack index when invoked with
`--write-midx`. When false, cruft packs are only included in the MIDX
when necessary (e.g., because they might be required to form a
reachability closure with MIDX bitmaps). Defaults to true.

View File

@ -87,13 +87,21 @@ base-name::
reference was included in the resulting packfile. This
can be useful to send new tags to native Git clients.

--stdin-packs::
--stdin-packs[=<mode>]::
Read the basenames of packfiles (e.g., `pack-1234abcd.pack`)
from the standard input, instead of object names or revision
arguments. The resulting pack contains all objects listed in the
included packs (those not beginning with `^`), excluding any
objects listed in the excluded packs (beginning with `^`).
+
When `mode` is "follow", objects from packs not listed on stdin receive
special treatment. Objects within unlisted packs will be included if
those objects are (1) reachable from the included packs, and (2) not
found in any excluded packs. This mode is useful, for example, to
resurrect once-unreachable objects found in cruft packs to generate
packs which are closed under reachability up to the boundary set by the
excluded packs.
+
Incompatible with `--revs`, or options that imply `--revs` (such as
`--all`), with the exception of `--unpacked`, which is compatible.


View File

@ -284,6 +284,12 @@ static struct oidmap configured_exclusions;
static struct oidset excluded_by_config;
static int name_hash_version = -1;

/*
 * How `--stdin-packs` was requested on the command line.
 */
enum stdin_packs_mode {
/* `--stdin-packs` was not given, or was negated. */
STDIN_PACKS_MODE_NONE,
/* `--stdin-packs` was given without a (non-empty) mode argument. */
STDIN_PACKS_MODE_STANDARD,
/* `--stdin-packs=follow` was given. */
STDIN_PACKS_MODE_FOLLOW,
};

/**
* Check whether the name_hash_version chosen by user input is appropriate,
* and also validate whether it is compatible with other features.
@ -3727,7 +3733,6 @@ static int add_object_entry_from_pack(const struct object_id *oid,
return 0;

if (p) {
struct rev_info *revs = _data;
struct object_info oi = OBJECT_INFO_INIT;

oi.typep = &type;
@ -3735,6 +3740,7 @@ static int add_object_entry_from_pack(const struct object_id *oid,
die(_("could not get type of object %s in pack %s"),
oid_to_hex(oid), p->pack_name);
} else if (type == OBJ_COMMIT) {
struct rev_info *revs = _data;
/*
* commits in included packs are used as starting points for the
* subsequent revision walk
@ -3750,32 +3756,48 @@ static int add_object_entry_from_pack(const struct object_id *oid,
return 0;
}

static void show_commit_pack_hint(struct commit *commit UNUSED,
void *data UNUSED)
static void show_object_pack_hint(struct object *object, const char *name,
void *data)
{
/* nothing to do; commits don't have a namehash */
enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data;
if (mode == STDIN_PACKS_MODE_FOLLOW) {
if (object->type == OBJ_BLOB &&
!has_object(the_repository, &object->oid, 0))
return;
add_object_entry(&object->oid, object->type, name, 0);
} else {
struct object_entry *oe = packlist_find(&to_pack, &object->oid);
if (!oe)
return;

/*
* Our 'to_pack' list was constructed by iterating all
* objects packed in included packs, and so doesn't have
* a non-zero hash field that you would typically pick
* up during a reachability traversal.
*
* Make a best-effort attempt to fill in the ->hash and
* ->no_try_delta fields here in order to perhaps
* improve the delta selection process.
*/
oe->hash = pack_name_hash_fn(name);
oe->no_try_delta = name && no_try_delta(name);

stdin_packs_hints_nr++;
}
}

static void show_object_pack_hint(struct object *object, const char *name,
void *data UNUSED)
static void show_commit_pack_hint(struct commit *commit, void *data)
{
struct object_entry *oe = packlist_find(&to_pack, &object->oid);
if (!oe)
enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data;

if (mode == STDIN_PACKS_MODE_FOLLOW) {
show_object_pack_hint((struct object *)commit, "", data);
return;
}

/*
* Our 'to_pack' list was constructed by iterating all objects packed in
* included packs, and so doesn't have a non-zero hash field that you
* would typically pick up during a reachability traversal.
*
* Make a best-effort attempt to fill in the ->hash and ->no_try_delta
* here using a now in order to perhaps improve the delta selection
* process.
*/
oe->hash = pack_name_hash_fn(name);
oe->no_try_delta = name && no_try_delta(name);
/* nothing to do; commits don't have a namehash */

stdin_packs_hints_nr++;
}

static int pack_mtime_cmp(const void *_a, const void *_b)
@ -3795,7 +3817,7 @@ static int pack_mtime_cmp(const void *_a, const void *_b)
return 0;
}

static void read_packs_list_from_stdin(void)
static void read_packs_list_from_stdin(struct rev_info *revs)
{
struct strbuf buf = STRBUF_INIT;
struct string_list include_packs = STRING_LIST_INIT_DUP;
@ -3803,24 +3825,6 @@ static void read_packs_list_from_stdin(void)
struct string_list_item *item = NULL;

struct packed_git *p;
struct rev_info revs;

repo_init_revisions(the_repository, &revs, NULL);
/*
* Use a revision walk to fill in the namehash of objects in the include
* packs. To save time, we'll avoid traversing through objects that are
* in excluded packs.
*
* That may cause us to avoid populating all of the namehash fields of
* all included objects, but our goal is best-effort, since this is only
* an optimization during delta selection.
*/
revs.no_kept_objects = 1;
revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
revs.blob_objects = 1;
revs.tree_objects = 1;
revs.tag_objects = 1;
revs.ignore_missing_links = 1;

while (strbuf_getline(&buf, stdin) != EOF) {
if (!buf.len)
@ -3890,25 +3894,55 @@ static void read_packs_list_from_stdin(void)
struct packed_git *p = item->util;
for_each_object_in_pack(p,
add_object_entry_from_pack,
&revs,
revs,
FOR_EACH_OBJECT_PACK_ORDER);
}

strbuf_release(&buf);
string_list_clear(&include_packs, 0);
string_list_clear(&exclude_packs, 0);
}

static void add_unreachable_loose_objects(struct rev_info *revs);

static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked)
{
struct rev_info revs;

repo_init_revisions(the_repository, &revs, NULL);
/*
* Use a revision walk to fill in the namehash of objects in the include
* packs. To save time, we'll avoid traversing through objects that are
* in excluded packs.
*
* That may cause us to avoid populating all of the namehash fields of
* all included objects, but our goal is best-effort, since this is only
* an optimization during delta selection.
*/
revs.no_kept_objects = 1;
revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
revs.blob_objects = 1;
revs.tree_objects = 1;
revs.tag_objects = 1;
revs.ignore_missing_links = 1;

/* avoids adding objects in excluded packs */
ignore_packed_keep_in_core = 1;
read_packs_list_from_stdin(&revs);
if (rev_list_unpacked)
add_unreachable_loose_objects(&revs);

if (prepare_revision_walk(&revs))
die(_("revision walk setup failed"));
traverse_commit_list(&revs,
show_commit_pack_hint,
show_object_pack_hint,
NULL);
&mode);

trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found",
stdin_packs_found_nr);
trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints",
stdin_packs_hints_nr);

strbuf_release(&buf);
string_list_clear(&include_packs, 0);
string_list_clear(&exclude_packs, 0);
}

static void add_cruft_object_entry(const struct object_id *oid, enum object_type type,
@ -4006,7 +4040,6 @@ static void mark_pack_kept_in_core(struct string_list *packs, unsigned keep)
}
}

static void add_unreachable_loose_objects(void);
static void add_objects_in_unpacked_packs(void);

static void enumerate_cruft_objects(void)
@ -4016,7 +4049,7 @@ static void enumerate_cruft_objects(void)
_("Enumerating cruft objects"), 0);

add_objects_in_unpacked_packs();
add_unreachable_loose_objects();
add_unreachable_loose_objects(NULL);

stop_progress(&progress_state);
}
@ -4294,8 +4327,9 @@ static void add_objects_in_unpacked_packs(void)
}

static int add_loose_object(const struct object_id *oid, const char *path,
void *data UNUSED)
void *data)
{
struct rev_info *revs = data;
enum object_type type = odb_read_object_info(the_repository->objects, oid, NULL);

if (type < 0) {
@ -4316,6 +4350,10 @@ static int add_loose_object(const struct object_id *oid, const char *path,
} else {
add_object_entry(oid, type, "", 0);
}

if (revs && type == OBJ_COMMIT)
add_pending_oid(revs, NULL, oid, 0);

return 0;
}

@ -4324,11 +4362,10 @@ static int add_loose_object(const struct object_id *oid, const char *path,
* add_object_entry will weed out duplicates, so we just add every
* loose object we find.
*/
static void add_unreachable_loose_objects(void)
static void add_unreachable_loose_objects(struct rev_info *revs)
{
for_each_loose_file_in_objdir(repo_get_object_directory(the_repository),
add_loose_object,
NULL, NULL, NULL);
add_loose_object, NULL, NULL, revs);
}

static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid)
@ -4675,7 +4712,7 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av)
if (keep_unreachable)
add_objects_in_unpacked_packs();
if (pack_loose_unreachable)
add_unreachable_loose_objects();
add_unreachable_loose_objects(NULL);
if (unpack_unreachable)
loosen_unused_packed_objects();

@ -4782,6 +4819,23 @@ static int is_not_in_promisor_pack(struct commit *commit, void *data) {
return is_not_in_promisor_pack_obj((struct object *) commit, data);
}

/*
 * parse-options callback for `--stdin-packs[=<mode>]`.
 *
 * Stores the selected mode into the `enum stdin_packs_mode` that
 * `opt->value` points at:
 *
 *   - negated option:          STDIN_PACKS_MODE_NONE
 *   - no (or empty) argument:  STDIN_PACKS_MODE_STANDARD
 *   - argument "follow":       STDIN_PACKS_MODE_FOLLOW
 *
 * Any other argument is reported through die(). Returns 0.
 */
static int parse_stdin_packs_mode(const struct option *opt, const char *arg,
				  int unset)
{
	enum stdin_packs_mode *selected = opt->value;

	if (unset) {
		*selected = STDIN_PACKS_MODE_NONE;
		return 0;
	}

	if (!arg || !*arg) {
		*selected = STDIN_PACKS_MODE_STANDARD;
		return 0;
	}

	if (!strcmp(arg, "follow")) {
		*selected = STDIN_PACKS_MODE_FOLLOW;
		return 0;
	}

	die(_("invalid value for '%s': '%s'"), opt->long_name, arg);
	return 0;
}

int cmd_pack_objects(int argc,
const char **argv,
const char *prefix,
@ -4792,7 +4846,7 @@ int cmd_pack_objects(int argc,
struct strvec rp = STRVEC_INIT;
int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
int rev_list_index = 0;
int stdin_packs = 0;
enum stdin_packs_mode stdin_packs = STDIN_PACKS_MODE_NONE;
struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
struct list_objects_filter_options filter_options =
LIST_OBJECTS_FILTER_INIT;
@ -4847,6 +4901,9 @@ int cmd_pack_objects(int argc,
OPT_SET_INT_F(0, "indexed-objects", &rev_list_index,
N_("include objects referred to by the index"),
1, PARSE_OPT_NONEG),
OPT_CALLBACK_F(0, "stdin-packs", &stdin_packs, N_("mode"),
N_("read packs from stdin"),
PARSE_OPT_OPTARG, parse_stdin_packs_mode),
OPT_BOOL(0, "stdin-packs", &stdin_packs,
N_("read packs from stdin")),
OPT_BOOL(0, "stdout", &pack_to_stdout,
@ -5012,9 +5069,10 @@ int cmd_pack_objects(int argc,
strvec_push(&rp, "--unpacked");
}

if (exclude_promisor_objects && exclude_promisor_objects_best_effort)
die(_("options '%s' and '%s' cannot be used together"),
"--exclude-promisor-objects", "--exclude-promisor-objects-best-effort");
die_for_incompatible_opt2(exclude_promisor_objects,
"--exclude-promisor-objects",
exclude_promisor_objects_best_effort,
"--exclude-promisor-objects-best-effort");
if (exclude_promisor_objects) {
use_internal_rev_list = 1;
fetch_if_missing = 0;
@ -5052,13 +5110,14 @@ int cmd_pack_objects(int argc,
if (!pack_to_stdout && thin)
die(_("--thin cannot be used to build an indexable pack"));

if (keep_unreachable && unpack_unreachable)
die(_("options '%s' and '%s' cannot be used together"), "--keep-unreachable", "--unpack-unreachable");
die_for_incompatible_opt2(keep_unreachable, "--keep-unreachable",
unpack_unreachable, "--unpack-unreachable");
if (!rev_list_all || !rev_list_reflog || !rev_list_index)
unpack_unreachable_expiration = 0;

if (stdin_packs && filter_options.choice)
die(_("cannot use --filter with --stdin-packs"));
die_for_incompatible_opt2(stdin_packs, "--stdin-packs",
filter_options.choice, "--filter");


if (stdin_packs && use_internal_rev_list)
die(_("cannot use internal rev list with --stdin-packs"));
@ -5066,8 +5125,8 @@ int cmd_pack_objects(int argc,
if (cruft) {
if (use_internal_rev_list)
die(_("cannot use internal rev list with --cruft"));
if (stdin_packs)
die(_("cannot use --stdin-packs with --cruft"));
die_for_incompatible_opt2(stdin_packs, "--stdin-packs",
cruft, "--cruft");
}

/*
@ -5135,11 +5194,7 @@ int cmd_pack_objects(int argc,
progress_state = start_progress(the_repository,
_("Enumerating objects"), 0);
if (stdin_packs) {
/* avoids adding objects in excluded packs */
ignore_packed_keep_in_core = 1;
read_packs_list_from_stdin();
if (rev_list_unpacked)
add_unreachable_loose_objects();
read_stdin_packs(stdin_packs, rev_list_unpacked);
} else if (cruft) {
read_cruft_objects();
} else if (!use_internal_rev_list) {

View File

@ -39,6 +39,7 @@ static int write_bitmaps = -1;
static int use_delta_islands;
static int run_update_server_info = 1;
static char *packdir, *packtmp_name, *packtmp;
static int midx_must_contain_cruft = 1;

static const char *const git_repack_usage[] = {
N_("git repack [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m]\n"
@ -108,6 +109,10 @@ static int repack_config(const char *var, const char *value,
free(cruft_po_args->threads);
return git_config_string(&cruft_po_args->threads, var, value);
}
if (!strcmp(var, "repack.midxmustcontaincruft")) {
midx_must_contain_cruft = git_config_bool(var, value);
return 0;
}
return git_default_config(var, value, ctx, cb);
}

@ -690,6 +695,77 @@ static void free_pack_geometry(struct pack_geometry *geometry)
free(geometry->pack);
}

/*
 * Report whether the pre-existing MIDX names a pack that the MIDX about
 * to be written does not account for.
 *
 * `midx_pack_names` holds `midx_pack_names_nr` pack names taken from the
 * existing MIDX; each is compared against ".idx" names. `include` is the
 * list of packs selected for the new MIDX so far; note that it is sorted
 * in place here so that it can be searched.
 *
 * Returns 1 as soon as one MIDX'd pack is found that is not accounted
 * for, and 0 when every pack is accounted for (including when there are
 * no pack names at all).
 *
 * NOTE(review): the caller visible in this file passes the address of
 * its pack_geometry unconditionally, so the `geometry` pointer test
 * below may always be true, even for non-geometric repacks -- confirm
 * whether `geometry->split_factor` should be consulted instead.
 */
static int midx_has_unknown_packs(char **midx_pack_names,
				  size_t midx_pack_names_nr,
				  struct string_list *include,
				  struct pack_geometry *geometry,
				  struct existing_packs *existing)
{
	/* Scratch buffer shared by all iterations; released once on exit. */
	struct strbuf buf = STRBUF_INIT;
	int unknown = 0;
	size_t i;

	string_list_sort(include);

	for (i = 0; i < midx_pack_names_nr; i++) {
		const char *pack_name = midx_pack_names[i];

		/*
		 * Determine whether or not each MIDX'd pack from the existing
		 * MIDX (if any) is represented in the new MIDX. For each pack
		 * in the MIDX, it must either be:
		 *
		 *  - In the "include" list of packs to be included in the new
		 *    MIDX. Note this function is called before the include
		 *    list is populated with any cruft pack(s).
		 *
		 *  - Below the geometric split line (if using pack geometry),
		 *    indicating that the pack won't be included in the new
		 *    MIDX, but its contents were rolled up as part of the
		 *    geometric repack.
		 *
		 *  - In the existing non-kept packs list (if not using pack
		 *    geometry), and marked as non-deleted.
		 */
		if (string_list_has_string(include, pack_name)) {
			continue;
		} else if (geometry) {
			uint32_t j;

			for (j = 0; j < geometry->split; j++) {
				/* Rebuild "<basename>.idx" for the j-th pack. */
				strbuf_reset(&buf);
				strbuf_addstr(&buf, pack_basename(geometry->pack[j]));
				strbuf_strip_suffix(&buf, ".pack");
				strbuf_addstr(&buf, ".idx");

				if (!strcmp(pack_name, buf.buf))
					break;
			}

			/* Leaving the loop early means a match was found. */
			if (j < geometry->split)
				continue;
		} else {
			struct string_list_item *item;

			item = string_list_lookup(&existing->non_kept_packs,
						  pack_name);
			if (item && !pack_is_marked_for_deletion(item))
				continue;
		}

		/*
		 * If we got to this point, the MIDX includes some pack that we
		 * don't know about.
		 */
		unknown = 1;
		break;
	}

	strbuf_release(&buf);
	return unknown;
}

struct midx_snapshot_ref_data {
struct tempfile *f;
struct oidset seen;
@ -758,6 +834,8 @@ static void midx_snapshot_refs(struct tempfile *f)

static void midx_included_packs(struct string_list *include,
struct existing_packs *existing,
char **midx_pack_names,
size_t midx_pack_names_nr,
struct string_list *names,
struct pack_geometry *geometry)
{
@ -811,26 +889,56 @@ static void midx_included_packs(struct string_list *include,
}
}

for_each_string_list_item(item, &existing->cruft_packs) {
if (midx_must_contain_cruft ||
midx_has_unknown_packs(midx_pack_names, midx_pack_names_nr,
include, geometry, existing)) {
/*
* When doing a --geometric repack, there is no need to check
* for deleted packs, since we're by definition not doing an
* ALL_INTO_ONE repack (hence no packs will be deleted).
* Otherwise we must check for and exclude any packs which are
* enqueued for deletion.
* If there are one or more unknown pack(s) present (see
* midx_has_unknown_packs() for what makes a pack
* "unknown") in the MIDX before the repack, keep them
* as they may be required to form a reachability
* closure if the MIDX is bitmapped.
*
* So we could omit the conditional below in the --geometric
* case, but doing so is unnecessary since no packs are marked
* as pending deletion (since we only call
* `mark_packs_for_deletion()` when doing an all-into-one
* repack).
* For example, a cruft pack can be required to form a
* reachability closure if the MIDX is bitmapped and one
* or more of the bitmap's selected commits reaches a
* once-cruft object that was later made reachable.
*/
if (pack_is_marked_for_deletion(item))
continue;
for_each_string_list_item(item, &existing->cruft_packs) {
/*
* When doing a --geometric repack, there is no
* need to check for deleted packs, since we're
* by definition not doing an ALL_INTO_ONE
* repack (hence no packs will be deleted).
* Otherwise we must check for and exclude any
* packs which are enqueued for deletion.
*
* So we could omit the conditional below in the
* --geometric case, but doing so is unnecessary
* since no packs are marked as pending
* deletion (since we only call
* `mark_packs_for_deletion()` when doing an
* all-into-one repack).
*/
if (pack_is_marked_for_deletion(item))
continue;

strbuf_reset(&buf);
strbuf_addf(&buf, "%s.idx", item->string);
string_list_insert(include, buf.buf);
strbuf_reset(&buf);
strbuf_addf(&buf, "%s.idx", item->string);
string_list_insert(include, buf.buf);
}
} else {
/*
* Modern versions of Git (with the appropriate
* configuration setting) will write new copies of
* once-cruft objects when doing a --geometric repack.
*
* If the MIDX has no cruft pack, new packs written
* during a --geometric repack will not rely on the
* cruft pack to form a reachability closure, so we can
* avoid including them in the MIDX in that case.
*/
;
}

strbuf_release(&buf);
@ -1145,6 +1253,8 @@ int cmd_repack(int argc,
struct tempfile *refs_snapshot = NULL;
int i, ext, ret;
int show_progress;
char **midx_pack_names = NULL;
size_t midx_pack_names_nr = 0;

/* variables to be filled by option parsing */
int delete_redundant = 0;
@ -1362,7 +1472,10 @@ int cmd_repack(int argc,
!(pack_everything & PACK_CRUFT))
strvec_push(&cmd.args, "--pack-loose-unreachable");
} else if (geometry.split_factor) {
strvec_push(&cmd.args, "--stdin-packs");
if (midx_must_contain_cruft)
strvec_push(&cmd.args, "--stdin-packs");
else
strvec_push(&cmd.args, "--stdin-packs=follow");
strvec_push(&cmd.args, "--unpacked");
} else {
strvec_push(&cmd.args, "--unpacked");
@ -1402,8 +1515,25 @@ int cmd_repack(int argc,
if (ret)
goto cleanup;

if (!names.nr && !po_args.quiet)
printf_ln(_("Nothing new to pack."));
if (!names.nr) {
if (!po_args.quiet)
printf_ln(_("Nothing new to pack."));
/*
* If we didn't write any new packs, the non-cruft packs
* may refer to once-unreachable objects in the cruft
* pack(s).
*
* If there isn't already a MIDX, the one we write
* must include the cruft pack(s), in case the
* non-cruft pack(s) refer to once-cruft objects.
*
* If there is already a MIDX, we can punt here, since
* midx_has_unknown_packs() will make the decision for
* us.
*/
if (!get_local_multi_pack_index(the_repository))
midx_must_contain_cruft = 1;
}

if (pack_everything & PACK_CRUFT) {
const char *pack_prefix = find_pack_prefix(packdir, packtmp);
@ -1484,6 +1614,19 @@ int cmd_repack(int argc,

string_list_sort(&names);

if (get_local_multi_pack_index(the_repository)) {
struct multi_pack_index *m =
get_local_multi_pack_index(the_repository);

ALLOC_ARRAY(midx_pack_names,
m->num_packs + m->num_packs_in_base);

for (; m; m = m->base_midx)
for (uint32_t i = 0; i < m->num_packs; i++)
midx_pack_names[midx_pack_names_nr++] =
xstrdup(m->pack_names[i]);
}

close_object_store(the_repository->objects);

/*
@ -1525,7 +1668,8 @@ int cmd_repack(int argc,

if (write_midx) {
struct string_list include = STRING_LIST_INIT_DUP;
midx_included_packs(&include, &existing, &names, &geometry);
midx_included_packs(&include, &existing, midx_pack_names,
midx_pack_names_nr, &names, &geometry);

ret = write_midx_included_packs(&include, &geometry, &names,
refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL,
@ -1576,6 +1720,9 @@ cleanup:
string_list_clear(&names, 1);
existing_packs_release(&existing);
free_pack_geometry(&geometry);
for (size_t i = 0; i < midx_pack_names_nr; i++)
free(midx_pack_names[i]);
free(midx_pack_names);
pack_objects_args_release(&po_args);
pack_objects_args_release(&cruft_po_args);


View File

@ -64,7 +64,7 @@ test_expect_success '--stdin-packs is incompatible with --filter' '
cd stdin-packs &&
test_must_fail git pack-objects --stdin-packs --stdout \
--filter=blob:none </dev/null 2>err &&
test_grep "cannot use --filter with --stdin-packs" err
test_grep "options .--stdin-packs. and .--filter. cannot be used together" err
)
'

@ -236,4 +236,124 @@ test_expect_success 'pack-objects --stdin with packfiles from main and alternate
test_cmp expected-objects actual-objects
'

objdir=.git/objects
packdir=$objdir/pack

# Usage: objects_in_packs <pack-hash>...
#
# For each argument <p>, feed "$packdir/pack-<p>.idx" to `git show-index`
# and print the second space-separated field of every resulting line
# (presumably the object ID), sorted, on stdout. Fails if any index
# cannot be read.
objects_in_packs () {
for p in "$@"
do
git show-index <"$packdir/pack-$p.idx" || return 1
done >objects.raw &&

cut -d' ' -f2 objects.raw | sort &&
# objects.raw is only a scratch file; remove it once consumed.
rm -f objects.raw
}

test_expect_success '--stdin-packs=follow walks into unknown packs' '
test_when_finished "rm -fr repo" &&

git init repo &&
(
cd repo &&

for c in A B C D
do
test_commit "$c" || return 1
done &&

A="$(echo A | git pack-objects --revs $packdir/pack)" &&
B="$(echo A..B | git pack-objects --revs $packdir/pack)" &&
C="$(echo B..C | git pack-objects --revs $packdir/pack)" &&
D="$(echo C..D | git pack-objects --revs $packdir/pack)" &&
test_commit E &&

git prune-packed &&

cat >in <<-EOF &&
pack-$B.pack
^pack-$C.pack
pack-$D.pack
EOF

# With just --stdin-packs, pack "A" is unknown to us, so
# only objects from packs "B" and "D" are included in
# the output pack.
P=$(git pack-objects --stdin-packs $packdir/pack <in) &&
objects_in_packs $B $D >expect &&
objects_in_packs $P >actual &&
test_cmp expect actual &&

# But with --stdin-packs=follow, objects from both
# included packs reach objects from the unknown pack, so
# objects from pack "A" are included in the output pack
# in addition to the above.
P=$(git pack-objects --stdin-packs=follow $packdir/pack <in) &&
objects_in_packs $A $B $D >expect &&
objects_in_packs $P >actual &&
test_cmp expect actual &&

# And with --unpacked, we will pick up objects from unknown
# packs that are reachable from loose objects. Loose object E
# reaches objects in pack A, but there are three excluded packs
# in between.
#
# The resulting pack should include objects reachable from E
# that are not present in packs B, C, or D, along with those
# present in pack A.
cat >in <<-EOF &&
^pack-$B.pack
^pack-$C.pack
^pack-$D.pack
EOF

P=$(git pack-objects --stdin-packs=follow --unpacked \
$packdir/pack <in) &&

{
objects_in_packs $A &&
git rev-list --objects --no-object-names D..E
}>expect.raw &&
sort expect.raw >expect &&
objects_in_packs $P >actual &&
test_cmp expect actual
)
'

# Usage: stdin_packs__follow_with_only <rev>...
#
# Build a scratch repository with commits A and B, then write two packs:
# one from `--revs` input "A", and one containing exactly the objects
# that the given <rev> arguments resolve to. Every loose object file is
# then removed, so any object not listed in <rev> and not in the first
# pack is no longer available. Finally run
# `git pack-objects --stdin-packs=follow` over all packs in $packdir and
# discard the output; the function succeeds only if that command does.
stdin_packs__follow_with_only () {
rm -fr stdin_packs__follow_with_only &&
git init stdin_packs__follow_with_only &&
(
cd stdin_packs__follow_with_only &&

test_commit A &&
test_commit B &&

# The caller chooses which objects end up in the second pack.
git rev-parse "$@" >B.objects &&

echo A | git pack-objects --revs $packdir/pack &&
B="$(git pack-objects $packdir/pack <B.objects)" &&

# Remove the loose copy of every object so that only packed
# objects remain.
git cat-file --batch-check="%(objectname)" --batch-all-objects >objs &&
for obj in $(cat objs)
do
rm -f $objdir/$(test_oid_to_path $obj) || return 1
done &&

( cd $packdir && ls pack-*.pack ) >in &&
git pack-objects --stdin-packs=follow --stdout >/dev/null <in
)
}

test_expect_success '--stdin-packs=follow tolerates missing blobs' '
stdin_packs__follow_with_only HEAD HEAD^{tree}
'

test_expect_success '--stdin-packs=follow tolerates missing trees' '
stdin_packs__follow_with_only HEAD HEAD:B.t
'

# NOTE(review): the arguments below are identical to those of the
# "tolerates missing blobs" test earlier in this file (HEAD and
# HEAD^{tree}), so the object left out of the second pack here is a
# blob, not a commit -- confirm which objects this case was meant to
# omit.
test_expect_success '--stdin-packs=follow tolerates missing commits' '
stdin_packs__follow_with_only HEAD HEAD^{tree}
'

test_done

View File

@ -724,4 +724,149 @@ test_expect_success 'cruft repack respects --quiet' '
)
'

# Usage: setup_cruft_exclude_tests <dir>
#
# Initialize a repository in <dir> with repack.midxMustContainCruft set
# to false and the following history:
#
#   - commit "one" (tagged by test_commit),
#   - untagged commits "two" and "three" on top of it, which are then
#     abandoned by resetting back to "one" and expiring all reflogs,
#   - a `git repack --cruft -d` run (with GIT_TEST_MULTI_PACK_INDEX=0),
#   - a merge of commit "two" followed by a new commit "four".
setup_cruft_exclude_tests() {
git init "$1" &&
(
cd "$1" &&

git config repack.midxMustContainCruft false &&

test_commit one &&

test_commit --no-tag two &&
two="$(git rev-parse HEAD)" &&
test_commit --no-tag three &&
three="$(git rev-parse HEAD)" &&
git reset --hard one &&
git reflog expire --all --expire=all &&

GIT_TEST_MULTI_PACK_INDEX=0 git repack --cruft -d &&

# Refer to "two" again after the cruft repack above.
git merge $two &&
test_commit four
)
}

test_expect_success 'repack --write-midx excludes cruft where possible' '
setup_cruft_exclude_tests exclude-cruft-when-possible &&
(
cd exclude-cruft-when-possible &&

GIT_TEST_MULTI_PACK_INDEX=0 \
git repack -d --geometric=2 --write-midx --write-bitmap-index &&

test-tool read-midx --show-objects $objdir >midx &&
cruft="$(ls $packdir/*.mtimes)" &&
test_grep ! "$(basename "$cruft" .mtimes).idx" midx &&

git rev-list --all --objects --no-object-names >reachable.raw &&
sort reachable.raw >reachable.objects &&
awk "/\.pack$/ { print \$1 }" <midx | sort >midx.objects &&

test_cmp reachable.objects midx.objects
)
'

test_expect_success 'repack --write-midx includes cruft when instructed' '
setup_cruft_exclude_tests exclude-cruft-when-instructed &&
(
cd exclude-cruft-when-instructed &&

GIT_TEST_MULTI_PACK_INDEX=0 \
git -c repack.midxMustContainCruft=true repack \
-d --geometric=2 --write-midx --write-bitmap-index &&

test-tool read-midx --show-objects $objdir >midx &&
cruft="$(ls $packdir/*.mtimes)" &&
test_grep "$(basename "$cruft" .mtimes).idx" midx &&

git cat-file --batch-check="%(objectname)" --batch-all-objects \
>all.objects &&
awk "/\.pack$/ { print \$1 }" <midx | sort >midx.objects &&

test_cmp all.objects midx.objects
)
'

test_expect_success 'repack --write-midx includes cruft when necessary' '
setup_cruft_exclude_tests exclude-cruft-when-necessary &&
(
cd exclude-cruft-when-necessary &&

test_path_is_file $(ls $packdir/pack-*.mtimes) &&
( cd $packdir && ls pack-*.idx ) | sort >packs.all &&
git multi-pack-index write --stdin-packs --bitmap <packs.all &&

test_commit five &&
GIT_TEST_MULTI_PACK_INDEX=0 \
git repack -d --geometric=2 --write-midx --write-bitmap-index &&

test-tool read-midx --show-objects $objdir >midx &&
awk "/\.pack$/ { print \$1 }" <midx | sort >midx.objects &&
git cat-file --batch-all-objects --batch-check="%(objectname)" \
>expect.objects &&
test_cmp expect.objects midx.objects &&

grep "^pack-" midx >midx.packs &&
test_line_count = "$(($(wc -l <packs.all) + 1))" midx.packs
)
'

test_expect_success 'repack --write-midx includes cruft when already geometric' '
git init repack--write-midx-geometric-noop &&
(
cd repack--write-midx-geometric-noop &&

git branch -M main &&
test_commit A &&
test_commit B &&

git checkout -B side &&
test_commit --no-tag C &&
C="$(git rev-parse HEAD)" &&

git checkout main &&
git branch -D side &&
git reflog expire --all --expire=all &&

# At this point we have two packs: one containing the
# objects belonging to commits A and B, and another
# (cruft) pack containing the objects belonging to
# commit C.
git repack --cruft -d &&

# Create a third pack which contains a merge commit
# making commit C reachable again.
#
# --no-ff is important here, as it ensures that we
# actually write a new object and subsequently a new
# pack to contain it.
git merge --no-ff $C &&
git repack -d &&

ls $packdir/pack-*.idx | sort >packs.all &&
cruft="$(ls $packdir/pack-*.mtimes)" &&
cruft="${cruft%.mtimes}.idx" &&

for idx in $(grep -v $cruft <packs.all)
do
git show-index <$idx >out &&
wc -l <out || return 1
done >sizes.raw &&

# Make sure that there are two non-cruft packs, and
# that one of them contains at least twice as many
# objects as the other, ensuring that they are already
# in a geometric progression.
sort -n sizes.raw >sizes &&
test_line_count = 2 sizes &&
s1=$(head -n 1 sizes) &&
s2=$(tail -n 1 sizes) &&
test "$s2" -gt "$((2 * $s1))" &&

git -c repack.midxMustContainCruft=false repack --geometric=2 \
--write-midx --write-bitmap-index
)
'

test_done