Merge branch 'tb/midx-avoid-cruft-packs' into ps/object-store-midx
* tb/midx-avoid-cruft-packs:
repack: exclude cruft pack(s) from the MIDX where possible
pack-objects: introduce '--stdin-packs=follow'
pack-objects: swap 'show_{object,commit}_pack_hint'
pack-objects: fix typo in 'show_object_pack_hint()'
pack-objects: perform name-hash traversal for unpacked objects
pack-objects: declare 'rev_info' for '--stdin-packs' earlier
pack-objects: factor out handling '--stdin-packs'
pack-objects: limit scope in 'add_object_entry_from_pack()'
pack-objects: use standard option incompatibility functions
maint
commit
c29998d1d4
|
|
@ -39,3 +39,10 @@ repack.cruftThreads::
|
|||
a cruft pack and the respective parameters are not given over
|
||||
the command line. See similarly named `pack.*` configuration
|
||||
variables for defaults and meaning.
|
||||
|
||||
repack.midxMustContainCruft::
|
||||
When set to true, linkgit:git-repack[1] will unconditionally include
|
||||
cruft pack(s), if any, in the multi-pack index when invoked with
|
||||
`--write-midx`. When false, cruft packs are only included in the MIDX
|
||||
when necessary (e.g., because they might be required to form a
|
||||
reachability closure with MIDX bitmaps). Defaults to true.
|
||||
|
|
|
|||
|
|
@ -87,13 +87,21 @@ base-name::
|
|||
reference was included in the resulting packfile. This
|
||||
can be useful to send new tags to native Git clients.
|
||||
|
||||
--stdin-packs::
|
||||
--stdin-packs[=<mode>]::
|
||||
Read the basenames of packfiles (e.g., `pack-1234abcd.pack`)
|
||||
from the standard input, instead of object names or revision
|
||||
arguments. The resulting pack contains all objects listed in the
|
||||
included packs (those not beginning with `^`), excluding any
|
||||
objects listed in the excluded packs (beginning with `^`).
|
||||
+
|
||||
When `mode` is "follow", objects from packs not listed on stdin receive
|
||||
special treatment. Objects within unlisted packs will be included if
|
||||
those objects are (1) reachable from the included packs, and (2) not
|
||||
found in any excluded packs. This mode is useful, for example, to
|
||||
resurrect once-unreachable objects found in cruft packs to generate
|
||||
packs which are closed under reachability up to the boundary set by the
|
||||
excluded packs.
|
||||
+
|
||||
Incompatible with `--revs`, or options that imply `--revs` (such as
|
||||
`--all`), with the exception of `--unpacked`, which is compatible.
|
||||
|
||||
|
|
|
|||
|
|
@ -284,6 +284,12 @@ static struct oidmap configured_exclusions;
|
|||
static struct oidset excluded_by_config;
|
||||
static int name_hash_version = -1;
|
||||
|
||||
enum stdin_packs_mode {
|
||||
STDIN_PACKS_MODE_NONE,
|
||||
STDIN_PACKS_MODE_STANDARD,
|
||||
STDIN_PACKS_MODE_FOLLOW,
|
||||
};
|
||||
|
||||
/**
|
||||
* Check whether the name_hash_version chosen by user input is appropriate,
|
||||
* and also validate whether it is compatible with other features.
|
||||
|
|
@ -3727,7 +3733,6 @@ static int add_object_entry_from_pack(const struct object_id *oid,
|
|||
return 0;
|
||||
|
||||
if (p) {
|
||||
struct rev_info *revs = _data;
|
||||
struct object_info oi = OBJECT_INFO_INIT;
|
||||
|
||||
oi.typep = &type;
|
||||
|
|
@ -3735,6 +3740,7 @@ static int add_object_entry_from_pack(const struct object_id *oid,
|
|||
die(_("could not get type of object %s in pack %s"),
|
||||
oid_to_hex(oid), p->pack_name);
|
||||
} else if (type == OBJ_COMMIT) {
|
||||
struct rev_info *revs = _data;
|
||||
/*
|
||||
* commits in included packs are used as starting points for the
|
||||
* subsequent revision walk
|
||||
|
|
@ -3750,32 +3756,48 @@ static int add_object_entry_from_pack(const struct object_id *oid,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void show_commit_pack_hint(struct commit *commit UNUSED,
|
||||
void *data UNUSED)
|
||||
static void show_object_pack_hint(struct object *object, const char *name,
|
||||
void *data)
|
||||
{
|
||||
/* nothing to do; commits don't have a namehash */
|
||||
enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data;
|
||||
if (mode == STDIN_PACKS_MODE_FOLLOW) {
|
||||
if (object->type == OBJ_BLOB &&
|
||||
!has_object(the_repository, &object->oid, 0))
|
||||
return;
|
||||
add_object_entry(&object->oid, object->type, name, 0);
|
||||
} else {
|
||||
struct object_entry *oe = packlist_find(&to_pack, &object->oid);
|
||||
if (!oe)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Our 'to_pack' list was constructed by iterating all
|
||||
* objects packed in included packs, and so doesn't have
|
||||
* a non-zero hash field that you would typically pick
|
||||
* up during a reachability traversal.
|
||||
*
|
||||
* Make a best-effort attempt to fill in the ->hash and
|
||||
* ->no_try_delta fields here in order to perhaps
|
||||
* improve the delta selection process.
|
||||
*/
|
||||
oe->hash = pack_name_hash_fn(name);
|
||||
oe->no_try_delta = name && no_try_delta(name);
|
||||
|
||||
stdin_packs_hints_nr++;
|
||||
}
|
||||
}
|
||||
|
||||
static void show_object_pack_hint(struct object *object, const char *name,
|
||||
void *data UNUSED)
|
||||
static void show_commit_pack_hint(struct commit *commit, void *data)
|
||||
{
|
||||
struct object_entry *oe = packlist_find(&to_pack, &object->oid);
|
||||
if (!oe)
|
||||
enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data;
|
||||
|
||||
if (mode == STDIN_PACKS_MODE_FOLLOW) {
|
||||
show_object_pack_hint((struct object *)commit, "", data);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Our 'to_pack' list was constructed by iterating all objects packed in
|
||||
* included packs, and so doesn't have a non-zero hash field that you
|
||||
* would typically pick up during a reachability traversal.
|
||||
*
|
||||
* Make a best-effort attempt to fill in the ->hash and ->no_try_delta
|
||||
* here using a now in order to perhaps improve the delta selection
|
||||
* process.
|
||||
*/
|
||||
oe->hash = pack_name_hash_fn(name);
|
||||
oe->no_try_delta = name && no_try_delta(name);
|
||||
/* nothing to do; commits don't have a namehash */
|
||||
|
||||
stdin_packs_hints_nr++;
|
||||
}
|
||||
|
||||
static int pack_mtime_cmp(const void *_a, const void *_b)
|
||||
|
|
@ -3795,7 +3817,7 @@ static int pack_mtime_cmp(const void *_a, const void *_b)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void read_packs_list_from_stdin(void)
|
||||
static void read_packs_list_from_stdin(struct rev_info *revs)
|
||||
{
|
||||
struct strbuf buf = STRBUF_INIT;
|
||||
struct string_list include_packs = STRING_LIST_INIT_DUP;
|
||||
|
|
@ -3803,24 +3825,6 @@ static void read_packs_list_from_stdin(void)
|
|||
struct string_list_item *item = NULL;
|
||||
|
||||
struct packed_git *p;
|
||||
struct rev_info revs;
|
||||
|
||||
repo_init_revisions(the_repository, &revs, NULL);
|
||||
/*
|
||||
* Use a revision walk to fill in the namehash of objects in the include
|
||||
* packs. To save time, we'll avoid traversing through objects that are
|
||||
* in excluded packs.
|
||||
*
|
||||
* That may cause us to avoid populating all of the namehash fields of
|
||||
* all included objects, but our goal is best-effort, since this is only
|
||||
* an optimization during delta selection.
|
||||
*/
|
||||
revs.no_kept_objects = 1;
|
||||
revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
|
||||
revs.blob_objects = 1;
|
||||
revs.tree_objects = 1;
|
||||
revs.tag_objects = 1;
|
||||
revs.ignore_missing_links = 1;
|
||||
|
||||
while (strbuf_getline(&buf, stdin) != EOF) {
|
||||
if (!buf.len)
|
||||
|
|
@ -3890,25 +3894,55 @@ static void read_packs_list_from_stdin(void)
|
|||
struct packed_git *p = item->util;
|
||||
for_each_object_in_pack(p,
|
||||
add_object_entry_from_pack,
|
||||
&revs,
|
||||
revs,
|
||||
FOR_EACH_OBJECT_PACK_ORDER);
|
||||
}
|
||||
|
||||
strbuf_release(&buf);
|
||||
string_list_clear(&include_packs, 0);
|
||||
string_list_clear(&exclude_packs, 0);
|
||||
}
|
||||
|
||||
static void add_unreachable_loose_objects(struct rev_info *revs);
|
||||
|
||||
static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked)
|
||||
{
|
||||
struct rev_info revs;
|
||||
|
||||
repo_init_revisions(the_repository, &revs, NULL);
|
||||
/*
|
||||
* Use a revision walk to fill in the namehash of objects in the include
|
||||
* packs. To save time, we'll avoid traversing through objects that are
|
||||
* in excluded packs.
|
||||
*
|
||||
* That may cause us to avoid populating all of the namehash fields of
|
||||
* all included objects, but our goal is best-effort, since this is only
|
||||
* an optimization during delta selection.
|
||||
*/
|
||||
revs.no_kept_objects = 1;
|
||||
revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
|
||||
revs.blob_objects = 1;
|
||||
revs.tree_objects = 1;
|
||||
revs.tag_objects = 1;
|
||||
revs.ignore_missing_links = 1;
|
||||
|
||||
/* avoids adding objects in excluded packs */
|
||||
ignore_packed_keep_in_core = 1;
|
||||
read_packs_list_from_stdin(&revs);
|
||||
if (rev_list_unpacked)
|
||||
add_unreachable_loose_objects(&revs);
|
||||
|
||||
if (prepare_revision_walk(&revs))
|
||||
die(_("revision walk setup failed"));
|
||||
traverse_commit_list(&revs,
|
||||
show_commit_pack_hint,
|
||||
show_object_pack_hint,
|
||||
NULL);
|
||||
&mode);
|
||||
|
||||
trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found",
|
||||
stdin_packs_found_nr);
|
||||
trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints",
|
||||
stdin_packs_hints_nr);
|
||||
|
||||
strbuf_release(&buf);
|
||||
string_list_clear(&include_packs, 0);
|
||||
string_list_clear(&exclude_packs, 0);
|
||||
}
|
||||
|
||||
static void add_cruft_object_entry(const struct object_id *oid, enum object_type type,
|
||||
|
|
@ -4006,7 +4040,6 @@ static void mark_pack_kept_in_core(struct string_list *packs, unsigned keep)
|
|||
}
|
||||
}
|
||||
|
||||
static void add_unreachable_loose_objects(void);
|
||||
static void add_objects_in_unpacked_packs(void);
|
||||
|
||||
static void enumerate_cruft_objects(void)
|
||||
|
|
@ -4016,7 +4049,7 @@ static void enumerate_cruft_objects(void)
|
|||
_("Enumerating cruft objects"), 0);
|
||||
|
||||
add_objects_in_unpacked_packs();
|
||||
add_unreachable_loose_objects();
|
||||
add_unreachable_loose_objects(NULL);
|
||||
|
||||
stop_progress(&progress_state);
|
||||
}
|
||||
|
|
@ -4294,8 +4327,9 @@ static void add_objects_in_unpacked_packs(void)
|
|||
}
|
||||
|
||||
static int add_loose_object(const struct object_id *oid, const char *path,
|
||||
void *data UNUSED)
|
||||
void *data)
|
||||
{
|
||||
struct rev_info *revs = data;
|
||||
enum object_type type = odb_read_object_info(the_repository->objects, oid, NULL);
|
||||
|
||||
if (type < 0) {
|
||||
|
|
@ -4316,6 +4350,10 @@ static int add_loose_object(const struct object_id *oid, const char *path,
|
|||
} else {
|
||||
add_object_entry(oid, type, "", 0);
|
||||
}
|
||||
|
||||
if (revs && type == OBJ_COMMIT)
|
||||
add_pending_oid(revs, NULL, oid, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -4324,11 +4362,10 @@ static int add_loose_object(const struct object_id *oid, const char *path,
|
|||
* add_object_entry will weed out duplicates, so we just add every
|
||||
* loose object we find.
|
||||
*/
|
||||
static void add_unreachable_loose_objects(void)
|
||||
static void add_unreachable_loose_objects(struct rev_info *revs)
|
||||
{
|
||||
for_each_loose_file_in_objdir(repo_get_object_directory(the_repository),
|
||||
add_loose_object,
|
||||
NULL, NULL, NULL);
|
||||
add_loose_object, NULL, NULL, revs);
|
||||
}
|
||||
|
||||
static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid)
|
||||
|
|
@ -4675,7 +4712,7 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av)
|
|||
if (keep_unreachable)
|
||||
add_objects_in_unpacked_packs();
|
||||
if (pack_loose_unreachable)
|
||||
add_unreachable_loose_objects();
|
||||
add_unreachable_loose_objects(NULL);
|
||||
if (unpack_unreachable)
|
||||
loosen_unused_packed_objects();
|
||||
|
||||
|
|
@ -4782,6 +4819,23 @@ static int is_not_in_promisor_pack(struct commit *commit, void *data) {
|
|||
return is_not_in_promisor_pack_obj((struct object *) commit, data);
|
||||
}
|
||||
|
||||
static int parse_stdin_packs_mode(const struct option *opt, const char *arg,
|
||||
int unset)
|
||||
{
|
||||
enum stdin_packs_mode *mode = opt->value;
|
||||
|
||||
if (unset)
|
||||
*mode = STDIN_PACKS_MODE_NONE;
|
||||
else if (!arg || !*arg)
|
||||
*mode = STDIN_PACKS_MODE_STANDARD;
|
||||
else if (!strcmp(arg, "follow"))
|
||||
*mode = STDIN_PACKS_MODE_FOLLOW;
|
||||
else
|
||||
die(_("invalid value for '%s': '%s'"), opt->long_name, arg);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cmd_pack_objects(int argc,
|
||||
const char **argv,
|
||||
const char *prefix,
|
||||
|
|
@ -4792,7 +4846,7 @@ int cmd_pack_objects(int argc,
|
|||
struct strvec rp = STRVEC_INIT;
|
||||
int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
|
||||
int rev_list_index = 0;
|
||||
int stdin_packs = 0;
|
||||
enum stdin_packs_mode stdin_packs = STDIN_PACKS_MODE_NONE;
|
||||
struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
|
||||
struct list_objects_filter_options filter_options =
|
||||
LIST_OBJECTS_FILTER_INIT;
|
||||
|
|
@ -4847,6 +4901,9 @@ int cmd_pack_objects(int argc,
|
|||
OPT_SET_INT_F(0, "indexed-objects", &rev_list_index,
|
||||
N_("include objects referred to by the index"),
|
||||
1, PARSE_OPT_NONEG),
|
||||
OPT_CALLBACK_F(0, "stdin-packs", &stdin_packs, N_("mode"),
|
||||
N_("read packs from stdin"),
|
||||
PARSE_OPT_OPTARG, parse_stdin_packs_mode),
|
||||
OPT_BOOL(0, "stdin-packs", &stdin_packs,
|
||||
N_("read packs from stdin")),
|
||||
OPT_BOOL(0, "stdout", &pack_to_stdout,
|
||||
|
|
@ -5012,9 +5069,10 @@ int cmd_pack_objects(int argc,
|
|||
strvec_push(&rp, "--unpacked");
|
||||
}
|
||||
|
||||
if (exclude_promisor_objects && exclude_promisor_objects_best_effort)
|
||||
die(_("options '%s' and '%s' cannot be used together"),
|
||||
"--exclude-promisor-objects", "--exclude-promisor-objects-best-effort");
|
||||
die_for_incompatible_opt2(exclude_promisor_objects,
|
||||
"--exclude-promisor-objects",
|
||||
exclude_promisor_objects_best_effort,
|
||||
"--exclude-promisor-objects-best-effort");
|
||||
if (exclude_promisor_objects) {
|
||||
use_internal_rev_list = 1;
|
||||
fetch_if_missing = 0;
|
||||
|
|
@ -5052,13 +5110,14 @@ int cmd_pack_objects(int argc,
|
|||
if (!pack_to_stdout && thin)
|
||||
die(_("--thin cannot be used to build an indexable pack"));
|
||||
|
||||
if (keep_unreachable && unpack_unreachable)
|
||||
die(_("options '%s' and '%s' cannot be used together"), "--keep-unreachable", "--unpack-unreachable");
|
||||
die_for_incompatible_opt2(keep_unreachable, "--keep-unreachable",
|
||||
unpack_unreachable, "--unpack-unreachable");
|
||||
if (!rev_list_all || !rev_list_reflog || !rev_list_index)
|
||||
unpack_unreachable_expiration = 0;
|
||||
|
||||
if (stdin_packs && filter_options.choice)
|
||||
die(_("cannot use --filter with --stdin-packs"));
|
||||
die_for_incompatible_opt2(stdin_packs, "--stdin-packs",
|
||||
filter_options.choice, "--filter");
|
||||
|
||||
|
||||
if (stdin_packs && use_internal_rev_list)
|
||||
die(_("cannot use internal rev list with --stdin-packs"));
|
||||
|
|
@ -5066,8 +5125,8 @@ int cmd_pack_objects(int argc,
|
|||
if (cruft) {
|
||||
if (use_internal_rev_list)
|
||||
die(_("cannot use internal rev list with --cruft"));
|
||||
if (stdin_packs)
|
||||
die(_("cannot use --stdin-packs with --cruft"));
|
||||
die_for_incompatible_opt2(stdin_packs, "--stdin-packs",
|
||||
cruft, "--cruft");
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -5135,11 +5194,7 @@ int cmd_pack_objects(int argc,
|
|||
progress_state = start_progress(the_repository,
|
||||
_("Enumerating objects"), 0);
|
||||
if (stdin_packs) {
|
||||
/* avoids adding objects in excluded packs */
|
||||
ignore_packed_keep_in_core = 1;
|
||||
read_packs_list_from_stdin();
|
||||
if (rev_list_unpacked)
|
||||
add_unreachable_loose_objects();
|
||||
read_stdin_packs(stdin_packs, rev_list_unpacked);
|
||||
} else if (cruft) {
|
||||
read_cruft_objects();
|
||||
} else if (!use_internal_rev_list) {
|
||||
|
|
|
|||
187
builtin/repack.c
187
builtin/repack.c
|
|
@ -39,6 +39,7 @@ static int write_bitmaps = -1;
|
|||
static int use_delta_islands;
|
||||
static int run_update_server_info = 1;
|
||||
static char *packdir, *packtmp_name, *packtmp;
|
||||
static int midx_must_contain_cruft = 1;
|
||||
|
||||
static const char *const git_repack_usage[] = {
|
||||
N_("git repack [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m]\n"
|
||||
|
|
@ -108,6 +109,10 @@ static int repack_config(const char *var, const char *value,
|
|||
free(cruft_po_args->threads);
|
||||
return git_config_string(&cruft_po_args->threads, var, value);
|
||||
}
|
||||
if (!strcmp(var, "repack.midxmustcontaincruft")) {
|
||||
midx_must_contain_cruft = git_config_bool(var, value);
|
||||
return 0;
|
||||
}
|
||||
return git_default_config(var, value, ctx, cb);
|
||||
}
|
||||
|
||||
|
|
@ -690,6 +695,77 @@ static void free_pack_geometry(struct pack_geometry *geometry)
|
|||
free(geometry->pack);
|
||||
}
|
||||
|
||||
static int midx_has_unknown_packs(char **midx_pack_names,
|
||||
size_t midx_pack_names_nr,
|
||||
struct string_list *include,
|
||||
struct pack_geometry *geometry,
|
||||
struct existing_packs *existing)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
string_list_sort(include);
|
||||
|
||||
for (i = 0; i < midx_pack_names_nr; i++) {
|
||||
const char *pack_name = midx_pack_names[i];
|
||||
|
||||
/*
|
||||
* Determine whether or not each MIDX'd pack from the existing
|
||||
* MIDX (if any) is represented in the new MIDX. For each pack
|
||||
* in the MIDX, it must either be:
|
||||
*
|
||||
* - In the "include" list of packs to be included in the new
|
||||
* MIDX. Note this function is called before the include
|
||||
* list is populated with any cruft pack(s).
|
||||
*
|
||||
* - Below the geometric split line (if using pack geometry),
|
||||
* indicating that the pack won't be included in the new
|
||||
* MIDX, but its contents were rolled up as part of the
|
||||
* geometric repack.
|
||||
*
|
||||
* - In the existing non-kept packs list (if not using pack
|
||||
* geometry), and marked as non-deleted.
|
||||
*/
|
||||
if (string_list_has_string(include, pack_name)) {
|
||||
continue;
|
||||
} else if (geometry) {
|
||||
struct strbuf buf = STRBUF_INIT;
|
||||
uint32_t j;
|
||||
|
||||
for (j = 0; j < geometry->split; j++) {
|
||||
strbuf_reset(&buf);
|
||||
strbuf_addstr(&buf, pack_basename(geometry->pack[j]));
|
||||
strbuf_strip_suffix(&buf, ".pack");
|
||||
strbuf_addstr(&buf, ".idx");
|
||||
|
||||
if (!strcmp(pack_name, buf.buf)) {
|
||||
strbuf_release(&buf);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
strbuf_release(&buf);
|
||||
|
||||
if (j < geometry->split)
|
||||
continue;
|
||||
} else {
|
||||
struct string_list_item *item;
|
||||
|
||||
item = string_list_lookup(&existing->non_kept_packs,
|
||||
pack_name);
|
||||
if (item && !pack_is_marked_for_deletion(item))
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we got to this point, the MIDX includes some pack that we
|
||||
* don't know about.
|
||||
*/
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct midx_snapshot_ref_data {
|
||||
struct tempfile *f;
|
||||
struct oidset seen;
|
||||
|
|
@ -758,6 +834,8 @@ static void midx_snapshot_refs(struct tempfile *f)
|
|||
|
||||
static void midx_included_packs(struct string_list *include,
|
||||
struct existing_packs *existing,
|
||||
char **midx_pack_names,
|
||||
size_t midx_pack_names_nr,
|
||||
struct string_list *names,
|
||||
struct pack_geometry *geometry)
|
||||
{
|
||||
|
|
@ -811,26 +889,56 @@ static void midx_included_packs(struct string_list *include,
|
|||
}
|
||||
}
|
||||
|
||||
for_each_string_list_item(item, &existing->cruft_packs) {
|
||||
if (midx_must_contain_cruft ||
|
||||
midx_has_unknown_packs(midx_pack_names, midx_pack_names_nr,
|
||||
include, geometry, existing)) {
|
||||
/*
|
||||
* When doing a --geometric repack, there is no need to check
|
||||
* for deleted packs, since we're by definition not doing an
|
||||
* ALL_INTO_ONE repack (hence no packs will be deleted).
|
||||
* Otherwise we must check for and exclude any packs which are
|
||||
* enqueued for deletion.
|
||||
* If there are one or more unknown pack(s) present (see
|
||||
* midx_has_unknown_packs() for what makes a pack
|
||||
* "unknown") in the MIDX before the repack, keep them
|
||||
* as they may be required to form a reachability
|
||||
* closure if the MIDX is bitmapped.
|
||||
*
|
||||
* So we could omit the conditional below in the --geometric
|
||||
* case, but doing so is unnecessary since no packs are marked
|
||||
* as pending deletion (since we only call
|
||||
* `mark_packs_for_deletion()` when doing an all-into-one
|
||||
* repack).
|
||||
* For example, a cruft pack can be required to form a
|
||||
* reachability closure if the MIDX is bitmapped and one
|
||||
* or more of the bitmap's selected commits reaches a
|
||||
* once-cruft object that was later made reachable.
|
||||
*/
|
||||
if (pack_is_marked_for_deletion(item))
|
||||
continue;
|
||||
for_each_string_list_item(item, &existing->cruft_packs) {
|
||||
/*
|
||||
* When doing a --geometric repack, there is no
|
||||
* need to check for deleted packs, since we're
|
||||
* by definition not doing an ALL_INTO_ONE
|
||||
* repack (hence no packs will be deleted).
|
||||
* Otherwise we must check for and exclude any
|
||||
* packs which are enqueued for deletion.
|
||||
*
|
||||
* So we could omit the conditional below in the
|
||||
* --geometric case, but doing so is unnecessary
|
||||
* since no packs are marked as pending
|
||||
* deletion (since we only call
|
||||
* `mark_packs_for_deletion()` when doing an
|
||||
* all-into-one repack).
|
||||
*/
|
||||
if (pack_is_marked_for_deletion(item))
|
||||
continue;
|
||||
|
||||
strbuf_reset(&buf);
|
||||
strbuf_addf(&buf, "%s.idx", item->string);
|
||||
string_list_insert(include, buf.buf);
|
||||
strbuf_reset(&buf);
|
||||
strbuf_addf(&buf, "%s.idx", item->string);
|
||||
string_list_insert(include, buf.buf);
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Modern versions of Git (with the appropriate
|
||||
* configuration setting) will write new copies of
|
||||
* once-cruft objects when doing a --geometric repack.
|
||||
*
|
||||
* If the MIDX has no cruft pack, new packs written
|
||||
* during a --geometric repack will not rely on the
|
||||
* cruft pack to form a reachability closure, so we can
|
||||
* avoid including the cruft pack(s) in the MIDX in that case.
|
||||
*/
|
||||
;
|
||||
}
|
||||
|
||||
strbuf_release(&buf);
|
||||
|
|
@ -1145,6 +1253,8 @@ int cmd_repack(int argc,
|
|||
struct tempfile *refs_snapshot = NULL;
|
||||
int i, ext, ret;
|
||||
int show_progress;
|
||||
char **midx_pack_names = NULL;
|
||||
size_t midx_pack_names_nr = 0;
|
||||
|
||||
/* variables to be filled by option parsing */
|
||||
int delete_redundant = 0;
|
||||
|
|
@ -1362,7 +1472,10 @@ int cmd_repack(int argc,
|
|||
!(pack_everything & PACK_CRUFT))
|
||||
strvec_push(&cmd.args, "--pack-loose-unreachable");
|
||||
} else if (geometry.split_factor) {
|
||||
strvec_push(&cmd.args, "--stdin-packs");
|
||||
if (midx_must_contain_cruft)
|
||||
strvec_push(&cmd.args, "--stdin-packs");
|
||||
else
|
||||
strvec_push(&cmd.args, "--stdin-packs=follow");
|
||||
strvec_push(&cmd.args, "--unpacked");
|
||||
} else {
|
||||
strvec_push(&cmd.args, "--unpacked");
|
||||
|
|
@ -1402,8 +1515,25 @@ int cmd_repack(int argc,
|
|||
if (ret)
|
||||
goto cleanup;
|
||||
|
||||
if (!names.nr && !po_args.quiet)
|
||||
printf_ln(_("Nothing new to pack."));
|
||||
if (!names.nr) {
|
||||
if (!po_args.quiet)
|
||||
printf_ln(_("Nothing new to pack."));
|
||||
/*
|
||||
* If we didn't write any new packs, the non-cruft packs
|
||||
* may refer to once-unreachable objects in the cruft
|
||||
* pack(s).
|
||||
*
|
||||
* If there isn't already a MIDX, the one we write
|
||||
* must include the cruft pack(s), in case the
|
||||
* non-cruft pack(s) refer to once-cruft objects.
|
||||
*
|
||||
* If there is already a MIDX, we can punt here, since
|
||||
* midx_has_unknown_packs() will make the decision for
|
||||
* us.
|
||||
*/
|
||||
if (!get_local_multi_pack_index(the_repository))
|
||||
midx_must_contain_cruft = 1;
|
||||
}
|
||||
|
||||
if (pack_everything & PACK_CRUFT) {
|
||||
const char *pack_prefix = find_pack_prefix(packdir, packtmp);
|
||||
|
|
@ -1484,6 +1614,19 @@ int cmd_repack(int argc,
|
|||
|
||||
string_list_sort(&names);
|
||||
|
||||
if (get_local_multi_pack_index(the_repository)) {
|
||||
struct multi_pack_index *m =
|
||||
get_local_multi_pack_index(the_repository);
|
||||
|
||||
ALLOC_ARRAY(midx_pack_names,
|
||||
m->num_packs + m->num_packs_in_base);
|
||||
|
||||
for (; m; m = m->base_midx)
|
||||
for (uint32_t i = 0; i < m->num_packs; i++)
|
||||
midx_pack_names[midx_pack_names_nr++] =
|
||||
xstrdup(m->pack_names[i]);
|
||||
}
|
||||
|
||||
close_object_store(the_repository->objects);
|
||||
|
||||
/*
|
||||
|
|
@ -1525,7 +1668,8 @@ int cmd_repack(int argc,
|
|||
|
||||
if (write_midx) {
|
||||
struct string_list include = STRING_LIST_INIT_DUP;
|
||||
midx_included_packs(&include, &existing, &names, &geometry);
|
||||
midx_included_packs(&include, &existing, midx_pack_names,
|
||||
midx_pack_names_nr, &names, &geometry);
|
||||
|
||||
ret = write_midx_included_packs(&include, &geometry, &names,
|
||||
refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL,
|
||||
|
|
@ -1576,6 +1720,9 @@ cleanup:
|
|||
string_list_clear(&names, 1);
|
||||
existing_packs_release(&existing);
|
||||
free_pack_geometry(&geometry);
|
||||
for (size_t i = 0; i < midx_pack_names_nr; i++)
|
||||
free(midx_pack_names[i]);
|
||||
free(midx_pack_names);
|
||||
pack_objects_args_release(&po_args);
|
||||
pack_objects_args_release(&cruft_po_args);
|
||||
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ test_expect_success '--stdin-packs is incompatible with --filter' '
|
|||
cd stdin-packs &&
|
||||
test_must_fail git pack-objects --stdin-packs --stdout \
|
||||
--filter=blob:none </dev/null 2>err &&
|
||||
test_grep "cannot use --filter with --stdin-packs" err
|
||||
test_grep "options .--stdin-packs. and .--filter. cannot be used together" err
|
||||
)
|
||||
'
|
||||
|
||||
|
|
@ -236,4 +236,124 @@ test_expect_success 'pack-objects --stdin with packfiles from main and alternate
|
|||
test_cmp expected-objects actual-objects
|
||||
'
|
||||
|
||||
objdir=.git/objects
|
||||
packdir=$objdir/pack
|
||||
|
||||
objects_in_packs () {
|
||||
for p in "$@"
|
||||
do
|
||||
git show-index <"$packdir/pack-$p.idx" || return 1
|
||||
done >objects.raw &&
|
||||
|
||||
cut -d' ' -f2 objects.raw | sort &&
|
||||
rm -f objects.raw
|
||||
}
|
||||
|
||||
test_expect_success '--stdin-packs=follow walks into unknown packs' '
|
||||
test_when_finished "rm -fr repo" &&
|
||||
|
||||
git init repo &&
|
||||
(
|
||||
cd repo &&
|
||||
|
||||
for c in A B C D
|
||||
do
|
||||
test_commit "$c" || return 1
|
||||
done &&
|
||||
|
||||
A="$(echo A | git pack-objects --revs $packdir/pack)" &&
|
||||
B="$(echo A..B | git pack-objects --revs $packdir/pack)" &&
|
||||
C="$(echo B..C | git pack-objects --revs $packdir/pack)" &&
|
||||
D="$(echo C..D | git pack-objects --revs $packdir/pack)" &&
|
||||
test_commit E &&
|
||||
|
||||
git prune-packed &&
|
||||
|
||||
cat >in <<-EOF &&
|
||||
pack-$B.pack
|
||||
^pack-$C.pack
|
||||
pack-$D.pack
|
||||
EOF
|
||||
|
||||
# With just --stdin-packs, pack "A" is unknown to us, so
|
||||
# only objects from packs "B" and "D" are included in
|
||||
# the output pack.
|
||||
P=$(git pack-objects --stdin-packs $packdir/pack <in) &&
|
||||
objects_in_packs $B $D >expect &&
|
||||
objects_in_packs $P >actual &&
|
||||
test_cmp expect actual &&
|
||||
|
||||
# But with --stdin-packs=follow, objects from both
|
||||
# included packs reach objects from the unknown pack, so
|
||||
# objects from pack "A" are included in the output pack
|
||||
# in addition to the above.
|
||||
P=$(git pack-objects --stdin-packs=follow $packdir/pack <in) &&
|
||||
objects_in_packs $A $B $D >expect &&
|
||||
objects_in_packs $P >actual &&
|
||||
test_cmp expect actual &&
|
||||
|
||||
# And with --unpacked, we will pick up objects from unknown
|
||||
# packs that are reachable from loose objects. Loose object E
|
||||
# reaches objects in pack A, but there are three excluded packs
|
||||
# in between.
|
||||
#
|
||||
# The resulting pack should include objects reachable from E
|
||||
# that are not present in packs B, C, or D, along with those
|
||||
# present in pack A.
|
||||
cat >in <<-EOF &&
|
||||
^pack-$B.pack
|
||||
^pack-$C.pack
|
||||
^pack-$D.pack
|
||||
EOF
|
||||
|
||||
P=$(git pack-objects --stdin-packs=follow --unpacked \
|
||||
$packdir/pack <in) &&
|
||||
|
||||
{
|
||||
objects_in_packs $A &&
|
||||
git rev-list --objects --no-object-names D..E
|
||||
}>expect.raw &&
|
||||
sort expect.raw >expect &&
|
||||
objects_in_packs $P >actual &&
|
||||
test_cmp expect actual
|
||||
)
|
||||
'
|
||||
|
||||
stdin_packs__follow_with_only () {
|
||||
rm -fr stdin_packs__follow_with_only &&
|
||||
git init stdin_packs__follow_with_only &&
|
||||
(
|
||||
cd stdin_packs__follow_with_only &&
|
||||
|
||||
test_commit A &&
|
||||
test_commit B &&
|
||||
|
||||
git rev-parse "$@" >B.objects &&
|
||||
|
||||
echo A | git pack-objects --revs $packdir/pack &&
|
||||
B="$(git pack-objects $packdir/pack <B.objects)" &&
|
||||
|
||||
git cat-file --batch-check="%(objectname)" --batch-all-objects >objs &&
|
||||
for obj in $(cat objs)
|
||||
do
|
||||
rm -f $objdir/$(test_oid_to_path $obj) || return 1
|
||||
done &&
|
||||
|
||||
( cd $packdir && ls pack-*.pack ) >in &&
|
||||
git pack-objects --stdin-packs=follow --stdout >/dev/null <in
|
||||
)
|
||||
}
|
||||
|
||||
test_expect_success '--stdin-packs=follow tolerates missing blobs' '
|
||||
stdin_packs__follow_with_only HEAD HEAD^{tree}
|
||||
'
|
||||
|
||||
test_expect_success '--stdin-packs=follow tolerates missing trees' '
|
||||
stdin_packs__follow_with_only HEAD HEAD:B.t
|
||||
'
|
||||
|
||||
test_expect_success '--stdin-packs=follow tolerates missing commits' '
|
||||
stdin_packs__follow_with_only HEAD HEAD^{tree}
|
||||
'
|
||||
|
||||
test_done
|
||||
|
|
|
|||
|
|
@ -724,4 +724,149 @@ test_expect_success 'cruft repack respects --quiet' '
|
|||
)
|
||||
'
|
||||
|
||||
setup_cruft_exclude_tests() {
|
||||
git init "$1" &&
|
||||
(
|
||||
cd "$1" &&
|
||||
|
||||
git config repack.midxMustContainCruft false &&
|
||||
|
||||
test_commit one &&
|
||||
|
||||
test_commit --no-tag two &&
|
||||
two="$(git rev-parse HEAD)" &&
|
||||
test_commit --no-tag three &&
|
||||
three="$(git rev-parse HEAD)" &&
|
||||
git reset --hard one &&
|
||||
git reflog expire --all --expire=all &&
|
||||
|
||||
GIT_TEST_MULTI_PACK_INDEX=0 git repack --cruft -d &&
|
||||
|
||||
git merge $two &&
|
||||
test_commit four
|
||||
)
|
||||
}
|
||||
|
||||
test_expect_success 'repack --write-midx excludes cruft where possible' '
|
||||
setup_cruft_exclude_tests exclude-cruft-when-possible &&
|
||||
(
|
||||
cd exclude-cruft-when-possible &&
|
||||
|
||||
GIT_TEST_MULTI_PACK_INDEX=0 \
|
||||
git repack -d --geometric=2 --write-midx --write-bitmap-index &&
|
||||
|
||||
test-tool read-midx --show-objects $objdir >midx &&
|
||||
cruft="$(ls $packdir/*.mtimes)" &&
|
||||
test_grep ! "$(basename "$cruft" .mtimes).idx" midx &&
|
||||
|
||||
git rev-list --all --objects --no-object-names >reachable.raw &&
|
||||
sort reachable.raw >reachable.objects &&
|
||||
awk "/\.pack$/ { print \$1 }" <midx | sort >midx.objects &&
|
||||
|
||||
test_cmp reachable.objects midx.objects
|
||||
)
|
||||
'
|
||||
|
||||
test_expect_success 'repack --write-midx includes cruft when instructed' '
|
||||
setup_cruft_exclude_tests exclude-cruft-when-instructed &&
|
||||
(
|
||||
cd exclude-cruft-when-instructed &&
|
||||
|
||||
GIT_TEST_MULTI_PACK_INDEX=0 \
|
||||
git -c repack.midxMustContainCruft=true repack \
|
||||
-d --geometric=2 --write-midx --write-bitmap-index &&
|
||||
|
||||
test-tool read-midx --show-objects $objdir >midx &&
|
||||
cruft="$(ls $packdir/*.mtimes)" &&
|
||||
test_grep "$(basename "$cruft" .mtimes).idx" midx &&
|
||||
|
||||
git cat-file --batch-check="%(objectname)" --batch-all-objects \
|
||||
>all.objects &&
|
||||
awk "/\.pack$/ { print \$1 }" <midx | sort >midx.objects &&
|
||||
|
||||
test_cmp all.objects midx.objects
|
||||
)
|
||||
'
|
||||
|
||||
test_expect_success 'repack --write-midx includes cruft when necessary' '
|
||||
setup_cruft_exclude_tests exclude-cruft-when-necessary &&
|
||||
(
|
||||
cd exclude-cruft-when-necessary &&
|
||||
|
||||
test_path_is_file $(ls $packdir/pack-*.mtimes) &&
|
||||
( cd $packdir && ls pack-*.idx ) | sort >packs.all &&
|
||||
git multi-pack-index write --stdin-packs --bitmap <packs.all &&
|
||||
|
||||
test_commit five &&
|
||||
GIT_TEST_MULTI_PACK_INDEX=0 \
|
||||
git repack -d --geometric=2 --write-midx --write-bitmap-index &&
|
||||
|
||||
test-tool read-midx --show-objects $objdir >midx &&
|
||||
awk "/\.pack$/ { print \$1 }" <midx | sort >midx.objects &&
|
||||
git cat-file --batch-all-objects --batch-check="%(objectname)" \
|
||||
>expect.objects &&
|
||||
test_cmp expect.objects midx.objects &&
|
||||
|
||||
grep "^pack-" midx >midx.packs &&
|
||||
test_line_count = "$(($(wc -l <packs.all) + 1))" midx.packs
|
||||
)
|
||||
'
|
||||
|
||||
test_expect_success 'repack --write-midx includes cruft when already geometric' '
|
||||
git init repack--write-midx-geometric-noop &&
|
||||
(
|
||||
cd repack--write-midx-geometric-noop &&
|
||||
|
||||
git branch -M main &&
|
||||
test_commit A &&
|
||||
test_commit B &&
|
||||
|
||||
git checkout -B side &&
|
||||
test_commit --no-tag C &&
|
||||
C="$(git rev-parse HEAD)" &&
|
||||
|
||||
git checkout main &&
|
||||
git branch -D side &&
|
||||
git reflog expire --all --expire=all &&
|
||||
|
||||
# At this point we have two packs: one containing the
|
||||
# objects belonging to commits A and B, and another
|
||||
# (cruft) pack containing the objects belonging to
|
||||
# commit C.
|
||||
git repack --cruft -d &&
|
||||
|
||||
# Create a third pack which contains a merge commit
|
||||
# making commit C reachable again.
|
||||
#
|
||||
# --no-ff is important here, as it ensures that we
|
||||
# actually write a new object and subsequently a new
|
||||
# pack to contain it.
|
||||
git merge --no-ff $C &&
|
||||
git repack -d &&
|
||||
|
||||
ls $packdir/pack-*.idx | sort >packs.all &&
|
||||
cruft="$(ls $packdir/pack-*.mtimes)" &&
|
||||
cruft="${cruft%.mtimes}.idx" &&
|
||||
|
||||
for idx in $(grep -v $cruft <packs.all)
|
||||
do
|
||||
git show-index <$idx >out &&
|
||||
wc -l <out || return 1
|
||||
done >sizes.raw &&
|
||||
|
||||
# Make sure that there are two non-cruft packs, and
|
||||
# that one of them contains at least twice as many
|
||||
# objects as the other, ensuring that they are already
|
||||
# in a geometric progression.
|
||||
sort -n sizes.raw >sizes &&
|
||||
test_line_count = 2 sizes &&
|
||||
s1=$(head -n 1 sizes) &&
|
||||
s2=$(tail -n 1 sizes) &&
|
||||
test "$s2" -gt "$((2 * $s1))" &&
|
||||
|
||||
git -c repack.midxMustContainCruft=false repack --geometric=2 \
|
||||
--write-midx --write-bitmap-index
|
||||
)
|
||||
'
|
||||
|
||||
test_done
|
||||
|
|
|
|||
Loading…
Reference in New Issue