repack: begin combining cruft packs with `--combine-cruft-below-size`

The previous commit changed the behavior of repack's '--max-cruft-size'
to specify a cruft pack-specific override for '--max-pack-size'.

Introduce a new flag, '--combine-cruft-below-size' which is a
replacement for the old behavior of '--max-cruft-size'. This new flag
does explicitly what it says: it combines together cruft packs which are
smaller than a given threshold, and leaves alone ones which are
larger.

This accomplishes the original intent of '--max-cruft-size', which was
to avoid repacking cruft packs larger than the given threshold.

The new behavior is slightly different. Instead of building up small
packs together until the threshold is met, '--combine-cruft-below-size'
packs up *all* cruft packs smaller than the threshold. This means that
we may make a pack much larger than the given threshold (e.g., if you
aggregate 5 packs which are each 99 MiB in size with a threshold of 100
MiB).

But that's OK: the point isn't to restrict the size of the cruft packs
we generate, it's to avoid working with ones that have already grown too
large. If repositories still want to limit the size of the generated
cruft pack(s), they may use '--max-cruft-size'.

There's some minor test fallout as a result of the slight differences in
behavior between the old meaning of '--max-cruft-size' and the behavior
of '--combine-cruft-below-size'. In the test which is now called
"--combine-cruft-below-size combines packs", we need to use the new flag
over the old one to exercise that test's intended behavior. The
remainder of the changes there are to improve the clarity of the
comments.

Suggested-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Acked-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
maint
Taylor Blau 2025-03-19 18:52:58 -04:00 committed by Junio C Hamano
parent 0855ed966c
commit 484d7adcda
3 changed files with 47 additions and 22 deletions

View File

@ -81,6 +81,15 @@ to the new separate pack will be written.
`--max-pack-size` (if any) by default. See the documentation for `--max-pack-size` (if any) by default. See the documentation for
`--max-pack-size` for more details. `--max-pack-size` for more details.


--combine-cruft-below-size=<n>::
When generating cruft packs without pruning, only repack
existing cruft packs whose size is strictly less than `<n>`,
where `<n>` represents a number of bytes, which can optionally
be suffixed with "k", "m", or "g". Cruft packs whose size is
greater than or equal to `<n>` are left as-is and not repacked.
Useful when you want to avoid repacking large cruft pack(s) in
repositories that have many and/or large unreachable objects.

--expire-to=<dir>:: --expire-to=<dir>::
Write a cruft pack containing pruned objects (if any) to the Write a cruft pack containing pruned objects (if any) to the
directory `<dir>`. This option is useful for keeping a copy of directory `<dir>`. This option is useful for keeping a copy of

View File

@ -1022,20 +1022,13 @@ static int write_filtered_pack(const struct pack_objects_args *args,
return finish_pack_objects_cmd(&cmd, names, local); return finish_pack_objects_cmd(&cmd, names, local);
} }


static void collapse_small_cruft_packs(FILE *in, size_t max_size UNUSED, static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size,
struct existing_packs *existing) struct existing_packs *existing)
{ {
struct packed_git *p; struct packed_git *p;
struct strbuf buf = STRBUF_INIT; struct strbuf buf = STRBUF_INIT;
size_t i; size_t i;


/*
* Squelch a -Wunused-function warning while we rationalize
* the behavior of --max-cruft-size. This function will become
* used again in a future commit.
*/
(void)retain_cruft_pack;

for (p = get_all_packs(the_repository); p; p = p->next) { for (p = get_all_packs(the_repository); p; p = p->next) {
if (!(p->is_cruft && p->pack_local)) if (!(p->is_cruft && p->pack_local))
continue; continue;
@ -1047,7 +1040,12 @@ static void collapse_small_cruft_packs(FILE *in, size_t max_size UNUSED,
if (!string_list_has_string(&existing->cruft_packs, buf.buf)) if (!string_list_has_string(&existing->cruft_packs, buf.buf))
continue; continue;


fprintf(in, "-%s.pack\n", buf.buf); if (p->pack_size < combine_cruft_below_size) {
fprintf(in, "-%s\n", pack_basename(p));
} else {
retain_cruft_pack(existing, p);
fprintf(in, "%s\n", pack_basename(p));
}
} }


for (i = 0; i < existing->non_kept_packs.nr; i++) for (i = 0; i < existing->non_kept_packs.nr; i++)
@ -1061,6 +1059,7 @@ static int write_cruft_pack(const struct pack_objects_args *args,
const char *destination, const char *destination,
const char *pack_prefix, const char *pack_prefix,
const char *cruft_expiration, const char *cruft_expiration,
unsigned long combine_cruft_below_size,
struct string_list *names, struct string_list *names,
struct existing_packs *existing) struct existing_packs *existing)
{ {
@ -1103,8 +1102,9 @@ static int write_cruft_pack(const struct pack_objects_args *args,
in = xfdopen(cmd.in, "w"); in = xfdopen(cmd.in, "w");
for_each_string_list_item(item, names) for_each_string_list_item(item, names)
fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); fprintf(in, "%s-%s.pack\n", pack_prefix, item->string);
if (args->max_pack_size && !cruft_expiration) { if (combine_cruft_below_size && !cruft_expiration) {
collapse_small_cruft_packs(in, args->max_pack_size, existing); combine_small_cruft_packs(in, combine_cruft_below_size,
existing);
} else { } else {
for_each_string_list_item(item, &existing->non_kept_packs) for_each_string_list_item(item, &existing->non_kept_packs)
fprintf(in, "-%s.pack\n", item->string); fprintf(in, "-%s.pack\n", item->string);
@ -1158,6 +1158,7 @@ int cmd_repack(int argc,
const char *opt_window_memory = NULL; const char *opt_window_memory = NULL;
const char *opt_depth = NULL; const char *opt_depth = NULL;
const char *opt_threads = NULL; const char *opt_threads = NULL;
unsigned long combine_cruft_below_size = 0ul;


struct option builtin_repack_options[] = { struct option builtin_repack_options[] = {
OPT_BIT('a', NULL, &pack_everything, OPT_BIT('a', NULL, &pack_everything,
@ -1170,6 +1171,9 @@ int cmd_repack(int argc,
PACK_CRUFT), PACK_CRUFT),
OPT_STRING(0, "cruft-expiration", &cruft_expiration, N_("approxidate"), OPT_STRING(0, "cruft-expiration", &cruft_expiration, N_("approxidate"),
N_("with --cruft, expire objects older than this")), N_("with --cruft, expire objects older than this")),
OPT_MAGNITUDE(0, "combine-cruft-below-size",
&combine_cruft_below_size,
N_("with --cruft, only repack cruft packs smaller than this")),
OPT_MAGNITUDE(0, "max-cruft-size", &cruft_po_args.max_pack_size, OPT_MAGNITUDE(0, "max-cruft-size", &cruft_po_args.max_pack_size,
N_("with --cruft, limit the size of new cruft packs")), N_("with --cruft, limit the size of new cruft packs")),
OPT_BOOL('d', NULL, &delete_redundant, OPT_BOOL('d', NULL, &delete_redundant,
@ -1413,7 +1417,8 @@ int cmd_repack(int argc,
cruft_po_args.quiet = po_args.quiet; cruft_po_args.quiet = po_args.quiet;


ret = write_cruft_pack(&cruft_po_args, packtmp, pack_prefix, ret = write_cruft_pack(&cruft_po_args, packtmp, pack_prefix,
cruft_expiration, &names, cruft_expiration,
combine_cruft_below_size, &names,
&existing); &existing);
if (ret) if (ret)
goto cleanup; goto cleanup;
@ -1440,10 +1445,17 @@ int cmd_repack(int argc,
* generate an empty pack (since every object not in the * generate an empty pack (since every object not in the
* cruft pack generated above will have an mtime older * cruft pack generated above will have an mtime older
* than the expiration). * than the expiration).
*
* Pretend we don't have a `--combine-cruft-below-size`
* argument, since we're not selectively combining
* anything based on size to generate the limbo cruft
* pack, but rather removing all cruft packs from the
* main repository regardless of size.
*/ */
ret = write_cruft_pack(&cruft_po_args, expire_to, ret = write_cruft_pack(&cruft_po_args, expire_to,
pack_prefix, pack_prefix,
NULL, NULL,
0ul,
&names, &names,
&existing); &existing);
if (ret) if (ret)

View File

@ -194,10 +194,13 @@ test_expect_success '--max-cruft-size combines existing packs when not too large
) )
' '


test_expect_failure '--max-cruft-size combines smaller packs first' ' test_expect_success '--combine-cruft-below-size combines packs' '
git init max-cruft-size-consume-small && repo=combine-cruft-below-size &&
test_when_finished "rm -fr $repo" &&

git init "$repo" &&
( (
cd max-cruft-size-consume-small && cd "$repo" &&


test_commit base && test_commit base &&
git repack -ad && git repack -ad &&
@ -211,11 +214,11 @@ test_expect_failure '--max-cruft-size combines smaller packs first' '
test-tool pack-mtimes "$(basename $cruft_bar)" >>expect.raw && test-tool pack-mtimes "$(basename $cruft_bar)" >>expect.raw &&
sort expect.raw >expect.objects && sort expect.raw >expect.objects &&


# repacking with `--max-cruft-size=2M` should combine # Repacking with `--combine-cruft-below-size=1M`
# both 0.5 MiB packs together, instead of, say, one of # should combine both 0.5 MiB packs together, but
# the 0.5 MiB packs with the 1.0 MiB pack # ignore the two packs which are >= 1.0 MiB.
ls $packdir/pack-*.mtimes | sort >cruft.before && ls $packdir/pack-*.mtimes | sort >cruft.before &&
git repack -d --cruft --max-cruft-size=2M && git repack -d --cruft --combine-cruft-below-size=1M &&
ls $packdir/pack-*.mtimes | sort >cruft.after && ls $packdir/pack-*.mtimes | sort >cruft.after &&


comm -13 cruft.before cruft.after >cruft.new && comm -13 cruft.before cruft.after >cruft.new &&
@ -224,11 +227,12 @@ test_expect_failure '--max-cruft-size combines smaller packs first' '
test_line_count = 1 cruft.new && test_line_count = 1 cruft.new &&
test_line_count = 2 cruft.removed && test_line_count = 2 cruft.removed &&


# the two smaller packs should be rolled up first # The two packs smaller than 1.0MiB should be repacked
# together.
printf "%s\n" $cruft_foo $cruft_bar | sort >expect.removed && printf "%s\n" $cruft_foo $cruft_bar | sort >expect.removed &&
test_cmp expect.removed cruft.removed && test_cmp expect.removed cruft.removed &&


# ...and contain the set of objects rolled up # ...and contain the set of objects rolled up.
test-tool pack-mtimes "$(basename $(cat cruft.new))" >actual.raw && test-tool pack-mtimes "$(basename $(cat cruft.new))" >actual.raw &&
sort actual.raw >actual.objects && sort actual.raw >actual.objects &&