diff --git a/Documentation/config/gc.txt b/Documentation/config/gc.txt index ca47eb2008..83ebc2de55 100644 --- a/Documentation/config/gc.txt +++ b/Documentation/config/gc.txt @@ -86,6 +86,12 @@ gc.cruftPacks:: linkgit:git-repack[1]) instead of as loose objects. The default is `true`. +gc.maxCruftSize:: + Limit the size of new cruft packs when repacking. When + specified in addition to `--max-cruft-size`, the command line + option takes priority. See the `--max-cruft-size` option of + linkgit:git-repack[1]. + gc.pruneExpire:: When 'git gc' is run, it will call 'prune --expire 2.weeks.ago' (and 'repack --cruft --cruft-expiration 2.weeks.ago' if using diff --git a/Documentation/git-gc.txt b/Documentation/git-gc.txt index 90806fd26a..b5561c458a 100644 --- a/Documentation/git-gc.txt +++ b/Documentation/git-gc.txt @@ -59,6 +59,13 @@ be performed as well. cruft pack instead of storing them as loose objects. `--cruft` is on by default. +--max-cruft-size=:: + When packing unreachable objects into a cruft pack, limit the + size of new cruft packs to be at most `` bytes. Overrides any + value specified via the `gc.maxCruftSize` configuration. See + the `--max-cruft-size` option of linkgit:git-repack[1] for + more. + --prune=:: Prune loose objects older than date (default is 2 weeks ago, overridable by the config variable `gc.pruneExpire`). diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt index 4017157949..fbfc72e1b2 100644 --- a/Documentation/git-repack.txt +++ b/Documentation/git-repack.txt @@ -74,6 +74,17 @@ to the new separate pack will be written. immediately instead of waiting for the next `git gc` invocation. Only useful with `--cruft -d`. +--max-cruft-size=:: + Repack cruft objects into packs as large as `` bytes before + creating new packs. As long as there are enough cruft packs + smaller than ``, repacking will cause a new cruft pack to + be created containing objects from any combined cruft packs, + along with any new unreachable objects. Cruft packs larger than + `` will not be modified. When the new cruft pack is larger + than `` bytes, it will be split into multiple packs, all of + which are guaranteed to be at most `` bytes in size. Only + useful with `--cruft -d`. + --expire-to=:: Write a cruft pack containing pruned objects (if any) to the directory ``. This option is useful for keeping a copy of diff --git a/builtin/gc.c b/builtin/gc.c index 00192ae5d3..c97c9fb046 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -52,6 +52,7 @@ static const char * const builtin_gc_usage[] = { static int pack_refs = 1; static int prune_reflogs = 1; static int cruft_packs = 1; +static unsigned long max_cruft_size; static int aggressive_depth = 50; static int aggressive_window = 250; static int gc_auto_threshold = 6700; @@ -163,6 +164,7 @@ static void gc_config(void) git_config_get_int("gc.autopacklimit", &gc_auto_pack_limit); git_config_get_bool("gc.autodetach", &detach_auto); git_config_get_bool("gc.cruftpacks", &cruft_packs); + git_config_get_ulong("gc.maxcruftsize", &max_cruft_size); git_config_get_expiry("gc.pruneexpire", &prune_expire); git_config_get_expiry("gc.worktreepruneexpire", &prune_worktrees_expire); git_config_get_expiry("gc.logexpiry", &gc_log_expire); @@ -347,6 +349,9 @@ static void add_repack_all_option(struct string_list *keep_pack) strvec_push(&repack, "--cruft"); if (prune_expire) strvec_pushf(&repack, "--cruft-expiration=%s", prune_expire); + if (max_cruft_size) + strvec_pushf(&repack, "--max-cruft-size=%lu", + max_cruft_size); } else { strvec_push(&repack, "-A"); if (prune_expire) @@ -575,6 +580,8 @@ int cmd_gc(int argc, const char **argv, const char *prefix) N_("prune unreferenced objects"), PARSE_OPT_OPTARG, NULL, (intptr_t)prune_expire }, OPT_BOOL(0, "cruft", &cruft_packs, N_("pack unreferenced objects separately")), + OPT_MAGNITUDE(0, "max-cruft-size", &max_cruft_size, + N_("with --cruft, limit the size of new cruft packs")), OPT_BOOL(0, "aggressive", &aggressive, N_("be more thorough (increased runtime)")), OPT_BOOL_F(0, "auto", &auto_gc, N_("enable auto-gc mode"), PARSE_OPT_NOCOMPLETE), diff --git a/builtin/repack.c b/builtin/repack.c index 8a5bbb9cba..04770b15fe 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -27,6 +27,7 @@ #define PACK_CRUFT 4 #define DELETE_PACK 1 +#define RETAIN_PACK 2 static int pack_everything; static int delta_base_offset = 1; @@ -116,11 +117,26 @@ static void pack_mark_for_deletion(struct string_list_item *item) item->util = (void*)((uintptr_t)item->util | DELETE_PACK); } +static void pack_unmark_for_deletion(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util & ~DELETE_PACK); +} + static int pack_is_marked_for_deletion(struct string_list_item *item) { return (uintptr_t)item->util & DELETE_PACK; } +static void pack_mark_retained(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util | RETAIN_PACK); +} + +static int pack_is_retained(struct string_list_item *item) +{ + return (uintptr_t)item->util & RETAIN_PACK; +} + static void mark_packs_for_deletion_1(struct string_list *names, struct string_list *list) { @@ -133,17 +149,39 @@ static void mark_packs_for_deletion_1(struct string_list *names, if (len < hexsz) continue; sha1 = item->string + len - hexsz; - /* - * Mark this pack for deletion, which ensures that this - * pack won't be included in a MIDX (if `--write-midx` - * was given) and that we will actually delete this pack - * (if `-d` was given). - */ - if (!string_list_has_string(names, sha1)) + + if (pack_is_retained(item)) { + pack_unmark_for_deletion(item); + } else if (!string_list_has_string(names, sha1)) { + /* + * Mark this pack for deletion, which ensures + * that this pack won't be included in a MIDX + * (if `--write-midx` was given) and that we + * will actually delete this pack (if `-d` was + * given). + */ pack_mark_for_deletion(item); + } } } +static void retain_cruft_pack(struct existing_packs *existing, + struct packed_git *cruft) +{ + struct strbuf buf = STRBUF_INIT; + struct string_list_item *item; + + strbuf_addstr(&buf, pack_basename(cruft)); + strbuf_strip_suffix(&buf, ".pack"); + + item = string_list_lookup(&existing->cruft_packs, buf.buf); + if (!item) + BUG("could not find cruft pack '%s'", pack_basename(cruft)); + + pack_mark_retained(item); + strbuf_release(&buf); +} + static void mark_packs_for_deletion(struct existing_packs *existing, struct string_list *names) @@ -225,6 +263,8 @@ static void collect_pack_filenames(struct existing_packs *existing, } string_list_sort(&existing->kept_packs); + string_list_sort(&existing->non_kept_packs); + string_list_sort(&existing->cruft_packs); strbuf_release(&buf); } @@ -806,6 +846,72 @@ static void remove_redundant_bitmaps(struct string_list *include, strbuf_release(&path); } +static int existing_cruft_pack_cmp(const void *va, const void *vb) +{ + struct packed_git *a = *(struct packed_git **)va; + struct packed_git *b = *(struct packed_git **)vb; + + if (a->pack_size < b->pack_size) + return -1; + if (a->pack_size > b->pack_size) + return 1; + return 0; +} + +static void collapse_small_cruft_packs(FILE *in, size_t max_size, + struct existing_packs *existing) +{ + struct packed_git **existing_cruft, *p; + struct strbuf buf = STRBUF_INIT; + size_t total_size = 0; + size_t existing_cruft_nr = 0; + size_t i; + + ALLOC_ARRAY(existing_cruft, existing->cruft_packs.nr); + + for (p = get_all_packs(the_repository); p; p = p->next) { + if (!(p->is_cruft && p->pack_local)) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if (!string_list_has_string(&existing->cruft_packs, buf.buf)) + continue; + + if (existing_cruft_nr >= existing->cruft_packs.nr) + BUG("too many cruft packs (found %"PRIuMAX", but knew " + "of %"PRIuMAX")", + (uintmax_t)existing_cruft_nr + 1, + (uintmax_t)existing->cruft_packs.nr); + existing_cruft[existing_cruft_nr++] = p; + } + + QSORT(existing_cruft, existing_cruft_nr, existing_cruft_pack_cmp); + + for (i = 0; i < existing_cruft_nr; i++) { + size_t proposed; + + p = existing_cruft[i]; + proposed = st_add(total_size, p->pack_size); + + if (proposed <= max_size) { + total_size = proposed; + fprintf(in, "-%s\n", pack_basename(p)); + } else { + retain_cruft_pack(existing, p); + fprintf(in, "%s\n", pack_basename(p)); + } + } + + for (i = 0; i < existing->non_kept_packs.nr; i++) + fprintf(in, "-%s.pack\n", + existing->non_kept_packs.items[i].string); + + strbuf_release(&buf); +} + static int write_cruft_pack(const struct pack_objects_args *args, const char *destination, const char *pack_prefix, @@ -853,10 +959,14 @@ static int write_cruft_pack(const struct pack_objects_args *args, in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, names) fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); - for_each_string_list_item(item, &existing->non_kept_packs) - fprintf(in, "-%s.pack\n", item->string); - for_each_string_list_item(item, &existing->cruft_packs) - fprintf(in, "-%s.pack\n", item->string); + if (args->max_pack_size && !cruft_expiration) { + collapse_small_cruft_packs(in, args->max_pack_size, existing); + } else { + for_each_string_list_item(item, &existing->non_kept_packs) + fprintf(in, "-%s.pack\n", item->string); + for_each_string_list_item(item, &existing->cruft_packs) + fprintf(in, "-%s.pack\n", item->string); + } for_each_string_list_item(item, &existing->kept_packs) fprintf(in, "%s.pack\n", item->string); fclose(in); @@ -919,6 +1029,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix) PACK_CRUFT), OPT_STRING(0, "cruft-expiration", &cruft_expiration, N_("approxidate"), N_("with --cruft, expire objects older than this")), + OPT_MAGNITUDE(0, "max-cruft-size", &cruft_po_args.max_pack_size, + N_("with --cruft, limit the size of new cruft packs")), OPT_BOOL('d', NULL, &delete_redundant, N_("remove redundant packs, and run git-prune-packed")), OPT_BOOL('f', NULL, &po_args.no_reuse_delta, diff --git a/t/t6500-gc.sh b/t/t6500-gc.sh index 69509d0c11..0d6b5c3b27 100755 --- a/t/t6500-gc.sh +++ b/t/t6500-gc.sh @@ -303,6 +303,33 @@ test_expect_success 'gc.bigPackThreshold ignores cruft packs' ' ) ' +cruft_max_size_opts="git repack -d -l --cruft --cruft-expiration=2.weeks.ago" + +test_expect_success 'setup for --max-cruft-size tests' ' + git init cruft--max-size && + ( + cd cruft--max-size && + prepare_cruft_history + ) +' + +test_expect_success '--max-cruft-size sets appropriate repack options' ' + GIT_TRACE2_EVENT=$(pwd)/trace2.txt git -C cruft--max-size \ + gc --cruft --max-cruft-size=1M && + test_subcommand $cruft_max_size_opts --max-cruft-size=1048576 blob && + git hash-object -w -t blob blob && + rm blob +} + +pack_random_blob () { + generate_random_blob "$@" && + git repack -d -q >/dev/null +} + +generate_cruft_pack () { + pack_random_blob "$@" >/dev/null && + + ls $packdir/pack-*.pack | xargs -n 1 basename >in && + pack="$(git pack-objects --cruft $packdir/pack foo.objects && + test-tool pack-mtimes $(basename "$cruft_bar") >bar.objects && + + grep "^$foo" foo.objects && + test_line_count = 1 foo.objects && + grep "^$bar" bar.objects && + test_line_count = 1 bar.objects + ) +' + +test_expect_success '--max-cruft-size combines existing packs when below threshold' ' + git init max-cruft-size-small && + ( + cd max-cruft-size-small && + test_commit base && + + foo="$(pack_random_blob foo $((1*1024*1024)))" && + git repack --cruft -d && + + bar="$(pack_random_blob bar $((1*1024*1024)))" && + git repack --cruft -d --max-cruft-size=10M && + + cruft=$(ls $packdir/pack-*.mtimes) && + test-tool pack-mtimes $(basename "$cruft") >cruft.objects && + + grep "^$foo" cruft.objects && + grep "^$bar" cruft.objects && + test_line_count = 2 cruft.objects + ) +' + +test_expect_success '--max-cruft-size combines smaller packs first' ' + git init max-cruft-size-consume-small && + ( + cd max-cruft-size-consume-small && + + test_commit base && + git repack -ad && + + cruft_foo="$(generate_cruft_pack foo 524288)" && # 0.5 MiB + cruft_bar="$(generate_cruft_pack bar 524288)" && # 0.5 MiB + cruft_baz="$(generate_cruft_pack baz 1048576)" && # 1.0 MiB + cruft_quux="$(generate_cruft_pack quux 1572864)" && # 1.5 MiB + + test-tool pack-mtimes "$(basename $cruft_foo)" >expect.raw && + test-tool pack-mtimes "$(basename $cruft_bar)" >>expect.raw && + sort expect.raw >expect.objects && + + # repacking with `--max-cruft-size=2M` should combine + # both 0.5 MiB packs together, instead of, say, one of + # the 0.5 MiB packs with the 1.0 MiB pack + ls $packdir/pack-*.mtimes | sort >cruft.before && + git repack -d --cruft --max-cruft-size=2M && + ls $packdir/pack-*.mtimes | sort >cruft.after && + + comm -13 cruft.before cruft.after >cruft.new && + comm -23 cruft.before cruft.after >cruft.removed && + + test_line_count = 1 cruft.new && + test_line_count = 2 cruft.removed && + + # the two smaller packs should be rolled up first + printf "%s\n" $cruft_foo $cruft_bar | sort >expect.removed && + test_cmp expect.removed cruft.removed && + + # ...and contain the set of objects rolled up + test-tool pack-mtimes "$(basename $(cat cruft.new))" >actual.raw && + sort actual.raw >actual.objects && + + test_cmp expect.objects actual.objects + ) +' + +test_expect_success 'setup --max-cruft-size with freshened objects' ' + git init max-cruft-size-freshen && + ( + cd max-cruft-size-freshen && + + test_commit base && + git repack -ad && + + foo="$(generate_random_blob foo 64)" && + test-tool chmtime --get -10000 \ + "$objdir/$(test_oid_to_path "$foo")" >foo.mtime && + + git repack --cruft -d && + + cruft="$(ls $packdir/pack-*.mtimes)" && + test-tool pack-mtimes "$(basename $cruft)" >actual && + echo "$foo $(cat foo.mtime)" >expect && + test_cmp expect actual + ) +' + +test_expect_success '--max-cruft-size with freshened objects (loose)' ' + ( + cd max-cruft-size-freshen && + + # regenerate the object, setting its mtime to be more recent + foo="$(generate_random_blob foo 64)" && + test-tool chmtime --get -100 \ + "$objdir/$(test_oid_to_path "$foo")" >foo.mtime && + + git repack --cruft -d && + + cruft="$(ls $packdir/pack-*.mtimes)" && + test-tool pack-mtimes "$(basename $cruft)" >actual && + echo "$foo $(cat foo.mtime)" >expect && + test_cmp expect actual + ) +' + +test_expect_success '--max-cruft-size with freshened objects (packed)' ' + ( + cd max-cruft-size-freshen && + + # regenerate the object and store it in a packfile, + # setting its mtime to be more recent + # + # store it alongside another cruft object so that we + # do not create an identical copy of the existing + # cruft pack (which contains $foo). + foo="$(generate_random_blob foo 64)" && + bar="$(generate_random_blob bar 64)" && + foo_pack="$(printf "%s\n" $foo $bar | git pack-objects $packdir/pack)" && + git prune-packed && + + test-tool chmtime --get -10 \ + "$packdir/pack-$foo_pack.pack" >foo.mtime && + + git repack --cruft -d && + + cruft="$(ls $packdir/pack-*.mtimes)" && + test-tool pack-mtimes "$(basename $cruft)" >actual && + echo "$foo $(cat foo.mtime)" >expect.raw && + echo "$bar $(cat foo.mtime)" >>expect.raw && + sort expect.raw >expect && + test_cmp expect actual + ) +' + +test_expect_success '--max-cruft-size with pruning' ' + git init max-cruft-size-prune && + ( + cd max-cruft-size-prune && + + test_commit base && + foo="$(generate_random_blob foo $((1024*1024)))" && + bar="$(generate_random_blob bar $((1024*1024)))" && + baz="$(generate_random_blob baz $((1024*1024)))" && + + test-tool chmtime -10000 "$objdir/$(test_oid_to_path "$foo")" && + + git repack -d --cruft --max-cruft-size=1M && + + # backdate the mtimes of all cruft packs to validate + # that they were rewritten as a result of pruning + ls $packdir/pack-*.mtimes | sort >cruft.before && + for cruft in $(cat cruft.before) + do + mtime="$(test-tool chmtime --get -10000 "$cruft")" && + echo $cruft $mtime >>mtimes || return 1 + done && + + # repack (and prune) with a --max-cruft-size to ensure + # that we appropriately split the resulting set of packs + git repack -d --cruft --max-cruft-size=1M \ + --cruft-expiration=10.seconds.ago && + ls $packdir/pack-*.mtimes | sort >cruft.after && + + for cruft in $(cat cruft.after) + do + old_mtime="$(grep $cruft mtimes | cut -d" " -f2)" && + new_mtime="$(test-tool chmtime --get $cruft)" && + test $old_mtime -lt $new_mtime || return 1 + done && + + test_line_count = 3 cruft.before && + test_line_count = 2 cruft.after && + test_must_fail git cat-file -e $foo && + git cat-file -e $bar && + git cat-file -e $baz + ) +' + +test_expect_success '--max-cruft-size ignores non-local packs' ' + repo="max-cruft-size-non-local" && + git init $repo && + ( + cd $repo && + test_commit base && + generate_random_blob foo 64 && + git repack --cruft -d + ) && + + git clone --reference=$repo $repo $repo-alt && + ( + cd $repo-alt && + + test_commit other && + generate_random_blob bar 64 && + + # ensure that we do not attempt to pick up packs from + # the non-alternated repository, which would result in a + # crash + git repack --cruft --max-cruft-size=1M -d + ) +' + test_done