diff --git a/Documentation/config/repack.adoc b/Documentation/config/repack.adoc index e9e78dcb19..4c22a499f6 100644 --- a/Documentation/config/repack.adoc +++ b/Documentation/config/repack.adoc @@ -46,3 +46,21 @@ repack.midxMustContainCruft:: `--write-midx`. When false, cruft packs are only included in the MIDX when necessary (e.g., because they might be required to form a reachability closure with MIDX bitmaps). Defaults to true. + +repack.midxSplitFactor:: + The factor used in the geometric merging condition when + compacting incremental MIDX layers during `git repack` when + invoked with the `--write-midx=incremental` option. ++ +Adjacent layers are merged when the accumulated object count of the +newer layer exceeds `1/` of the object count of the next deeper +layer. Must be at least 2. Defaults to 2. + +repack.midxNewLayerThreshold:: + The minimum number of packs in the tip MIDX layer before those + packs are considered as candidates for geometric repacking + during `git repack --write-midx=incremental`. ++ +When the tip layer has fewer packs than this threshold, those packs are +excluded from the geometric repack entirely, and are thus left +unmodified. Must be at least 1. Defaults to 8. diff --git a/Documentation/git-multi-pack-index.adoc b/Documentation/git-multi-pack-index.adoc index 3a5aa22778..c6d23aeeb9 100644 --- a/Documentation/git-multi-pack-index.adoc +++ b/Documentation/git-multi-pack-index.adoc @@ -11,9 +11,11 @@ SYNOPSIS [verse] 'git multi-pack-index' [] write [--preferred-pack=] [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs] - [--refs-snapshot=] + [--refs-snapshot=] [--[no-]write-chain-file] + [--base=] 'git multi-pack-index' [] compact [--[no-]incremental] - [--[no-]bitmap] + [--[no-]bitmap] [--base=] [--[no-]write-chain-file] + 'git multi-pack-index' [] verify 'git multi-pack-index' [] expire 'git multi-pack-index' [] repack [--batch-size=] @@ -83,6 +85,20 @@ marker). and packs not present in an existing MIDX layer. Migrates non-incremental MIDXs to incremental ones when necessary. + + --[no-]write-chain-file:: + When used with `--incremental`, write a new MIDX layer + but do not update the multi-pack-index-chain file. + The checksum of the new layer is printed to standard + output, allowing the caller to assemble and write the + chain itself. Requires `--incremental`. + + --base=:: + Specify the checksum of an existing MIDX layer to use + as the base when writing a new incremental layer. + The special value `none` indicates that the new layer + should have no base (i.e., it becomes a root layer). + Requires `--no-write-chain-file`. -- compact:: @@ -97,6 +113,18 @@ compact:: --[no-]bitmap:: Control whether or not a multi-pack bitmap is written. + + --[no-]write-chain-file:: + When used with `--incremental`, write a new compacted + MIDX layer but do not update the multi-pack-index-chain + file. The checksum of the new layer is printed to + standard output. Requires `--incremental`. + + --base=:: + Specify the checksum of an existing MIDX layer to use + as the base for the compacted result, instead of using + the immediate parent of ``. The special value + `none` indicates that the result should have no base. -- + Note that the compact command requires writing a version-2 midx that diff --git a/Documentation/git-repack.adoc b/Documentation/git-repack.adoc index 673ce91083..72c42015e2 100644 --- a/Documentation/git-repack.adoc +++ b/Documentation/git-repack.adoc @@ -11,7 +11,7 @@ SYNOPSIS [verse] 'git repack' [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m] [--window=] [--depth=] [--threads=] [--keep-pack=] - [--write-midx] [--name-hash-version=] [--path-walk] + [--write-midx[=]] [--name-hash-version=] [--path-walk] DESCRIPTION ----------- @@ -250,9 +250,47 @@ pack as the preferred pack for object selection by the MIDX (see linkgit:git-multi-pack-index[1]). -m:: ---write-midx:: +--write-midx[=]:: Write a multi-pack index (see linkgit:git-multi-pack-index[1]) - containing the non-redundant packs. + containing the non-redundant packs. The following modes are + available: ++ +-- + `default`;; + Write a single MIDX covering all packs. This is the + default when `--write-midx` is given without an + explicit mode. + + `incremental`;; + Write an incremental MIDX chain instead of a single + flat MIDX. ++ +Without `--geometric`, a new MIDX layer is appended to the existing +chain (or a new chain is started) containing whatever packs were written +by the repack. Existing layers are preserved as-is. ++ +When combined with `--geometric`, the incremental mode maintains a chain +of MIDX layers that is compacted over time using a geometric merging +strategy. Each repack creates a new tip layer containing the newly +written pack(s). Adjacent layers are then merged whenever the newer +layer's object count exceeds `1/repack.midxSplitFactor` of the next +deeper layer's count. Layers that do not meet this condition are +retained as-is. ++ +The result is that newer (tip) layers tend to contain many small packs +with relatively few objects, while older (deeper) layers contain fewer, +larger packs covering more objects. Because compaction is driven by the +tip of the chain, newer layers are also rewritten more frequently than +older ones, which are only touched when enough objects have accumulated +to justify merging into them. This keeps the total number of layers +logarithmic relative to the total number of objects. ++ +Only packs in the tip MIDX layer are considered as candidates for the +geometric repack; packs in deeper layers are left untouched. If the tip +layer contains fewer packs than `repack.midxNewLayerThreshold`, those +packs are excluded from the geometry entirely, and a new layer is +created for any new pack(s) without disturbing the existing chain. +-- --name-hash-version=:: Provide this argument to the underlying `git pack-objects` process. diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index 0f72d96c02..00ffb36394 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -16,11 +16,13 @@ #define BUILTIN_MIDX_WRITE_USAGE \ N_("git multi-pack-index [] write [--preferred-pack=]\n" \ " [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs]\n" \ - " [--refs-snapshot=]") + " [--refs-snapshot=] [--[no-]write-chain-file]\n" \ + " [--base=]") #define BUILTIN_MIDX_COMPACT_USAGE \ N_("git multi-pack-index [] compact [--[no-]incremental]\n" \ - " [--[no-]bitmap] ") + " [--[no-]bitmap] [--base=] [--[no-]write-chain-file]\n" \ + " ") #define BUILTIN_MIDX_VERIFY_USAGE \ N_("git multi-pack-index [] verify") @@ -63,6 +65,7 @@ static char const * const builtin_multi_pack_index_usage[] = { static struct opts_multi_pack_index { char *object_dir; const char *preferred_pack; + const char *incremental_base; char *refs_snapshot; unsigned long batch_size; unsigned flags; @@ -151,8 +154,13 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, N_("pack for reuse when computing a multi-pack bitmap")), OPT_BIT(0, "bitmap", &opts.flags, N_("write multi-pack bitmap"), MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX), + OPT_STRING(0, "base", &opts.incremental_base, N_("checksum"), + N_("base MIDX for incremental writes")), OPT_BIT(0, "incremental", &opts.flags, N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL), + OPT_NEGBIT(0, "write-chain-file", &opts.flags, + N_("write the multi-pack-index chain file"), + MIDX_WRITE_NO_CHAIN), OPT_BOOL(0, "stdin-packs", &opts.stdin_packs, N_("write multi-pack index containing only given indexes")), OPT_FILENAME(0, "refs-snapshot", &opts.refs_snapshot, @@ -178,6 +186,22 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, if (argc) usage_with_options(builtin_multi_pack_index_write_usage, options); + + if (opts.flags & MIDX_WRITE_NO_CHAIN && + !(opts.flags & MIDX_WRITE_INCREMENTAL)) { + error(_("cannot use %s without %s"), + "--no-write-chain-file", "--incremental"); + usage_with_options(builtin_multi_pack_index_write_usage, + options); + } + + if (opts.incremental_base && + !(opts.flags & MIDX_WRITE_NO_CHAIN)) { + error(_("cannot use --base without --no-write-chain-file")); + usage_with_options(builtin_multi_pack_index_write_usage, + options); + } + source = handle_object_dir_option(repo); FREE_AND_NULL(options); @@ -189,7 +213,8 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, ret = write_midx_file_only(source, &packs, opts.preferred_pack, - opts.refs_snapshot, opts.flags); + opts.refs_snapshot, + opts.incremental_base, opts.flags); string_list_clear(&packs, 0); free(opts.refs_snapshot); @@ -217,10 +242,15 @@ static int cmd_multi_pack_index_compact(int argc, const char **argv, struct option *options; static struct option builtin_multi_pack_index_compact_options[] = { + OPT_STRING(0, "base", &opts.incremental_base, N_("checksum"), + N_("base MIDX for incremental writes")), OPT_BIT(0, "bitmap", &opts.flags, N_("write multi-pack bitmap"), MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX), OPT_BIT(0, "incremental", &opts.flags, N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL), + OPT_NEGBIT(0, "write-chain-file", &opts.flags, + N_("write the multi-pack-index chain file"), + MIDX_WRITE_NO_CHAIN), OPT_END(), }; @@ -239,6 +269,15 @@ static int cmd_multi_pack_index_compact(int argc, const char **argv, if (argc != 2) usage_with_options(builtin_multi_pack_index_compact_usage, options); + + if (opts.flags & MIDX_WRITE_NO_CHAIN && + !(opts.flags & MIDX_WRITE_INCREMENTAL)) { + error(_("cannot use %s without %s"), + "--no-write-chain-file", "--incremental"); + usage_with_options(builtin_multi_pack_index_compact_usage, + options); + } + source = handle_object_dir_option(the_repository); FREE_AND_NULL(options); @@ -266,7 +305,8 @@ static int cmd_multi_pack_index_compact(int argc, const char **argv, die(_("MIDX %s must be an ancestor of %s"), argv[0], argv[1]); } - ret = write_midx_file_compact(source, from_midx, to_midx, opts.flags); + ret = write_midx_file_compact(source, from_midx, to_midx, + opts.incremental_base, opts.flags); return ret; } diff --git a/builtin/repack.c b/builtin/repack.c index 4c5a82c2c8..1524a9c13a 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -33,7 +33,7 @@ static int midx_must_contain_cruft = 1; static const char *const git_repack_usage[] = { N_("git repack [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m]\n" "[--window=] [--depth=] [--threads=] [--keep-pack=]\n" - "[--write-midx] [--name-hash-version=] [--path-walk]"), + "[--write-midx[=]] [--name-hash-version=] [--path-walk]"), NULL }; @@ -42,9 +42,14 @@ static const char incremental_bitmap_conflict_error[] = N_( "--no-write-bitmap-index or disable the pack.writeBitmaps configuration." ); +#define DEFAULT_MIDX_SPLIT_FACTOR 2 +#define DEFAULT_MIDX_NEW_LAYER_THRESHOLD 8 + struct repack_config_ctx { struct pack_objects_args *po_args; struct pack_objects_args *cruft_po_args; + int midx_split_factor; + int midx_new_layer_threshold; }; static int repack_config(const char *var, const char *value, @@ -94,9 +99,39 @@ static int repack_config(const char *var, const char *value, midx_must_contain_cruft = git_config_bool(var, value); return 0; } + if (!strcmp(var, "repack.midxsplitfactor")) { + repack_ctx->midx_split_factor = git_config_int(var, value, + ctx->kvi); + return 0; + } + if (!strcmp(var, "repack.midxnewlayerthreshold")) { + repack_ctx->midx_new_layer_threshold = git_config_int(var, value, + ctx->kvi); + return 0; + } return git_default_config(var, value, ctx, cb); } +static int option_parse_write_midx(const struct option *opt, const char *arg, + int unset) +{ + enum repack_write_midx_mode *cfg = opt->value; + + if (unset) { + *cfg = REPACK_WRITE_MIDX_NONE; + return 0; + } + + if (!arg || !*arg) + *cfg = REPACK_WRITE_MIDX_DEFAULT; + else if (!strcmp(arg, "incremental")) + *cfg = REPACK_WRITE_MIDX_INCREMENTAL; + else + return error(_("unknown value for %s: %s"), opt->long_name, arg); + + return 0; +} + int cmd_repack(int argc, const char **argv, const char *prefix, @@ -119,7 +154,7 @@ int cmd_repack(int argc, struct string_list keep_pack_list = STRING_LIST_INIT_NODUP; struct pack_objects_args po_args = PACK_OBJECTS_ARGS_INIT; struct pack_objects_args cruft_po_args = PACK_OBJECTS_ARGS_INIT; - int write_midx = 0; + enum repack_write_midx_mode write_midx = REPACK_WRITE_MIDX_NONE; const char *cruft_expiration = NULL; const char *expire_to = NULL; const char *filter_to = NULL; @@ -185,8 +220,14 @@ int cmd_repack(int argc, N_("do not repack this pack")), OPT_INTEGER('g', "geometric", &geometry.split_factor, N_("find a geometric progression with factor ")), - OPT_BOOL('m', "write-midx", &write_midx, - N_("write a multi-pack index of the resulting packs")), + OPT_CALLBACK_F(0, "write-midx", &write_midx, + N_("mode"), + N_("write a multi-pack index of the resulting packs"), + PARSE_OPT_OPTARG, option_parse_write_midx), + OPT_SET_INT_F('m', NULL, &write_midx, + N_("write a multi-pack index of the resulting packs"), + REPACK_WRITE_MIDX_DEFAULT, + PARSE_OPT_HIDDEN), OPT_STRING(0, "expire-to", &expire_to, N_("dir"), N_("pack prefix to store a pack containing pruned objects")), OPT_STRING(0, "filter-to", &filter_to, N_("dir"), @@ -199,6 +240,8 @@ int cmd_repack(int argc, memset(&config_ctx, 0, sizeof(config_ctx)); config_ctx.po_args = &po_args; config_ctx.cruft_po_args = &cruft_po_args; + config_ctx.midx_split_factor = DEFAULT_MIDX_SPLIT_FACTOR; + config_ctx.midx_new_layer_threshold = DEFAULT_MIDX_NEW_LAYER_THRESHOLD; repo_config(repo, repack_config, &config_ctx); @@ -221,14 +264,16 @@ int cmd_repack(int argc, pack_everything |= ALL_INTO_ONE; if (write_bitmaps < 0) { - if (!write_midx && + if (write_midx == REPACK_WRITE_MIDX_NONE && (!(pack_everything & ALL_INTO_ONE) || !is_bare_repository())) write_bitmaps = 0; } if (po_args.pack_kept_objects < 0) - po_args.pack_kept_objects = write_bitmaps > 0 && !write_midx; + po_args.pack_kept_objects = write_bitmaps > 0 && + write_midx == REPACK_WRITE_MIDX_NONE; - if (write_bitmaps && !(pack_everything & ALL_INTO_ONE) && !write_midx) + if (write_bitmaps && !(pack_everything & ALL_INTO_ONE) && + write_midx == REPACK_WRITE_MIDX_NONE) die(_(incremental_bitmap_conflict_error)); if (write_bitmaps && po_args.local && @@ -244,7 +289,14 @@ int cmd_repack(int argc, write_bitmaps = 0; } - if (write_midx && write_bitmaps) { + if (config_ctx.midx_split_factor < 2) + die(_("invalid value for %s: %d"), "--midx-split-factor", + config_ctx.midx_split_factor); + if (config_ctx.midx_new_layer_threshold < 1) + die(_("invalid value for %s: %d"), "--midx-new-layer-threshold", + config_ctx.midx_new_layer_threshold); + + if (write_midx != REPACK_WRITE_MIDX_NONE && write_bitmaps) { struct strbuf path = STRBUF_INIT; strbuf_addf(&path, "%s/%s_XXXXXX", @@ -267,6 +319,10 @@ int cmd_repack(int argc, if (geometry.split_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); + if (write_midx == REPACK_WRITE_MIDX_INCREMENTAL) { + geometry.midx_layer_threshold = config_ctx.midx_new_layer_threshold; + geometry.midx_layer_threshold_set = true; + } pack_geometry_init(&geometry, &existing, &po_args); pack_geometry_split(&geometry); } @@ -297,7 +353,7 @@ int cmd_repack(int argc, } if (repo_has_promisor_remote(repo)) strvec_push(&cmd.args, "--exclude-promisor-objects"); - if (!write_midx) { + if (write_midx == REPACK_WRITE_MIDX_NONE) { if (write_bitmaps > 0) strvec_push(&cmd.args, "--write-bitmap-index"); else if (write_bitmaps < 0) @@ -417,7 +473,7 @@ int cmd_repack(int argc, * midx_has_unknown_packs() will make the decision for * us. */ - if (!get_multi_pack_index(repo->objects->sources)) + if (!get_multi_pack_index(existing.source)) midx_must_contain_cruft = 1; } @@ -516,10 +572,13 @@ int cmd_repack(int argc, packtmp); /* End of pack replacement. */ - if (delete_redundant && pack_everything & ALL_INTO_ONE) + if (delete_redundant && pack_everything & ALL_INTO_ONE) { + if (write_midx == REPACK_WRITE_MIDX_INCREMENTAL) + existing_packs_retain_midx_packs(&existing); existing_packs_mark_for_deletion(&existing, &names); + } - if (write_midx) { + if (write_midx != REPACK_WRITE_MIDX_NONE) { struct repack_write_midx_opts opts = { .existing = &existing, .geometry = &geometry, @@ -528,11 +587,13 @@ int cmd_repack(int argc, .packdir = packdir, .show_progress = show_progress, .write_bitmaps = write_bitmaps > 0, - .midx_must_contain_cruft = midx_must_contain_cruft + .midx_must_contain_cruft = midx_must_contain_cruft, + .midx_split_factor = config_ctx.midx_split_factor, + .midx_new_layer_threshold = config_ctx.midx_new_layer_threshold, + .mode = write_midx, }; - ret = write_midx_included_packs(&opts); - + ret = repack_write_midx(&opts); if (ret) goto cleanup; } @@ -541,11 +602,15 @@ int cmd_repack(int argc, if (delete_redundant) { int opts = 0; - existing_packs_remove_redundant(&existing, packdir); + bool wrote_incremental_midx = write_midx == REPACK_WRITE_MIDX_INCREMENTAL; + + existing_packs_remove_redundant(&existing, packdir, + wrote_incremental_midx); if (geometry.split_factor) pack_geometry_remove_redundant(&geometry, &names, - &existing, packdir); + &existing, packdir, + wrote_incremental_midx); if (show_progress) opts |= PRUNE_PACKED_VERBOSE; prune_packed_objects(opts); @@ -564,8 +629,7 @@ int cmd_repack(int argc, unsigned flags = 0; if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL, 0)) flags |= MIDX_WRITE_INCREMENTAL; - write_midx_file(repo->objects->sources, - NULL, NULL, flags); + write_midx_file(existing.source, NULL, NULL, flags); } cleanup: diff --git a/midx-write.c b/midx-write.c index a25cab75ab..561e9eedc0 100644 --- a/midx-write.c +++ b/midx-write.c @@ -29,8 +29,7 @@ extern void clear_midx_files_ext(struct odb_source *source, const char *ext, const char *keep_hash); extern void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext, - const char **keep_hashes, - uint32_t hashes_nr); + const struct strvec *keep_hashes); extern int cmp_idx_or_pack_name(const char *idx_or_pack_name, const char *idx_name); @@ -1109,8 +1108,7 @@ done: } static void clear_midx_files(struct odb_source *source, - const char **hashes, uint32_t hashes_nr, - unsigned incremental) + const struct strvec *hashes, unsigned incremental) { /* * if incremental: @@ -1124,13 +1122,15 @@ static void clear_midx_files(struct odb_source *source, */ struct strbuf buf = STRBUF_INIT; const char *exts[] = { MIDX_EXT_BITMAP, MIDX_EXT_REV, MIDX_EXT_MIDX }; - uint32_t i, j; + uint32_t i; for (i = 0; i < ARRAY_SIZE(exts); i++) { - clear_incremental_midx_files_ext(source, exts[i], - hashes, hashes_nr); - for (j = 0; j < hashes_nr; j++) - clear_midx_files_ext(source, exts[i], hashes[j]); + clear_incremental_midx_files_ext(source, exts[i], hashes); + if (hashes) { + for (size_t j = 0; j < hashes->nr; j++) + clear_midx_files_ext(source, exts[i], + hashes->v[j]); + } } if (incremental) @@ -1152,7 +1152,7 @@ static bool midx_needs_update(struct multi_pack_index *midx, struct write_midx_c /* * Ensure that we have a valid checksum before consulting the - * exisiting MIDX in order to determine if we can avoid an + * existing MIDX in order to determine if we can avoid an * update. * * This is necessary because the given MIDX is loaded directly @@ -1208,14 +1208,16 @@ static bool midx_needs_update(struct multi_pack_index *midx, struct write_midx_c BUG("same pack added twice?"); } - for (uint32_t i = 0; i < ctx->nr; i++) { - strbuf_reset(&buf); - strbuf_addstr(&buf, midx->pack_names[i]); - strbuf_strip_suffix(&buf, ".idx"); + for (struct multi_pack_index *m = midx; m; m = m->base_midx) { + for (uint32_t i = 0; i < m->num_packs; i++) { + strbuf_reset(&buf); + strbuf_addstr(&buf, m->pack_names[i]); + strbuf_strip_suffix(&buf, ".idx"); - if (!strset_contains(&packs, buf.buf)) - goto out; - strset_remove(&packs, buf.buf); + if (!strset_contains(&packs, buf.buf)) + goto out; + strset_remove(&packs, buf.buf); + } } needed = false; @@ -1245,6 +1247,7 @@ struct write_midx_opts { const char *preferred_pack_name; const char *refs_snapshot; + const char *incremental_base; unsigned flags; }; @@ -1255,7 +1258,7 @@ static int write_midx_internal(struct write_midx_opts *opts) unsigned char midx_hash[GIT_MAX_RAWSZ]; uint32_t start_pack; struct hashfile *f = NULL; - struct lock_file lk; + struct lock_file lk = LOCK_INIT; struct tempfile *incr; struct write_midx_context ctx = { .preferred_pack_idx = NO_PREFERRED_PACK, @@ -1265,8 +1268,7 @@ static int write_midx_internal(struct write_midx_opts *opts) int pack_name_concat_len = 0; int dropped_packs = 0; int result = -1; - const char **keep_hashes = NULL; - size_t keep_hashes_nr = 0; + struct strvec keep_hashes = STRVEC_INIT; struct chunkfile *cf; trace2_region_enter("midx", "write_midx_internal", r); @@ -1329,11 +1331,32 @@ static int write_midx_internal(struct write_midx_opts *opts) /* * If compacting MIDX layer(s) in the range [from, to], then the - * compacted MIDX will share the same base MIDX as 'from'. + * compacted MIDX will share the same base MIDX as 'from', + * unless a custom --base is specified (see below). */ if (ctx.compact) ctx.base_midx = ctx.compact_from->base_midx; + if (opts->incremental_base) { + if (!strcmp(opts->incremental_base, "none")) { + ctx.base_midx = NULL; + } else { + while (ctx.base_midx) { + const char *cmp = midx_get_checksum_hex(ctx.base_midx); + if (!strcmp(opts->incremental_base, cmp)) + break; + + ctx.base_midx = ctx.base_midx->base_midx; + } + + if (!ctx.base_midx) { + error(_("could not find base MIDX '%s'"), + opts->incremental_base); + goto cleanup; + } + } + } + ctx.nr = 0; ctx.alloc = ctx.m ? ctx.m->num_packs + ctx.m->num_packs_in_base : 16; ctx.info = NULL; @@ -1600,11 +1623,14 @@ static int write_midx_internal(struct write_midx_opts *opts) } if (ctx.incremental) { - struct strbuf lock_name = STRBUF_INIT; + if (!(opts->flags & MIDX_WRITE_NO_CHAIN)) { + struct strbuf lock_name = STRBUF_INIT; - get_midx_chain_filename(opts->source, &lock_name); - hold_lock_file_for_update(&lk, lock_name.buf, LOCK_DIE_ON_ERROR); - strbuf_release(&lock_name); + get_midx_chain_filename(opts->source, &lock_name); + hold_lock_file_for_update(&lk, lock_name.buf, + LOCK_DIE_ON_ERROR); + strbuf_release(&lock_name); + } incr = mks_tempfile_m(midx_name.buf, 0444); if (!incr) { @@ -1706,33 +1732,23 @@ static int write_midx_internal(struct write_midx_opts *opts) if (ctx.num_multi_pack_indexes_before == UINT32_MAX) die(_("too many multi-pack-indexes")); - if (ctx.compact) { - struct multi_pack_index *m; - - /* - * Keep all MIDX layers excluding those in the range [from, to]. - */ - for (m = ctx.base_midx; m; m = m->base_midx) - keep_hashes_nr++; - for (m = ctx.m; - m && midx_hashcmp(m, ctx.compact_to, r->hash_algo); - m = m->base_midx) - keep_hashes_nr++; - - keep_hashes_nr++; /* include the compacted layer */ - } else { - keep_hashes_nr = ctx.num_multi_pack_indexes_before + 1; - } - CALLOC_ARRAY(keep_hashes, keep_hashes_nr); + if (!is_lock_file_locked(&lk)) + printf("%s\n", hash_to_hex_algop(midx_hash, r->hash_algo)); + else if (opts->flags & MIDX_WRITE_NO_CHAIN) + BUG("lockfile held with MIDX_WRITE_NO_CHAIN set?"); if (ctx.incremental) { - FILE *chainf = fdopen_lock_file(&lk, "w"); struct strbuf final_midx_name = STRBUF_INIT; struct multi_pack_index *m = ctx.base_midx; + struct multi_pack_index **layers = NULL; + size_t layers_nr = 0, layers_alloc = 0; - if (!chainf) { - error_errno(_("unable to open multi-pack-index chain file")); - goto cleanup; + if (is_lock_file_locked(&lk)){ + FILE *chainf = fdopen_lock_file(&lk, "w"); + if (!chainf) { + error_errno(_("unable to open multi-pack-index chain file")); + goto cleanup; + } } if (link_midx_to_chain(ctx.base_midx) < 0) @@ -1749,60 +1765,64 @@ static int write_midx_internal(struct write_midx_opts *opts) strbuf_release(&final_midx_name); if (ctx.compact) { - struct multi_pack_index *m; - uint32_t num_layers_before_from = 0; - uint32_t i; + struct multi_pack_index *mp; - for (m = ctx.base_midx; m; m = m->base_midx) - num_layers_before_from++; - - m = ctx.base_midx; - for (i = 0; i < num_layers_before_from; i++) { - uint32_t j = num_layers_before_from - i - 1; - - keep_hashes[j] = xstrdup(midx_get_checksum_hex(m)); - m = m->base_midx; + for (mp = ctx.base_midx; mp; mp = mp->base_midx) { + ALLOC_GROW(layers, layers_nr + 1, layers_alloc); + layers[layers_nr++] = mp; } + while (layers_nr) + strvec_push(&keep_hashes, + midx_get_checksum_hex(layers[--layers_nr])); - keep_hashes[i] = xstrdup(hash_to_hex_algop(midx_hash, - r->hash_algo)); + strvec_push(&keep_hashes, + hash_to_hex_algop(midx_hash, + r->hash_algo)); - i = 0; - for (m = ctx.m; - m && midx_hashcmp(m, ctx.compact_to, r->hash_algo); - m = m->base_midx) { - keep_hashes[keep_hashes_nr - i - 1] = - xstrdup(midx_get_checksum_hex(m)); - i++; + for (mp = ctx.m; + mp && midx_hashcmp(mp, ctx.compact_to, + r->hash_algo); + mp = mp->base_midx) { + ALLOC_GROW(layers, layers_nr + 1, layers_alloc); + layers[layers_nr++] = mp; } + while (layers_nr) + strvec_push(&keep_hashes, + midx_get_checksum_hex(layers[--layers_nr])); } else { - keep_hashes[ctx.num_multi_pack_indexes_before] = - xstrdup(hash_to_hex_algop(midx_hash, - r->hash_algo)); - - for (uint32_t i = 0; i < ctx.num_multi_pack_indexes_before; i++) { - uint32_t j = ctx.num_multi_pack_indexes_before - i - 1; - - keep_hashes[j] = xstrdup(midx_get_checksum_hex(m)); - m = m->base_midx; + for (; m; m = m->base_midx) { + ALLOC_GROW(layers, layers_nr + 1, layers_alloc); + layers[layers_nr++] = m; } + while (layers_nr) + strvec_push(&keep_hashes, + midx_get_checksum_hex(layers[--layers_nr])); + + strvec_push(&keep_hashes, + hash_to_hex_algop(midx_hash, + r->hash_algo)); } - for (uint32_t i = 0; i < keep_hashes_nr; i++) - fprintf(get_lock_file_fp(&lk), "%s\n", keep_hashes[i]); + free(layers); + + if (is_lock_file_locked(&lk)) + for (size_t i = 0; i < keep_hashes.nr; i++) + fprintf(get_lock_file_fp(&lk), "%s\n", + keep_hashes.v[i]); } else { - keep_hashes[ctx.num_multi_pack_indexes_before] = - xstrdup(hash_to_hex_algop(midx_hash, r->hash_algo)); + strvec_push(&keep_hashes, + hash_to_hex_algop(midx_hash, r->hash_algo)); } if (ctx.m || ctx.base_midx) odb_close(ctx.repo->objects); - if (commit_lock_file(&lk) < 0) - die_errno(_("could not write multi-pack-index")); + if (is_lock_file_locked(&lk)) { + if (commit_lock_file(&lk) < 0) + die_errno(_("could not write multi-pack-index")); - clear_midx_files(opts->source, keep_hashes, keep_hashes_nr, - ctx.incremental); + clear_midx_files(opts->source, &keep_hashes, ctx.incremental); + } result = 0; cleanup: @@ -1818,11 +1838,7 @@ cleanup: free(ctx.entries); free(ctx.pack_perm); free(ctx.pack_order); - if (keep_hashes) { - for (uint32_t i = 0; i < keep_hashes_nr; i++) - free((char *)keep_hashes[i]); - free(keep_hashes); - } + strvec_clear(&keep_hashes); strbuf_release(&midx_name); close_midx(midx_to_free); @@ -1833,7 +1849,8 @@ cleanup: int write_midx_file(struct odb_source *source, const char *preferred_pack_name, - const char *refs_snapshot, unsigned flags) + const char *refs_snapshot, + unsigned flags) { struct write_midx_opts opts = { .source = source, @@ -1848,13 +1865,16 @@ int write_midx_file(struct odb_source *source, int write_midx_file_only(struct odb_source *source, struct string_list *packs_to_include, const char *preferred_pack_name, - const char *refs_snapshot, unsigned flags) + const char *refs_snapshot, + const char *incremental_base, + unsigned flags) { struct write_midx_opts opts = { .source = source, .packs_to_include = packs_to_include, .preferred_pack_name = preferred_pack_name, .refs_snapshot = refs_snapshot, + .incremental_base = incremental_base, .flags = flags, }; @@ -1864,12 +1884,14 @@ int write_midx_file_only(struct odb_source *source, int write_midx_file_compact(struct odb_source *source, struct multi_pack_index *from, struct multi_pack_index *to, + const char *incremental_base, unsigned flags) { struct write_midx_opts opts = { .source = source, .compact_from = from, .compact_to = to, + .incremental_base = incremental_base, .flags = flags | MIDX_WRITE_COMPACT, }; diff --git a/midx.c b/midx.c index 81d6ab11e6..efbfbb13f4 100644 --- a/midx.c +++ b/midx.c @@ -12,6 +12,7 @@ #include "chunk-format.h" #include "pack-bitmap.h" #include "pack-revindex.h" +#include "strvec.h" #define MIDX_PACK_ERROR ((void *)(intptr_t)-1) @@ -19,8 +20,7 @@ int midx_checksum_valid(struct multi_pack_index *m); void clear_midx_files_ext(struct odb_source *source, const char *ext, const char *keep_hash); void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext, - char **keep_hashes, - uint32_t hashes_nr); + const struct strvec *keep_hashes); int cmp_idx_or_pack_name(const char *idx_or_pack_name, const char *idx_name); @@ -667,8 +667,8 @@ static int midx_pack_names_cmp(const void *a, const void *b, void *m_) m->pack_names[*(const size_t *)b]); } -static int midx_contains_pack_1(struct multi_pack_index *m, - const char *idx_or_pack_name) +int midx_layer_contains_pack(struct multi_pack_index *m, + const char *idx_or_pack_name) { uint32_t first = 0, last = m->num_packs; @@ -709,7 +709,7 @@ static int midx_contains_pack_1(struct multi_pack_index *m, int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name) { for (; m; m = m->base_midx) - if (midx_contains_pack_1(m, idx_or_pack_name)) + if (midx_layer_contains_pack(m, idx_or_pack_name)) return 1; return 0; } @@ -758,8 +758,7 @@ int midx_checksum_valid(struct multi_pack_index *m) } struct clear_midx_data { - char **keep; - uint32_t keep_nr; + struct strset keep; const char *ext; }; @@ -767,15 +766,12 @@ static void clear_midx_file_ext(const char *full_path, size_t full_path_len UNUS const char *file_name, void *_data) { struct clear_midx_data *data = _data; - uint32_t i; if (!(starts_with(file_name, "multi-pack-index-") && ends_with(file_name, data->ext))) return; - for (i = 0; i < data->keep_nr; i++) { - if (!strcmp(data->keep[i], file_name)) - return; - } + if (strset_contains(&data->keep, file_name)) + return; if (unlink(full_path)) die_errno(_("failed to remove %s"), full_path); } @@ -783,48 +779,49 @@ static void clear_midx_file_ext(const char *full_path, size_t full_path_len UNUS void clear_midx_files_ext(struct odb_source *source, const char *ext, const char *keep_hash) { - struct clear_midx_data data; - memset(&data, 0, sizeof(struct clear_midx_data)); + struct clear_midx_data data = { + .keep = STRSET_INIT, + .ext = ext, + }; if (keep_hash) { - ALLOC_ARRAY(data.keep, 1); + struct strbuf buf = STRBUF_INIT; + strbuf_addf(&buf, "multi-pack-index-%s.%s", keep_hash, ext); - data.keep[0] = xstrfmt("multi-pack-index-%s.%s", keep_hash, ext); - data.keep_nr = 1; + strset_add(&data.keep, buf.buf); + + strbuf_release(&buf); } - data.ext = ext; - for_each_file_in_pack_dir(source->path, - clear_midx_file_ext, - &data); + for_each_file_in_pack_dir(source->path, clear_midx_file_ext, &data); - if (keep_hash) - free(data.keep[0]); - free(data.keep); + strset_clear(&data.keep); } void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext, - char **keep_hashes, - uint32_t hashes_nr) + const struct strvec *keep_hashes) { - struct clear_midx_data data; - uint32_t i; + struct clear_midx_data data = { + .keep = STRSET_INIT, + .ext = ext, + }; + struct strbuf buf = STRBUF_INIT; - memset(&data, 0, sizeof(struct clear_midx_data)); + if (keep_hashes) { + for (size_t i = 0; i < keep_hashes->nr; i++) { + strbuf_reset(&buf); + strbuf_addf(&buf, "multi-pack-index-%s.%s", + keep_hashes->v[i], ext); - ALLOC_ARRAY(data.keep, hashes_nr); - for (i = 0; i < hashes_nr; i++) - data.keep[i] = xstrfmt("multi-pack-index-%s.%s", keep_hashes[i], - ext); - data.keep_nr = hashes_nr; - data.ext = ext; + strset_add(&data.keep, buf.buf); + } + } for_each_file_in_pack_subdir(source->path, "multi-pack-index.d", clear_midx_file_ext, &data); - for (i = 0; i < hashes_nr; i++) - free(data.keep[i]); - free(data.keep); + strbuf_release(&buf); + strset_clear(&data.keep); } void clear_midx_file(struct repository *r) @@ -853,6 +850,35 @@ void clear_midx_file(struct repository *r) strbuf_release(&midx); } +void clear_incremental_midx_files(struct repository *r, + const struct strvec *keep_hashes) +{ + struct odb_source *source = r->objects->sources; + struct strbuf chain = STRBUF_INIT; + + get_midx_chain_filename(source, &chain); + + for (; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + if (files->packed->midx) + close_midx(files->packed->midx); + files->packed->midx = NULL; + } + + if (!keep_hashes && remove_path(chain.buf)) + die(_("failed to clear multi-pack-index chain at %s"), + chain.buf); + + clear_incremental_midx_files_ext(r->objects->sources, MIDX_EXT_BITMAP, + keep_hashes); + clear_incremental_midx_files_ext(r->objects->sources, MIDX_EXT_REV, + keep_hashes); + clear_incremental_midx_files_ext(r->objects->sources, MIDX_EXT_MIDX, + keep_hashes); + + strbuf_release(&chain); +} + static int verify_midx_error; __attribute__((format (printf, 1, 2))) diff --git a/midx.h b/midx.h index 08f3728e52..63853a03a4 100644 --- a/midx.h +++ b/midx.h @@ -9,6 +9,7 @@ struct repository; struct bitmapped_pack; struct git_hash_algo; struct odb_source; +struct strvec; #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ #define MIDX_VERSION_V1 1 @@ -83,6 +84,7 @@ struct multi_pack_index { #define MIDX_WRITE_BITMAP_LOOKUP_TABLE (1 << 4) #define MIDX_WRITE_INCREMENTAL (1 << 5) #define MIDX_WRITE_COMPACT (1 << 6) +#define MIDX_WRITE_NO_CHAIN (1 << 7) #define MIDX_EXT_REV "rev" #define MIDX_EXT_BITMAP "bitmap" @@ -118,6 +120,8 @@ struct object_id *nth_midxed_object_oid(struct object_id *oid, int fill_midx_entry(struct multi_pack_index *m, const struct object_id *oid, struct pack_entry *e); int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name); +int midx_layer_contains_pack(struct multi_pack_index *m, + const char *idx_or_pack_name); int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id); int prepare_multi_pack_index_one(struct odb_source *source); @@ -131,12 +135,17 @@ int write_midx_file(struct odb_source *source, int write_midx_file_only(struct odb_source *source, struct string_list *packs_to_include, const char *preferred_pack_name, - const char *refs_snapshot, unsigned flags); + const char *refs_snapshot, + const char *incremental_base, + unsigned flags); int write_midx_file_compact(struct odb_source *source, struct multi_pack_index *from, struct multi_pack_index *to, + const char *incremental_base, unsigned flags); void clear_midx_file(struct repository *r); +void clear_incremental_midx_files(struct repository *r, + const struct strvec *keep_hashes); int verify_midx_file(struct odb_source *source, unsigned flags); int expire_midx_packs(struct odb_source *source, unsigned flags); int midx_repack(struct odb_source *source, size_t batch_size, unsigned flags); diff --git a/packfile.c b/packfile.c index 4208f53046..89366abfe3 100644 --- a/packfile.c +++ b/packfile.c @@ -420,6 +420,8 @@ void close_pack_index(struct packed_git *p) static void close_pack_revindex(struct packed_git *p) { + FREE_AND_NULL(p->revindex); + if (!p->revindex_map) return; diff --git a/repack-geometry.c b/repack-geometry.c index 7cebd0cb45..2064683dcf 100644 --- a/repack-geometry.c +++ b/repack-geometry.c @@ -4,6 +4,7 @@ #include "repack.h" #include "repository.h" #include "hex.h" +#include "midx.h" #include "packfile.h" static uint32_t pack_geometry_weight(struct packed_git *p) @@ -31,8 +32,28 @@ void pack_geometry_init(struct pack_geometry *geometry, { struct packed_git *p; struct strbuf buf = STRBUF_INIT; + struct multi_pack_index *m = get_multi_pack_index(existing->source); repo_for_each_pack(existing->repo, p) { + if (geometry->midx_layer_threshold_set && m && + p->multi_pack_index) { + /* + * When writing MIDX layers incrementally, + * ignore packs unless they are in the most + * recent MIDX layer *and* there are at least + * 'midx_layer_threshold' packs in that layer. + * + * Otherwise 'p' is either in an older layer, or + * the youngest layer does not have enough packs + * to consider its packs as candidates for + * repacking. In either of those cases we want + * to ignore the pack. + */ + if (m->num_packs < geometry->midx_layer_threshold || + !midx_layer_contains_pack(m, pack_basename(p))) + continue; + } + if (args->local && !p->pack_local) /* * When asked to only repack local packfiles we skip @@ -173,6 +194,20 @@ void pack_geometry_split(struct pack_geometry *geometry) geometry->promisor_split = compute_pack_geometry_split(geometry->promisor_pack, geometry->promisor_pack_nr, geometry->split_factor); + for (uint32_t i = 0; i < geometry->split; i++) { + struct packed_git *p = geometry->pack[i]; + /* + * During incremental MIDX/bitmap repacking, any packs + * included in the rollup are either (a) not MIDX'd, or + * (b) contained in the tip layer iff it has at least + * the threshold number of packs. + * + * In the latter case, we can safely conclude that the + * tip of the MIDX chain will be rewritten. + */ + if (p->multi_pack_index) + geometry->midx_tip_rewritten = true; + } } struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry) @@ -214,7 +249,8 @@ static void remove_redundant_packs(struct packed_git **pack, uint32_t pack_nr, struct string_list *names, struct existing_packs *existing, - const char *packdir) + const char *packdir, + bool wrote_incremental_midx) { const struct git_hash_algo *algop = existing->repo->hash_algo; struct strbuf buf = STRBUF_INIT; @@ -234,7 +270,8 @@ static void remove_redundant_packs(struct packed_git **pack, (string_list_has_string(&existing->kept_packs, buf.buf))) continue; - repack_remove_redundant_pack(existing->repo, packdir, buf.buf); + repack_remove_redundant_pack(existing->repo, packdir, buf.buf, + wrote_incremental_midx); } strbuf_release(&buf); @@ -243,12 +280,13 @@ static void remove_redundant_packs(struct packed_git **pack, void pack_geometry_remove_redundant(struct pack_geometry *geometry, struct string_list *names, struct existing_packs *existing, - const char *packdir) + const char *packdir, + bool wrote_incremental_midx) { remove_redundant_packs(geometry->pack, geometry->split, - names, existing, packdir); + names, existing, packdir, wrote_incremental_midx); remove_redundant_packs(geometry->promisor_pack, geometry->promisor_split, - names, existing, packdir); + names, existing, packdir, wrote_incremental_midx); } void pack_geometry_release(struct pack_geometry *geometry) diff --git a/repack-midx.c b/repack-midx.c index 0682b80c42..b6b1de7180 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -2,12 +2,16 @@ #include "repack.h" #include "hash.h" #include "hex.h" +#include "lockfile.h" +#include "midx.h" #include "odb.h" #include "oidset.h" #include "pack-bitmap.h" +#include "path.h" #include "refs.h" #include "run-command.h" #include "tempfile.h" +#include "trace2.h" struct midx_snapshot_ref_data { struct repository *repo; @@ -275,32 +279,63 @@ static void remove_redundant_bitmaps(struct string_list *include, strbuf_release(&path); } -int write_midx_included_packs(struct repack_write_midx_opts *opts) +static void repack_prepare_midx_command(struct child_process *cmd, + struct repack_write_midx_opts *opts, + const char *subcommand) +{ + cmd->git_cmd = 1; + + strvec_pushl(&cmd->args, "multi-pack-index", subcommand, NULL); + + if (opts->show_progress) + strvec_push(&cmd->args, "--progress"); + else + strvec_push(&cmd->args, "--no-progress"); + + if (opts->write_bitmaps) + strvec_push(&cmd->args, "--bitmap"); +} + +static int repack_fill_midx_stdin_packs(struct child_process *cmd, + struct string_list *include, + struct string_list *out) +{ + struct strbuf in_buf = STRBUF_INIT; + struct strbuf out_buf = STRBUF_INIT; + struct string_list_item *item; + int ret; + + strvec_push(&cmd->args, "--stdin-packs"); + + for_each_string_list_item(item, include) + strbuf_addf(&in_buf, "%s\n", item->string); + + ret = pipe_command(cmd, in_buf.buf, in_buf.len, + out ? &out_buf : NULL, 0, NULL, 0); + + if (out) + string_list_split_f(out, out_buf.buf, "\n", -1, + STRING_LIST_SPLIT_NONEMPTY); + + strbuf_release(&in_buf); + strbuf_release(&out_buf); + + return ret; +} + +static int write_midx_included_packs(struct repack_write_midx_opts *opts) { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list include = STRING_LIST_INIT_DUP; struct string_list_item *item; struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); - FILE *in; int ret = 0; midx_included_packs(&include, opts); if (!include.nr) goto done; - cmd.in = -1; - cmd.git_cmd = 1; - - strvec_push(&cmd.args, "multi-pack-index"); - strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL); - - if (opts->show_progress) - strvec_push(&cmd.args, "--progress"); - else - strvec_push(&cmd.args, "--no-progress"); - - if (opts->write_bitmaps) - strvec_push(&cmd.args, "--bitmap"); + repack_prepare_midx_command(&cmd, opts, "write"); if (preferred) strvec_pushf(&cmd.args, "--preferred-pack=%s", @@ -342,16 +377,7 @@ int write_midx_included_packs(struct repack_write_midx_opts *opts) strvec_pushf(&cmd.args, "--refs-snapshot=%s", opts->refs_snapshot); - ret = start_command(&cmd); - if (ret) - goto done; - - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, &include) - fprintf(in, "%s\n", item->string); - fclose(in); - - ret = finish_command(&cmd); + ret = repack_fill_midx_stdin_packs(&cmd, &include, NULL); done: if (!ret && opts->write_bitmaps) remove_redundant_bitmaps(&include, opts->packdir); @@ -360,3 +386,637 @@ done: return ret; } + +struct midx_compaction_step { + union { + struct multi_pack_index *copy; + struct string_list write; + struct { + struct multi_pack_index *from; + struct multi_pack_index *to; + } compact; + } u; + + uint32_t objects_nr; + char *csum; + + enum { + MIDX_COMPACTION_STEP_UNKNOWN, + MIDX_COMPACTION_STEP_COPY, + MIDX_COMPACTION_STEP_WRITE, + MIDX_COMPACTION_STEP_COMPACT, + } type; +}; + +static const char *midx_compaction_step_base(const struct midx_compaction_step *step) +{ + switch (step->type) { + case MIDX_COMPACTION_STEP_UNKNOWN: + BUG("cannot use UNKNOWN step as a base"); + case MIDX_COMPACTION_STEP_COPY: + return midx_get_checksum_hex(step->u.copy); + case MIDX_COMPACTION_STEP_WRITE: + BUG("cannot use WRITE step as a base"); + case MIDX_COMPACTION_STEP_COMPACT: + return midx_get_checksum_hex(step->u.compact.to); + default: + BUG("unhandled midx compaction step type %d", step->type); + } +} + +static int midx_compaction_step_exec_copy(struct midx_compaction_step *step) +{ + step->csum = xstrdup(midx_get_checksum_hex(step->u.copy)); + return 0; +} + +static int midx_compaction_step_exec_write(struct midx_compaction_step *step, + struct repack_write_midx_opts *opts, + const char *base) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list hash = STRING_LIST_INIT_DUP; + struct string_list_item *item; + const char *preferred_pack = NULL; + int ret = 0; + + if (!step->u.write.nr) { + ret = error(_("no packs to write MIDX during compaction")); + goto out; + } + + for_each_string_list_item(item, &step->u.write) { + if (item->util) + preferred_pack = item->string; + } + + repack_prepare_midx_command(&cmd, opts, "write"); + strvec_pushl(&cmd.args, "--incremental", "--no-write-chain-file", NULL); + strvec_pushf(&cmd.args, "--base=%s", base ? base : "none"); + + if (preferred_pack) { + struct strbuf buf = STRBUF_INIT; + + strbuf_addstr(&buf, preferred_pack); + strbuf_strip_suffix(&buf, ".idx"); + strbuf_addstr(&buf, ".pack"); + + strvec_pushf(&cmd.args, "--preferred-pack=%s", buf.buf); + + strbuf_release(&buf); + } + + ret = repack_fill_midx_stdin_packs(&cmd, &step->u.write, &hash); + if (hash.nr != 1) { + ret = error(_("expected exactly one line during MIDX write, " + "got: %"PRIuMAX), + (uintmax_t)hash.nr); + goto out; + } + + step->csum = xstrdup(hash.items[0].string); + +out: + string_list_clear(&hash, 0); + + return ret; +} + +static int midx_compaction_step_exec_compact(struct midx_compaction_step *step, + struct repack_write_midx_opts *opts) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct strbuf buf = STRBUF_INIT; + FILE *out = NULL; + int ret; + + repack_prepare_midx_command(&cmd, opts, "compact"); + strvec_pushl(&cmd.args, "--incremental", "--no-write-chain-file", + midx_get_checksum_hex(step->u.compact.from), + midx_get_checksum_hex(step->u.compact.to), NULL); + + cmd.out = -1; + + ret = start_command(&cmd); + if (ret) + goto out; + + out = xfdopen(cmd.out, "r"); + while (strbuf_getline_lf(&buf, out) != EOF) { + if (step->csum) { + ret = error(_("unexpected MIDX output: '%s'"), buf.buf); + fclose(out); + out = NULL; + finish_command(&cmd); + goto out; + } + step->csum = strbuf_detach(&buf, NULL); + } + + ret = finish_command(&cmd); + +out: + if (out) + fclose(out); + strbuf_release(&buf); + + return ret; +} + +static int midx_compaction_step_exec(struct midx_compaction_step *step, + struct repack_write_midx_opts *opts, + const char *base) +{ + switch (step->type) { + case MIDX_COMPACTION_STEP_UNKNOWN: + BUG("cannot execute UNKNOWN midx compaction step"); + case MIDX_COMPACTION_STEP_COPY: + return midx_compaction_step_exec_copy(step); + case MIDX_COMPACTION_STEP_WRITE: + return midx_compaction_step_exec_write(step, opts, base); + case MIDX_COMPACTION_STEP_COMPACT: + return midx_compaction_step_exec_compact(step, opts); + default: + BUG("unhandled midx compaction step type %d", step->type); + } +} + +static void midx_compaction_step_release(struct midx_compaction_step *step) +{ + if (step->type == MIDX_COMPACTION_STEP_WRITE) + string_list_clear(&step->u.write, 0); + free(step->csum); +} + +/* + * Build an append-only MIDX plan: a single WRITE step for the freshly + * written packs, plus COPY steps for every existing layer. No + * compaction or merging is performed. + */ +static void repack_make_midx_append_plan(struct repack_write_midx_opts *opts, + struct midx_compaction_step **steps_p, + size_t *steps_nr_p) +{ + struct multi_pack_index *m; + struct midx_compaction_step *steps = NULL; + struct midx_compaction_step *step; + size_t steps_nr = 0, steps_alloc = 0; + + odb_reprepare(opts->existing->repo->objects); + m = get_multi_pack_index(opts->existing->source); + + if (opts->names->nr) { + struct strbuf buf = STRBUF_INIT; + uint32_t i; + + ALLOC_GROW(steps, st_add(steps_nr, 1), steps_alloc); + + step = &steps[steps_nr++]; + memset(step, 0, sizeof(*step)); + + step->type = MIDX_COMPACTION_STEP_WRITE; + string_list_init_dup(&step->u.write); + + for (i = 0; i < opts->names->nr; i++) { + strbuf_reset(&buf); + strbuf_addf(&buf, "pack-%s.idx", + opts->names->items[i].string); + string_list_append(&step->u.write, buf.buf); + } + + strbuf_release(&buf); + } + + for (; m; m = m->base_midx) { + ALLOC_GROW(steps, st_add(steps_nr, 1), steps_alloc); + + step = &steps[steps_nr++]; + memset(step, 0, sizeof(*step)); + + step->type = MIDX_COMPACTION_STEP_COPY; + step->u.copy = m; + step->objects_nr = m->num_objects; + } + + *steps_p = steps; + *steps_nr_p = steps_nr; +} + +static int repack_make_midx_compaction_plan(struct repack_write_midx_opts *opts, + struct midx_compaction_step **steps_p, + size_t *steps_nr_p) +{ + struct multi_pack_index *m; + struct midx_compaction_step *steps = NULL; + struct midx_compaction_step step = { 0 }; + struct strbuf buf = STRBUF_INIT; + size_t steps_nr = 0, steps_alloc = 0; + uint32_t i; + int ret = 0; + + trace2_region_enter("repack", "make_midx_compaction_plan", + opts->existing->repo); + + odb_reprepare(opts->existing->repo->objects); + m = get_multi_pack_index(opts->existing->source); + + for (i = 0; m && i < m->num_packs + m->num_packs_in_base; i++) { + if (prepare_midx_pack(m, i)) { + ret = error(_("could not load pack %"PRIu32" from MIDX"), + i); + goto out; + } + } + + trace2_region_enter("repack", "steps:write", opts->existing->repo); + + /* + * The first MIDX in the resulting chain is always going to be + * new. + * + * At a minimum, it will include all of the newly written packs. + * If there is an existing MIDX whose tip layer contains packs + * that were repacked, it will also include any of its packs + * which were *not* rolled up as part of the geometric repack + * (if any), and the previous tip will be replaced. + * + * It may grow to include the packs from zero or more MIDXs from + * the old chain, beginning either at the old tip (if the MIDX + * was *not* rewritten) or the old tip's base MIDX layer + * (otherwise). + */ + step.type = MIDX_COMPACTION_STEP_WRITE; + string_list_init_dup(&step.u.write); + + for (i = 0; i < opts->names->nr; i++) { + strbuf_reset(&buf); + strbuf_addf(&buf, "pack-%s.idx", opts->names->items[i].string); + string_list_append(&step.u.write, buf.buf); + + trace2_data_string("repack", opts->existing->repo, + "include:fresh", + step.u.write.items[step.u.write.nr - 1].string); + } + for (i = 0; i < opts->geometry->split; i++) { + struct packed_git *p = opts->geometry->pack[i]; + if (unsigned_add_overflows(step.objects_nr, p->num_objects)) { + ret = error(_("too many objects in MIDX compaction step")); + goto out; + } + + step.objects_nr += p->num_objects; + } + trace2_data_intmax("repack", opts->existing->repo, + "include:fresh:objects_nr", + (uintmax_t)step.objects_nr); + + /* + * Now handle any existing packs which were *not* rewritten. + * + * The list of packs in opts->geometry only contains MIDX'd + * packs from the newest layer when that layer has more than + * 'repack.midxNewLayerThreshold' number of packs. + * + * If the MIDX tip was rewritten (that is, one or more of those + * packs appear below the split line), then add all packs above + * the split line to the new layer, as the old one is no longer + * usable. + * + * If the MIDX tip was not rewritten (that is, all MIDX'd packs + * from the youngest layer appear below the split line, or were + * not included in the geometric repack at all because there + * were too few of them), ignore them since we'll retain the + * existing layer as-is. + */ + for (i = opts->geometry->split; i < opts->geometry->pack_nr; i++) { + struct packed_git *p = opts->geometry->pack[i]; + struct string_list_item *item; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + if (p->multi_pack_index && + !opts->geometry->midx_tip_rewritten) { + trace2_data_string("repack", opts->existing->repo, + "exclude:unmodified", buf.buf); + continue; + } + + trace2_data_string("repack", opts->existing->repo, + "include:unmodified", buf.buf); + trace2_data_string("repack", opts->existing->repo, + "include:unmodified:midx", + p->multi_pack_index ? "true" : "false"); + + item = string_list_append(&step.u.write, buf.buf); + if (p->multi_pack_index || i == opts->geometry->pack_nr - 1) + item->util = (void *)1; /* mark as preferred */ + + if (unsigned_add_overflows(step.objects_nr, p->num_objects)) { + ret = error(_("too many objects in MIDX compaction step")); + goto out; + } + + step.objects_nr += p->num_objects; + } + trace2_data_intmax("repack", opts->existing->repo, + "include:unmodified:objects_nr", + (uintmax_t)step.objects_nr); + + /* + * If the MIDX tip was rewritten, then we no longer consider it + * a candidate for compaction, since it will not exist in the + * MIDX chain being built. + */ + if (opts->geometry->midx_tip_rewritten) + m = m->base_midx; + + trace2_data_string("repack", opts->existing->repo, "midx:rewrote-tip", + opts->geometry->midx_tip_rewritten ? "true" : "false"); + + trace2_region_enter("repack", "compact", opts->existing->repo); + + /* + * Compact additional MIDX layers into this proposed one until + * the merging condition is violated. + */ + while (m) { + uint32_t preferred_pack_idx; + + trace2_data_string("repack", opts->existing->repo, + "candidate", midx_get_checksum_hex(m)); + + if (step.objects_nr < m->num_objects / opts->midx_split_factor) { + /* + * Stop compacting MIDX layer as soon as the + * merged size is less than half the size of the + * next layer in the chain. + */ + trace2_data_string("repack", opts->existing->repo, + "compact", "violated"); + trace2_data_intmax("repack", opts->existing->repo, + "objects_nr", + (uintmax_t)step.objects_nr); + trace2_data_intmax("repack", opts->existing->repo, + "next_objects_nr", + (uintmax_t)m->num_objects); + trace2_data_intmax("repack", opts->existing->repo, + "split_factor", + (uintmax_t)opts->midx_split_factor); + + break; + } + + if (midx_preferred_pack(m, &preferred_pack_idx) < 0) { + ret = error(_("could not find preferred pack for MIDX " + "%s"), midx_get_checksum_hex(m)); + goto out; + } + + for (i = 0; i < m->num_packs; i++) { + struct string_list_item *item; + uint32_t pack_int_id = i + m->num_packs_in_base; + struct packed_git *p = nth_midxed_pack(m, pack_int_id); + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + trace2_data_string("repack", opts->existing->repo, + "midx:pack", buf.buf); + + item = string_list_append(&step.u.write, buf.buf); + if (pack_int_id == preferred_pack_idx) + item->util = (void *)1; /* mark as preferred */ + } + + if (unsigned_add_overflows(step.objects_nr, m->num_objects)) { + ret = error(_("too many objects in MIDX compaction step")); + goto out; + } + step.objects_nr += m->num_objects; + + m = m->base_midx; + } + + if (step.u.write.nr > 0) { + /* + * As long as there is at least one new pack to write + * (and thus the MIDX is non-empty), add it to the plan. + */ + ALLOC_GROW(steps, steps_nr + 1, steps_alloc); + steps[steps_nr++] = step; + } + + trace2_data_intmax("repack", opts->existing->repo, + "step:objects_nr", (uintmax_t)step.objects_nr); + trace2_data_intmax("repack", opts->existing->repo, + "step:packs_nr", (uintmax_t)step.u.write.nr); + + trace2_region_leave("repack", "compact", opts->existing->repo); + trace2_region_leave("repack", "steps:write", opts->existing->repo); + + trace2_region_enter("repack", "steps:rest", opts->existing->repo); + + /* + * Then start over, repeat, and either compact or keep as-is + * each MIDX layer until we have exhausted the chain. + * + * Finally, evaluate the remainder of the chain (if any) and + * either compact a sequence of adjacent layers, or keep + * individual layers as-is according to the same merging + * condition as above. + */ + while (m) { + struct multi_pack_index *next = m; + + ALLOC_GROW(steps, steps_nr + 1, steps_alloc); + + memset(&step, 0, sizeof(step)); + step.type = MIDX_COMPACTION_STEP_UNKNOWN; + + trace2_region_enter("repack", "step", opts->existing->repo); + + trace2_data_string("repack", opts->existing->repo, + "from", midx_get_checksum_hex(m)); + + while (next) { + uint32_t proposed_objects_nr; + if (unsigned_add_overflows(step.objects_nr, next->num_objects)) { + ret = error(_("too many objects in MIDX compaction step")); + trace2_region_leave("repack", "step", opts->existing->repo); + goto out; + } + + proposed_objects_nr = step.objects_nr + next->num_objects; + + trace2_data_string("repack", opts->existing->repo, + "proposed", + midx_get_checksum_hex(next)); + trace2_data_intmax("repack", opts->existing->repo, + "proposed:objects_nr", + (uintmax_t)next->num_objects); + + if (!next->base_midx) { + /* + * If we are at the end of the MIDX + * chain, there is nothing to compact, + * so mark it and stop. + */ + step.objects_nr = proposed_objects_nr; + break; + } + + if (proposed_objects_nr < next->base_midx->num_objects / opts->midx_split_factor) { + /* + * If there is a MIDX following this + * one, but our accumulated size is less + * than half of its size, compacting + * them would violate the merging + * condition, so stop here. + */ + + trace2_data_string("repack", opts->existing->repo, + "compact:violated:at", + midx_get_checksum_hex(next->base_midx)); + trace2_data_intmax("repack", opts->existing->repo, + "compact:violated:at:objects_nr", + (uintmax_t)next->base_midx->num_objects); + break; + } + + /* + * Otherwise, it is OK to compact the next layer + * into this one. Do so, and then continue + * through the remainder of the chain. + */ + step.objects_nr = proposed_objects_nr; + trace2_data_intmax("repack", opts->existing->repo, + "step:objects_nr", + (uintmax_t)step.objects_nr); + next = next->base_midx; + } + + if (m == next) { + step.type = MIDX_COMPACTION_STEP_COPY; + step.u.copy = m; + + trace2_data_string("repack", opts->existing->repo, + "type", "copy"); + } else { + step.type = MIDX_COMPACTION_STEP_COMPACT; + step.u.compact.from = next; + step.u.compact.to = m; + + trace2_data_string("repack", opts->existing->repo, + "to", midx_get_checksum_hex(m)); + trace2_data_string("repack", opts->existing->repo, + "type", "compact"); + } + + m = next->base_midx; + steps[steps_nr++] = step; + trace2_region_leave("repack", "step", opts->existing->repo); + } + + trace2_region_leave("repack", "steps:rest", opts->existing->repo); + +out: + *steps_p = steps; + *steps_nr_p = steps_nr; + + strbuf_release(&buf); + + trace2_region_leave("repack", "make_midx_compaction_plan", + opts->existing->repo); + + return ret; +} + +static int write_midx_incremental(struct repack_write_midx_opts *opts) +{ + struct midx_compaction_step *steps = NULL; + struct strbuf lock_name = STRBUF_INIT; + struct lock_file lf; + struct strvec keep_hashes = STRVEC_INIT; + size_t steps_nr = 0; + size_t i; + int ret = 0; + + get_midx_chain_filename(opts->existing->source, &lock_name); + if (safe_create_leading_directories(opts->existing->repo, + lock_name.buf)) + die_errno(_("unable to create leading directories of %s"), + lock_name.buf); + hold_lock_file_for_update(&lf, lock_name.buf, LOCK_DIE_ON_ERROR); + + if (!fdopen_lock_file(&lf, "w")) { + ret = error_errno(_("unable to open multi-pack-index chain file")); + goto done; + } + + if (opts->geometry->split_factor) { + if (repack_make_midx_compaction_plan(opts, &steps, &steps_nr) < 0) { + ret = error(_("unable to generate compaction plan")); + goto done; + } + } else { + repack_make_midx_append_plan(opts, &steps, &steps_nr); + } + + for (i = 0; i < steps_nr; i++) { + struct midx_compaction_step *step = &steps[i]; + char *base = NULL; + + if (i + 1 < steps_nr) + base = xstrdup(midx_compaction_step_base(&steps[i + 1])); + + if (midx_compaction_step_exec(step, opts, base) < 0) { + ret = error(_("unable to execute compaction step %"PRIuMAX), + (uintmax_t)i); + free(base); + goto done; + } + + free(base); + } + + i = steps_nr; + while (i--) { + struct midx_compaction_step *step = &steps[i]; + if (!step->csum) + BUG("missing result for compaction step %"PRIuMAX, + (uintmax_t)i); + fprintf(get_lock_file_fp(&lf), "%s\n", step->csum); + strvec_push(&keep_hashes, step->csum); + } + + commit_lock_file(&lf); + + clear_incremental_midx_files(opts->existing->repo, &keep_hashes); + +done: + strvec_clear(&keep_hashes); + strbuf_release(&lock_name); + for (i = 0; i < steps_nr; i++) + midx_compaction_step_release(&steps[i]); + free(steps); + return ret; +} + +int repack_write_midx(struct repack_write_midx_opts *opts) +{ + switch (opts->mode) { + case REPACK_WRITE_MIDX_NONE: + BUG("write_midx mode is NONE?"); + case REPACK_WRITE_MIDX_DEFAULT: + return write_midx_included_packs(opts); + case REPACK_WRITE_MIDX_INCREMENTAL: + return write_midx_incremental(opts); + default: + BUG("unhandled write_midx mode: %d", opts->mode); + } +} diff --git a/repack.c b/repack.c index 596841027a..571dabb665 100644 --- a/repack.c +++ b/repack.c @@ -55,14 +55,18 @@ void pack_objects_args_release(struct pack_objects_args *args) } void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, - const char *base_name) + const char *base_name, + bool wrote_incremental_midx) { struct strbuf buf = STRBUF_INIT; struct odb_source *source = repo->objects->sources; struct multi_pack_index *m = get_multi_pack_index(source); strbuf_addf(&buf, "%s.pack", base_name); - if (m && source->local && midx_contains_pack(m, buf.buf)) + if (m && source->local && midx_contains_pack(m, buf.buf)) { clear_midx_file(repo); + if (!wrote_incremental_midx) + clear_incremental_midx_files(repo, NULL); + } strbuf_insertf(&buf, 0, "%s/", dir_name); unlink_pack_path(buf.buf, 1); strbuf_release(&buf); @@ -154,6 +158,8 @@ void existing_packs_collect(struct existing_packs *existing, string_list_append(&existing->non_kept_packs, buf.buf); } + existing->source = existing->repo->objects->sources; + string_list_sort(&existing->kept_packs); string_list_sort(&existing->non_kept_packs); string_list_sort(&existing->cruft_packs); @@ -248,25 +254,63 @@ void existing_packs_mark_for_deletion(struct existing_packs *existing, &existing->cruft_packs); } +/* + * Mark every pack that is referenced by the existing MIDX chain as + * retained, so that a subsequent call to + * existing_packs_mark_for_deletion() will not mark them for deletion. + * + * This is used when writing an incremental MIDX layer on top of an + * existing chain: retained layers continue to reference the same + * packs on disk, so those packs must not be unlinked even if the + * freshly-written pack supersedes them. + */ +void existing_packs_retain_midx_packs(struct existing_packs *existing) +{ + struct string_list_item *item; + struct strbuf buf = STRBUF_INIT; + + for_each_string_list_item(item, &existing->midx_packs) { + struct string_list_item *found; + + strbuf_reset(&buf); + strbuf_addstr(&buf, item->string); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_strip_suffix(&buf, ".idx"); + + found = string_list_lookup(&existing->non_kept_packs, buf.buf); + if (found) + existing_packs_mark_retained(found); + + found = string_list_lookup(&existing->cruft_packs, buf.buf); + if (found) + existing_packs_mark_retained(found); + } + + strbuf_release(&buf); +} + static void remove_redundant_packs_1(struct repository *repo, struct string_list *packs, - const char *packdir) + const char *packdir, + bool wrote_incremental_midx) { struct string_list_item *item; for_each_string_list_item(item, packs) { if (!existing_pack_is_marked_for_deletion(item)) continue; - repack_remove_redundant_pack(repo, packdir, item->string); + repack_remove_redundant_pack(repo, packdir, item->string, + wrote_incremental_midx); } } void existing_packs_remove_redundant(struct existing_packs *existing, - const char *packdir) + const char *packdir, + bool wrote_incremental_midx) { remove_redundant_packs_1(existing->repo, &existing->non_kept_packs, - packdir); + packdir, wrote_incremental_midx); remove_redundant_packs_1(existing->repo, &existing->cruft_packs, - packdir); + packdir, wrote_incremental_midx); } void existing_packs_release(struct existing_packs *existing) diff --git a/repack.h b/repack.h index bc9f2e1a5d..f9fbc895f0 100644 --- a/repack.h +++ b/repack.h @@ -34,7 +34,8 @@ void prepare_pack_objects(struct child_process *cmd, void pack_objects_args_release(struct pack_objects_args *args); void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, - const char *base_name); + const char *base_name, + bool wrote_incremental_midx); struct write_pack_opts { struct pack_objects_args *po_args; @@ -56,6 +57,7 @@ struct packed_git; struct existing_packs { struct repository *repo; + struct odb_source *source; struct string_list kept_packs; struct string_list non_kept_packs; struct string_list cruft_packs; @@ -82,8 +84,10 @@ void existing_packs_retain_cruft(struct existing_packs *existing, struct packed_git *cruft); void existing_packs_mark_for_deletion(struct existing_packs *existing, struct string_list *names); +void existing_packs_retain_midx_packs(struct existing_packs *existing); void existing_packs_remove_redundant(struct existing_packs *existing, - const char *packdir); + const char *packdir, + bool wrote_incremental_midx); void existing_packs_release(struct existing_packs *existing); struct generated_pack; @@ -107,6 +111,10 @@ struct pack_geometry { uint32_t promisor_pack_nr, promisor_pack_alloc; uint32_t promisor_split; + uint32_t midx_layer_threshold; + bool midx_layer_threshold_set; + bool midx_tip_rewritten; + int split_factor; }; @@ -124,11 +132,18 @@ struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry); void pack_geometry_remove_redundant(struct pack_geometry *geometry, struct string_list *names, struct existing_packs *existing, - const char *packdir); + const char *packdir, + bool wrote_incremental_midx); void pack_geometry_release(struct pack_geometry *geometry); struct tempfile; +enum repack_write_midx_mode { + REPACK_WRITE_MIDX_NONE, + REPACK_WRITE_MIDX_DEFAULT, + REPACK_WRITE_MIDX_INCREMENTAL, +}; + struct repack_write_midx_opts { struct existing_packs *existing; struct pack_geometry *geometry; @@ -138,10 +153,13 @@ struct repack_write_midx_opts { int show_progress; int write_bitmaps; int midx_must_contain_cruft; + int midx_split_factor; + int midx_new_layer_threshold; + enum repack_write_midx_mode mode; }; void midx_snapshot_refs(struct repository *repo, struct tempfile *f); -int write_midx_included_packs(struct repack_write_midx_opts *opts); +int repack_write_midx(struct repack_write_midx_opts *opts); int write_filtered_pack(const struct write_pack_opts *opts, struct existing_packs *existing, diff --git a/t/meson.build b/t/meson.build index fd955f44ef..69bd8fcc6c 100644 --- a/t/meson.build +++ b/t/meson.build @@ -952,6 +952,7 @@ integration_tests = [ 't7702-repack-cyclic-alternate.sh', 't7703-repack-geometric.sh', 't7704-repack-cruft.sh', + 't7705-repack-incremental-midx.sh', 't7800-difftool.sh', 't7810-grep.sh', 't7811-grep-open.sh', diff --git a/t/t5334-incremental-multi-pack-index.sh b/t/t5334-incremental-multi-pack-index.sh index 99c7d44d8e..68a103d13d 100755 --- a/t/t5334-incremental-multi-pack-index.sh +++ b/t/t5334-incremental-multi-pack-index.sh @@ -96,6 +96,53 @@ test_expect_success 'show object from second pack' ' git cat-file -p 2.2 ' +test_expect_success 'write MIDX layer with --no-write-chain-file' ' + test_commit no-write-chain-file && + git repack -d && + + cp "$midx_chain" "$midx_chain.bak" && + layer="$(git multi-pack-index write --bitmap --incremental \ + --no-write-chain-file)" && + + test_cmp "$midx_chain.bak" "$midx_chain" && + test_path_is_file "$midxdir/multi-pack-index-$layer.midx" +' + +test_expect_success 'write non-incremental MIDX layer with --no-write-chain-file' ' + test_must_fail git multi-pack-index write --bitmap --no-write-chain-file 2>err && + test_grep "cannot use --no-write-chain-file without --incremental" err +' + +test_expect_success 'write MIDX layer with --base without --no-write-chain-file' ' + test_must_fail git multi-pack-index write --bitmap --incremental \ + --base=none 2>err && + test_grep "cannot use --base without --no-write-chain-file" err +' + +test_expect_success 'write MIDX layer with --base=none and --no-write-chain-file' ' + test_commit base-none && + git repack -d && + + cp "$midx_chain" "$midx_chain.bak" && + layer="$(git multi-pack-index write --bitmap --incremental \ + --no-write-chain-file --base=none)" && + + test_cmp "$midx_chain.bak" "$midx_chain" && + test_path_is_file "$midxdir/multi-pack-index-$layer.midx" +' + +test_expect_success 'write MIDX layer with --base= and --no-write-chain-file' ' + test_commit base-hash && + git repack -d && + + cp "$midx_chain" "$midx_chain.bak" && + layer="$(git multi-pack-index write --bitmap --incremental \ + --no-write-chain-file --base="$(nth_line 1 "$midx_chain")")" && + + test_cmp "$midx_chain.bak" "$midx_chain" && + test_path_is_file "$midxdir/multi-pack-index-$layer.midx" +' + for reuse in false single multi do test_expect_success "full clone (pack.allowPackReuse=$reuse)" ' @@ -132,4 +179,20 @@ test_expect_success 'relink existing MIDX layer' ' ' +test_expect_success 'non-incremental write with existing incremental chain' ' + git init non-incremental-write-with-existing && + test_when_finished "rm -fr non-incremental-write-with-existing" && + + ( + cd non-incremental-write-with-existing && + + git config set maintenance.auto false && + + write_midx_layer && + write_midx_layer && + + git multi-pack-index write + ) +' + test_done diff --git a/t/t5335-compact-multi-pack-index.sh b/t/t5335-compact-multi-pack-index.sh index 40f3844282..ec1dafe89f 100755 --- a/t/t5335-compact-multi-pack-index.sh +++ b/t/t5335-compact-multi-pack-index.sh @@ -290,4 +290,117 @@ test_expect_success 'MIDX compaction with bitmaps (non-trivial)' ' ) ' +test_expect_success 'MIDX compaction with --no-write-chain-file' ' + git init midx-compact-with--no-write-chain-file && + ( + cd midx-compact-with--no-write-chain-file && + + git config maintenance.auto false && + + write_packs A B C D && + + test_line_count = 4 $midx_chain && + cp "$midx_chain" "$midx_chain".bak && + + layer="$(git multi-pack-index compact --incremental \ + --no-write-chain-file \ + --base="$(nth_line 1 "$midx_chain")" \ + "$(nth_line 2 "$midx_chain")" \ + "$(nth_line 3 "$midx_chain")")" && + + test_cmp "$midx_chain.bak" "$midx_chain" && + + # After writing the new layer, insert it into the chain + # manually. This is done in order to make $layer visible + # to the read-midx test helper below, and matches what + # the MIDX command would do without --no-write-chain-file. + { + nth_line 1 "$midx_chain.bak" && + echo $layer && + nth_line 4 "$midx_chain.bak" + } >$midx_chain && + + test-tool read-midx $objdir $layer >midx.data && + grep "^pack-B-.*\.idx" midx.data && + grep "^pack-C-.*\.idx" midx.data + + ) +' + +test_expect_success 'MIDX compaction with --base' ' + git init midx-compact-with--base && + ( + cd midx-compact-with--base && + + git config maintenance.auto false && + + write_packs A B C D && + + test_line_count = 4 "$midx_chain" && + + cp "$midx_chain" "$midx_chain.bak" && + + git multi-pack-index compact --incremental \ + --base="$(nth_line 1 "$midx_chain")" \ + "$(nth_line 3 "$midx_chain")" \ + "$(nth_line 4 "$midx_chain")" && + test_line_count = 2 $midx_chain && + + nth_line 1 "$midx_chain.bak" >expect && + nth_line 1 "$midx_chain" >actual && + + test_cmp expect actual + ) +' + +test_expect_success 'MIDX compaction with --base=none' ' + git init midx-compact-base-none && + ( + cd midx-compact-base-none && + + git config maintenance.auto false && + + write_packs A B C D && + + test_line_count = 4 $midx_chain && + + cp "$midx_chain" "$midx_chain".bak && + + # Compact the two bottommost layers (A and B) into a new + # root layer with no parent. + git multi-pack-index compact --incremental \ + --base=none \ + "$(nth_line 1 "$midx_chain")" \ + "$(nth_line 2 "$midx_chain")" && + + test_line_count = 3 $midx_chain && + + # The upper layers (C and D) should be preserved + # unchanged. + nth_line 3 "$midx_chain.bak" >expect && + nth_line 4 "$midx_chain.bak" >>expect && + nth_line 2 "$midx_chain" >actual && + nth_line 3 "$midx_chain" >>actual && + + test_cmp expect actual + ) +' + +test_expect_success 'MIDX compaction with bogus --base checksum' ' + git init midx-compact-bogus-base && + ( + cd midx-compact-bogus-base && + + git config maintenance.auto false && + + write_packs A B C && + + test_must_fail git multi-pack-index compact --incremental \ + --base=deadbeef \ + "$(nth_line 2 "$midx_chain")" \ + "$(nth_line 3 "$midx_chain")" 2>err && + test_grep "could not find base MIDX" err + ) +' + test_done diff --git a/t/t7705-repack-incremental-midx.sh b/t/t7705-repack-incremental-midx.sh new file mode 100755 index 0000000000..25a8c40e8e --- /dev/null +++ b/t/t7705-repack-incremental-midx.sh @@ -0,0 +1,525 @@ +#!/bin/sh + +test_description='git repack --write-midx=incremental' + +. ./test-lib.sh + +GIT_TEST_MULTI_PACK_INDEX=0 +GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 +GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL=0 + +objdir=.git/objects +packdir=$objdir/pack +midxdir=$packdir/multi-pack-index.d +midx_chain=$midxdir/multi-pack-index-chain + +# incrementally_repack N +# +# Make "N" new commits, each stored in their own pack, and then repacked +# with the --write-midx=incremental strategy. +incrementally_repack () { + for i in $(test_seq 1 "$1") + do + test_commit "$i" && + + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + git multi-pack-index verify || return 1 + done +} + +# Create packs with geometrically increasing sizes so that they +# satisfy the geometric progression and survive a --geometric=2 +# repack without being rolled up. Creates 3 packs containing 1, +# 2, and 6 commits (3, 6, and 18 objects) respectively. +create_geometric_packs () { + test_commit "small" && + git repack -d && + + test_commit_bulk --message="medium" 2 && + test_commit_bulk --message="large" 6 && + + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index +} + +# create_layer +# +# Creates a new MIDX layer with the contents of "test_commit_bulk $@". +create_layer () { + test_commit_bulk "$@" && + + git multi-pack-index write --incremental --bitmap +} + +# create_layers +# +# Reads lines of " " from stdin and creates a new MIDX +# layer for each line. See create_layer above for more. +create_layers () { + while read msg nr + do + create_layer --message="$msg" "$nr" || return 1 + done +} + +test_expect_success '--write-midx=incremental without --geometric' ' + git init incremental-without-geometric && + ( + cd incremental-without-geometric && + + git config maintenance.auto false && + + test_commit first && + git repack -d && + + test_commit second && + git repack --write-midx=incremental && + + git multi-pack-index verify && + test_line_count = 1 $midx_chain && + cp $midx_chain $midx_chain.before && + + # A second repack appends a new layer without + # disturbing the existing one. + test_commit third && + git repack --write-midx=incremental && + + git multi-pack-index verify && + test_line_count = 2 $midx_chain && + head -n 1 $midx_chain.before >expect && + head -n 1 $midx_chain >actual && + test_cmp expect actual && + + git fsck + ) +' + +test_expect_success 'below layer threshold, tip packs excluded' ' + git init below-layer-threshold-tip-packs-excluded && + ( + cd below-layer-threshold-tip-packs-excluded && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 4 && + git config repack.midxsplitfactor 2 && + + # Create 3 packs forming a geometric progression by + # object count such that they are unmodified by the + # initial repack. The MIDX chain thusly contains a + # single layer with three packs. + create_geometric_packs && + ls $packdir/pack-*.idx | sort >packs.before && + test_line_count = 1 $midx_chain && + cp $midx_chain $midx_chain.before && + + # Repack a new commit. Since the layer threshold is + # unmet, a new MIDX layer is added on top of the + # existing one. + test_commit extra && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + git multi-pack-index verify && + + ls $packdir/pack-*.idx | sort >packs.after && + comm -13 packs.before packs.after >packs.new && + test_line_count = 1 packs.new && + + test_line_count = 2 "$midx_chain" && + head -n 1 "$midx_chain.before" >expect && + head -n 1 "$midx_chain" >actual && + test_cmp expect actual + ) +' + +test_expect_success 'above layer threshold, tip packs repacked' ' + git init above-layer-threshold-tip-packs-repacked && + ( + cd above-layer-threshold-tip-packs-repacked && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 2 && + git config repack.midxsplitfactor 2 && + + # Same setup, but with the layer threshold set to 2. + # Since the tip MIDX layer meets that threshold, its + # packs are considered repack candidates. + create_geometric_packs && + cp $midx_chain $midx_chain.before && + + # Perturb the existing progression such that it is + # rolled up into a single new pack, invalidating the + # existing MIDX layer and replacing it with a new one. + test_commit extra && + git repack -d && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + ! test_cmp $midx_chain.before $midx_chain && + test_line_count = 1 $midx_chain && + + git multi-pack-index verify + ) +' + +test_expect_success 'above layer threshold, tip layer preserved' ' + git init above-layer-threshold-tip-layer-preserved && + ( + cd above-layer-threshold-tip-layer-preserved && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 2 && + git config repack.midxsplitfactor 2 && + + test_commit_bulk --message="medium" 2 && + test_commit_bulk --message="large" 6 && + + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + test_line_count = 1 "$midx_chain" && + ls $packdir/pack-*.idx | sort >packs.before && + cp $midx_chain $midx_chain.before && + + # Create objects to form a pack satisfying the geometric + # progression (thus preserving the tip layer), but not + # so large that it meets the layer merging condition. + test_commit_bulk --message="small" 1 && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + ls $packdir/pack-*.idx | sort >packs.after && + comm -13 packs.before packs.after >packs.new && + + test_line_count = 1 packs.new && + test_line_count = 3 packs.after && + test_line_count = 2 "$midx_chain" && + head -n 1 "$midx_chain.before" >expect && + head -n 1 "$midx_chain" >actual && + test_cmp expect actual && + + git multi-pack-index verify + ) +' + +test_expect_success 'above layer threshold, tip packs preserved' ' + git init above-layer-threshold-tip-packs-preserved && + ( + cd above-layer-threshold-tip-packs-preserved && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 2 && + git config repack.midxsplitfactor 2 && + + create_geometric_packs && + ls $packdir/pack-*.idx | sort >packs.before && + cp $midx_chain $midx_chain.before && + + # Same setup as above, but this time the new objects do + # not satisfy the new layer merging condition, resulting + # in a new tip layer. + test_commit_bulk --message="huge" 18 && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + ls $packdir/pack-*.idx | sort >packs.after && + comm -13 packs.before packs.after >packs.new && + + ! test_cmp $midx_chain.before $midx_chain && + test_line_count = 1 $midx_chain && + test_line_count = 1 packs.new && + + git multi-pack-index verify + ) +' + +test_expect_success 'new tip absorbs multiple layers' ' + git init new-tip-absorbs-multiple-layers && + ( + cd new-tip-absorbs-multiple-layers && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + # Build a 4-layer chain where each layer is too small to + # absorb the one below it. The sizes must satisfy L(n) < + # L(n-1)/2 for each adjacent pair: + # + # L0 (oldest): 75 obj (25 commits) + # L1: 21 obj (7 commits, 21 < 75/2) + # L2: 9 obj (3 commits, 9 < 21/2) + # L3 (tip): 3 obj (1 commit, 3 < 9/2) + create_layers <<-\EOF && + L0 25 + L1 7 + L2 3 + L3 1 + EOF + + test_line_count = 4 "$midx_chain" && + cp $midx_chain $midx_chain.before && + + # Now add a new commit. The merging condition is + # satisfied between L3-L1, but violated at L0, which is + # too large relative to the accumulated size. + # + # As a result, the chain shrinks from 4 to 2 layers. + test_commit new && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + ! test_cmp $midx_chain.before $midx_chain && + test_line_count = 2 "$midx_chain" && + git multi-pack-index verify + ) +' + +test_expect_success 'compaction of older layers' ' + git init compaction-of-older-layers && + ( + cd compaction-of-older-layers && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + # Build a chain with two small layers at the bottom + # and a larger barrier layer on top, producing a + # chain that violates the compaction invariant, since + # the two small layers would normally have been merged. + create_layers <<-\EOF && + one 2 + two 4 + barrier 54 + EOF + + cp $midx_chain $midx_chain.before && + + # Running an incremental repack compacts the two + # small layers at the bottom of the chain as a + # separate step in the compaction plan. + test_commit another && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + test_line_count = 2 "$midx_chain" && + git multi-pack-index verify + ) +' + +test_expect_success 'geometric rollup with surviving tip packs' ' + git init geometric-rollup-with-surviving-tip-packs && + ( + cd geometric-rollup-with-surviving-tip-packs && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + # Create a pack large enough to anchor the geometric + # progression when small packs are added alongside it. + create_layer --message="big" 5 && + + test_line_count = 1 "$midx_chain" && + cp $midx_chain $midx_chain.before && + + # Repack a small number of objects such that the + # progression is unbothered. Note that the existing pack + # is considered a repack candidate as the new layer + # threshold is set to 1. + test_commit small-1 && + git repack -d && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + ! test_cmp $midx_chain.before $midx_chain && + cp $midx_chain $midx_chain.before + ) +' + +test_expect_success 'kept packs are excluded from repack' ' + git init kept-packs-excluded-from-repack && + ( + cd kept-packs-excluded-from-repack && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + # Create two equal-sized packs, marking one as kept. + for i in A B + do + test_commit "$i" && git repack -d || return 1 + done && + + keep=$(ls $packdir/pack-*.idx | head -n 1) && + touch "${keep%.idx}.keep" && + + # The kept pack is excluded as a repacking candidate + # entirely, so no rollup occurs as there is only one + # non-kept pack. A new MIDX layer is written containing + # that pack. + git repack --geometric=2 -d --write-midx=incremental && + + test-tool read-midx $objdir >actual && + grep "^pack-.*\.idx$" actual >actual.packs && + test_line_count = 1 actual.packs && + test_grep ! "$keep" actual.packs && + + git multi-pack-index verify && + + # All objects (from both kept and non-kept packs) + # must still be accessible. + git fsck + ) +' + +test_expect_success 'incremental MIDX with --max-pack-size' ' + git init incremental-midx-with--max-pack-size && + ( + cd incremental-midx-with--max-pack-size && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + create_layer --message="base" 1 && + + # Now add enough data that a small --max-pack-size will + # cause pack-objects to split its output. Create objects + # large enough to fill multiple packs. + test-tool genrandom foo 1M >big1 && + test-tool genrandom bar 1M >big2 && + git add big1 big2 && + test_tick && + git commit -a -m "big blobs" && + git repack -d && + + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index --max-pack-size=1M && + + test_line_count = 1 "$midx_chain" && + test-tool read-midx $objdir >actual && + grep "^pack-.*\.idx$" actual >actual.packs && + test_line_count -gt 1 actual.packs && + + git multi-pack-index verify + ) +' + +test_expect_success 'noop repack preserves valid MIDX chain' ' + git init noop-repack-preserves-valid-midx-chain && + ( + cd noop-repack-preserves-valid-midx-chain && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + create_layer --message="base" 1 && + + git multi-pack-index verify && + cp $midx_chain $midx_chain.before && + + # Running again with no new objects should not break + # the MIDX chain. It produces "Nothing new to pack." + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + test_cmp $midx_chain.before $midx_chain && + + git multi-pack-index verify && + git fsck + ) +' + +test_expect_success 'repack -ad removes stale incremental chain' ' + git init repack--ad-removes-stale-incremental-chain && + ( + cd repack--ad-removes-stale-incremental-chain && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + create_layers <<-\EOF && + one 1 + two 1 + EOF + + test_path_is_file $midx_chain && + test_line_count = 2 $midx_chain && + + git repack -ad && + + test_path_is_missing $packdir/multi-pack-index && + test_dir_is_empty $midxdir + ) +' + +test_expect_success 'repack -ad --write-midx=incremental is safe' ' + git init ad-incremental-midx && + ( + cd ad-incremental-midx && + + git config maintenance.auto false && + + # Build a MIDX chain with multiple layers referencing + # distinct packs. + test_commit first && + git repack -d && + + test_commit second && + git repack -d --write-midx=incremental && + + git multi-pack-index verify && + test_line_count = 1 $midx_chain && + + # Now do a full -ad repack. The new pack contains all + # objects, but any retained MIDX layers still reference + # the now-deleted packs. + test_commit third && + git repack -ad --write-midx=incremental && + + git multi-pack-index verify && + git fsck && + git rev-list --all --objects >/dev/null + ) +' + +test_expect_success 'repack rejects invalid midxSplitFactor' ' + test_when_finished "rm -fr bad-split-factor" && + git init bad-split-factor && + ( + cd bad-split-factor && + test_commit base && + + for v in 0 1 -1 + do + test_must_fail git -c repack.midxSplitFactor=$v \ + repack -d --geometric=2 --write-midx=incremental 2>err && + test_grep "invalid value for --midx-split-factor" err || + return 1 + done + ) +' + +test_expect_success 'repack rejects invalid midxNewLayerThreshold' ' + test_when_finished "rm -fr bad-layer-threshold" && + git init bad-layer-threshold && + ( + cd bad-layer-threshold && + test_commit base && + + for v in 0 -1 + do + test_must_fail git -c repack.midxNewLayerThreshold=$v \ + repack -d --geometric=2 --write-midx=incremental 2>err && + test_grep "invalid value for --midx-new-layer-threshold" err || + return 1 + done + ) +' + +test_done