From 22235136406ce54b752ec1aa7df76bfb00805bbc Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:57:39 -0400 Subject: [PATCH 01/16] midx-write: handle noop writes when converting incremental chains When updating a MIDX, we optimize out writes that will result in an identical MIDX as the one we already have on disk. See b3bab9d2729 (midx-write: extract function to test whether MIDX needs updating, 2025-12-10) for more details on exactly which writes are optimized out. If `midx_needs_update()` can't rule out any of the obvious cases (e.g., the checksum is invalid, we're requesting a different version, or performing compaction which always requires an update), then we compare the packs we're writing to the packs we already know about. If there are an equal number of packs being written as there are in any existing MIDX layer(s), then we compare the packs by their name. This comparison fails when we have an incremental MIDX chain with at least two layers, since we do not recursively peel through earlier layers, instead treating the `->pack_names` array of the tip MIDX layer as containing all `m->num_packs + m->num_packs_in_base` packs. Adjust this to instead look through the MIDX layers one by one when comparing pack names. While we're at it, fix a typo above in the same function. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx-write.c | 18 ++++++++++-------- t/t5334-incremental-multi-pack-index.sh | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/midx-write.c b/midx-write.c index a25cab75ab..9328f65a20 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1152,7 +1152,7 @@ static bool midx_needs_update(struct multi_pack_index *midx, struct write_midx_c /* * Ensure that we have a valid checksum before consulting the - * exisiting MIDX in order to determine if we can avoid an + * existing MIDX in order to determine if we can avoid an * update. * * This is necessary because the given MIDX is loaded directly @@ -1208,14 +1208,16 @@ static bool midx_needs_update(struct multi_pack_index *midx, struct write_midx_c BUG("same pack added twice?"); } - for (uint32_t i = 0; i < ctx->nr; i++) { - strbuf_reset(&buf); - strbuf_addstr(&buf, midx->pack_names[i]); - strbuf_strip_suffix(&buf, ".idx"); + for (struct multi_pack_index *m = midx; m; m = m->base_midx) { + for (uint32_t i = 0; i < m->num_packs; i++) { + strbuf_reset(&buf); + strbuf_addstr(&buf, m->pack_names[i]); + strbuf_strip_suffix(&buf, ".idx"); - if (!strset_contains(&packs, buf.buf)) - goto out; - strset_remove(&packs, buf.buf); + if (!strset_contains(&packs, buf.buf)) + goto out; + strset_remove(&packs, buf.buf); + } } needed = false; diff --git a/t/t5334-incremental-multi-pack-index.sh b/t/t5334-incremental-multi-pack-index.sh index 99c7d44d8e..c9f5b4e87a 100755 --- a/t/t5334-incremental-multi-pack-index.sh +++ b/t/t5334-incremental-multi-pack-index.sh @@ -132,4 +132,20 @@ test_expect_success 'relink existing MIDX layer' ' ' +test_expect_success 'non-incremental write with existing incremental chain' ' + git init non-incremental-write-with-existing && + test_when_finished "rm -fr non-incremental-write-with-existing" && + + ( + cd non-incremental-write-with-existing && + + git config set maintenance.auto false && + + write_midx_layer && + write_midx_layer && + + git multi-pack-index write + ) +' + test_done From ddaa7a6fb79038a30b59341ed3f0f2097014ccbf Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:57:42 -0400 Subject: [PATCH 02/16] midx: use `strset` for retained MIDX files Both `clear_midx_files_ext()` and `clear_incremental_midx_files_ext()` build a list of filenames to keep while pruning stale MIDX files. Today they hand-roll an array instead of using a `strset`, thus requiring us to pass an additional length parameter, and makes lookups linear. Replace the bare array with a `strset` which can be passed around as a single parameter. Though it improves lookup performance, the difference is likely immeasurable given how small the keep_hashes array typically is. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 57 +++++++++++++++++++++++++++------------------------------ 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/midx.c b/midx.c index 81d6ab11e6..f75e3c9fa6 100644 --- a/midx.c +++ b/midx.c @@ -758,8 +758,7 @@ int midx_checksum_valid(struct multi_pack_index *m) } struct clear_midx_data { - char **keep; - uint32_t keep_nr; + struct strset keep; const char *ext; }; @@ -767,15 +766,12 @@ static void clear_midx_file_ext(const char *full_path, size_t full_path_len UNUS const char *file_name, void *_data) { struct clear_midx_data *data = _data; - uint32_t i; if (!(starts_with(file_name, "multi-pack-index-") && ends_with(file_name, data->ext))) return; - for (i = 0; i < data->keep_nr; i++) { - if (!strcmp(data->keep[i], file_name)) - return; - } + if (strset_contains(&data->keep, file_name)) + return; if (unlink(full_path)) die_errno(_("failed to remove %s"), full_path); } @@ -783,48 +779,49 @@ static void clear_midx_file_ext(const char *full_path, size_t full_path_len UNUS void clear_midx_files_ext(struct odb_source *source, const char *ext, const char *keep_hash) { - struct clear_midx_data data; - memset(&data, 0, sizeof(struct clear_midx_data)); + struct clear_midx_data data = { + .keep = STRSET_INIT, + .ext = ext, + }; if (keep_hash) { - ALLOC_ARRAY(data.keep, 1); + struct strbuf buf = STRBUF_INIT; + strbuf_addf(&buf, "multi-pack-index-%s.%s", keep_hash, ext); - data.keep[0] = xstrfmt("multi-pack-index-%s.%s", keep_hash, ext); - data.keep_nr = 1; + strset_add(&data.keep, buf.buf); + + strbuf_release(&buf); } - data.ext = ext; - for_each_file_in_pack_dir(source->path, - clear_midx_file_ext, - &data); + for_each_file_in_pack_dir(source->path, clear_midx_file_ext, &data); - if (keep_hash) - free(data.keep[0]); - free(data.keep); + strset_clear(&data.keep); } void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext, char **keep_hashes, uint32_t hashes_nr) { - struct clear_midx_data data; + struct clear_midx_data data = { + .keep = STRSET_INIT, + .ext = ext, + }; + struct strbuf buf = STRBUF_INIT; uint32_t i; - memset(&data, 0, sizeof(struct clear_midx_data)); + for (i = 0; i < hashes_nr; i++) { + strbuf_reset(&buf); + strbuf_addf(&buf, "multi-pack-index-%s.%s", keep_hashes[i], + ext); - ALLOC_ARRAY(data.keep, hashes_nr); - for (i = 0; i < hashes_nr; i++) - data.keep[i] = xstrfmt("multi-pack-index-%s.%s", keep_hashes[i], - ext); - data.keep_nr = hashes_nr; - data.ext = ext; + strset_add(&data.keep, buf.buf); + } for_each_file_in_pack_subdir(source->path, "multi-pack-index.d", clear_midx_file_ext, &data); - for (i = 0; i < hashes_nr; i++) - free(data.keep[i]); - free(data.keep); + strbuf_release(&buf); + strset_clear(&data.keep); } void clear_midx_file(struct repository *r) From 3a5ebfac2f8910f335dc1f269e8a8cbdcacb7157 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:57:45 -0400 Subject: [PATCH 03/16] midx: build `keep_hashes` array in order Instead of filling the keep_hashes array using reverse indexing (e.g., `keep_hashes[count - i - 1]`) while traversing linked lists forward, collect linked list nodes into a temporary `layers` array and then iterate it backwards to fill `keep_hashes` sequentially. This makes the filling logic easier to follow, since each segment of the array is filled with a simple forward-marching index. Moreover, this change prepares us for a subsequent commit that will switch to using a `strvec`. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx-write.c | 68 ++++++++++++++++++++++++++++------------------------ 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/midx-write.c b/midx-write.c index 9328f65a20..55c778a97c 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1731,6 +1731,9 @@ static int write_midx_internal(struct write_midx_opts *opts) FILE *chainf = fdopen_lock_file(&lk, "w"); struct strbuf final_midx_name = STRBUF_INIT; struct multi_pack_index *m = ctx.base_midx; + struct multi_pack_index **layers = NULL; + size_t layers_nr = 0, layers_alloc = 0; + size_t j = 0; if (!chainf) { error_errno(_("unable to open multi-pack-index chain file")); @@ -1751,46 +1754,49 @@ static int write_midx_internal(struct write_midx_opts *opts) strbuf_release(&final_midx_name); if (ctx.compact) { - struct multi_pack_index *m; - uint32_t num_layers_before_from = 0; - uint32_t i; + struct multi_pack_index *mp; - for (m = ctx.base_midx; m; m = m->base_midx) - num_layers_before_from++; - - m = ctx.base_midx; - for (i = 0; i < num_layers_before_from; i++) { - uint32_t j = num_layers_before_from - i - 1; - - keep_hashes[j] = xstrdup(midx_get_checksum_hex(m)); - m = m->base_midx; + for (mp = ctx.base_midx; mp; mp = mp->base_midx) { + ALLOC_GROW(layers, layers_nr + 1, layers_alloc); + layers[layers_nr++] = mp; } + while (layers_nr) + keep_hashes[j++] = + xstrdup(midx_get_checksum_hex(layers[--layers_nr])); - keep_hashes[i] = xstrdup(hash_to_hex_algop(midx_hash, - r->hash_algo)); - - i = 0; - for (m = ctx.m; - m && midx_hashcmp(m, ctx.compact_to, r->hash_algo); - m = m->base_midx) { - keep_hashes[keep_hashes_nr - i - 1] = - xstrdup(midx_get_checksum_hex(m)); - i++; - } - } else { - keep_hashes[ctx.num_multi_pack_indexes_before] = + keep_hashes[j++] = xstrdup(hash_to_hex_algop(midx_hash, r->hash_algo)); - for (uint32_t i = 0; i < ctx.num_multi_pack_indexes_before; i++) { - uint32_t j = ctx.num_multi_pack_indexes_before - i - 1; - - keep_hashes[j] = xstrdup(midx_get_checksum_hex(m)); - m = m->base_midx; + for (mp = ctx.m; + mp && midx_hashcmp(mp, ctx.compact_to, + r->hash_algo); + mp = mp->base_midx) { + ALLOC_GROW(layers, layers_nr + 1, layers_alloc); + layers[layers_nr++] = mp; } + while (layers_nr) + keep_hashes[j++] = + xstrdup(midx_get_checksum_hex(layers[--layers_nr])); + } else { + for (; m; m = m->base_midx) { + ALLOC_GROW(layers, layers_nr + 1, layers_alloc); + layers[layers_nr++] = m; + } + while (layers_nr) + keep_hashes[j++] = + xstrdup(midx_get_checksum_hex(layers[--layers_nr])); + + keep_hashes[j++] = + xstrdup(hash_to_hex_algop(midx_hash, + r->hash_algo)); } - for (uint32_t i = 0; i < keep_hashes_nr; i++) + ASSERT(j == keep_hashes_nr); + + free(layers); + + for (uint32_t i = 0; i < j; i++) fprintf(get_lock_file_fp(&lk), "%s\n", keep_hashes[i]); } else { keep_hashes[ctx.num_multi_pack_indexes_before] = From 046a8686a406ebff7ca01dd4a3fabf9a679e010f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:57:48 -0400 Subject: [PATCH 04/16] midx: use `strvec` for `keep_hashes` The `keep_hashes` array in `write_midx_internal()` accumulates the checksums of MIDX files that should be retained when pruning stale entries from the MIDX chain. For similar reasons as in a previous commit, rewrite this using a strvec, requiring us to pass one fewer parameter. Unlike the aforementioned previous commit, use a `strvec` instead of a `string_list`, which provides a more ergonomic interface to adjust the values at a particular index. The ordering is important here, as this value is used to determine the contents of the resulting `multi-pack-index-chain` file when writing with "--incremental". Since the previous commit already builds the array in forward order, the conversion is straightforward: replace indexed assignments with `strvec_push()`, drop the pre-counting and `CALLOC_ARRAY()`, and simplify cleanup via `strvec_clear()`. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx-write.c | 84 ++++++++++++++++++---------------------------------- midx.c | 20 ++++++------- 2 files changed, 38 insertions(+), 66 deletions(-) diff --git a/midx-write.c b/midx-write.c index 55c778a97c..5d9409a974 100644 --- a/midx-write.c +++ b/midx-write.c @@ -29,8 +29,7 @@ extern void clear_midx_files_ext(struct odb_source *source, const char *ext, const char *keep_hash); extern void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext, - const char **keep_hashes, - uint32_t hashes_nr); + const struct strvec *keep_hashes); extern int cmp_idx_or_pack_name(const char *idx_or_pack_name, const char *idx_name); @@ -1109,8 +1108,7 @@ done: } static void clear_midx_files(struct odb_source *source, - const char **hashes, uint32_t hashes_nr, - unsigned incremental) + const struct strvec *hashes, unsigned incremental) { /* * if incremental: @@ -1124,13 +1122,15 @@ static void clear_midx_files(struct odb_source *source, */ struct strbuf buf = STRBUF_INIT; const char *exts[] = { MIDX_EXT_BITMAP, MIDX_EXT_REV, MIDX_EXT_MIDX }; - uint32_t i, j; + uint32_t i; for (i = 0; i < ARRAY_SIZE(exts); i++) { - clear_incremental_midx_files_ext(source, exts[i], - hashes, hashes_nr); - for (j = 0; j < hashes_nr; j++) - clear_midx_files_ext(source, exts[i], hashes[j]); + clear_incremental_midx_files_ext(source, exts[i], hashes); + if (hashes) { + for (size_t j = 0; j < hashes->nr; j++) + clear_midx_files_ext(source, exts[i], + hashes->v[j]); + } } if (incremental) @@ -1267,8 +1267,7 @@ static int write_midx_internal(struct write_midx_opts *opts) int pack_name_concat_len = 0; int dropped_packs = 0; int result = -1; - const char **keep_hashes = NULL; - size_t keep_hashes_nr = 0; + struct strvec keep_hashes = STRVEC_INIT; struct chunkfile *cf; trace2_region_enter("midx", "write_midx_internal", r); @@ -1708,32 +1707,12 @@ static int write_midx_internal(struct write_midx_opts *opts) if (ctx.num_multi_pack_indexes_before == UINT32_MAX) die(_("too many multi-pack-indexes")); - if (ctx.compact) { - struct multi_pack_index *m; - - /* - * Keep all MIDX layers excluding those in the range [from, to]. - */ - for (m = ctx.base_midx; m; m = m->base_midx) - keep_hashes_nr++; - for (m = ctx.m; - m && midx_hashcmp(m, ctx.compact_to, r->hash_algo); - m = m->base_midx) - keep_hashes_nr++; - - keep_hashes_nr++; /* include the compacted layer */ - } else { - keep_hashes_nr = ctx.num_multi_pack_indexes_before + 1; - } - CALLOC_ARRAY(keep_hashes, keep_hashes_nr); - if (ctx.incremental) { FILE *chainf = fdopen_lock_file(&lk, "w"); struct strbuf final_midx_name = STRBUF_INIT; struct multi_pack_index *m = ctx.base_midx; struct multi_pack_index **layers = NULL; size_t layers_nr = 0, layers_alloc = 0; - size_t j = 0; if (!chainf) { error_errno(_("unable to open multi-pack-index chain file")); @@ -1761,12 +1740,12 @@ static int write_midx_internal(struct write_midx_opts *opts) layers[layers_nr++] = mp; } while (layers_nr) - keep_hashes[j++] = - xstrdup(midx_get_checksum_hex(layers[--layers_nr])); + strvec_push(&keep_hashes, + midx_get_checksum_hex(layers[--layers_nr])); - keep_hashes[j++] = - xstrdup(hash_to_hex_algop(midx_hash, - r->hash_algo)); + strvec_push(&keep_hashes, + hash_to_hex_algop(midx_hash, + r->hash_algo)); for (mp = ctx.m; mp && midx_hashcmp(mp, ctx.compact_to, @@ -1776,31 +1755,29 @@ static int write_midx_internal(struct write_midx_opts *opts) layers[layers_nr++] = mp; } while (layers_nr) - keep_hashes[j++] = - xstrdup(midx_get_checksum_hex(layers[--layers_nr])); + strvec_push(&keep_hashes, + midx_get_checksum_hex(layers[--layers_nr])); } else { for (; m; m = m->base_midx) { ALLOC_GROW(layers, layers_nr + 1, layers_alloc); layers[layers_nr++] = m; } while (layers_nr) - keep_hashes[j++] = - xstrdup(midx_get_checksum_hex(layers[--layers_nr])); + strvec_push(&keep_hashes, + midx_get_checksum_hex(layers[--layers_nr])); - keep_hashes[j++] = - xstrdup(hash_to_hex_algop(midx_hash, - r->hash_algo)); + strvec_push(&keep_hashes, + hash_to_hex_algop(midx_hash, + r->hash_algo)); } - ASSERT(j == keep_hashes_nr); - free(layers); - for (uint32_t i = 0; i < j; i++) - fprintf(get_lock_file_fp(&lk), "%s\n", keep_hashes[i]); + for (size_t i = 0; i < keep_hashes.nr; i++) + fprintf(get_lock_file_fp(&lk), "%s\n", keep_hashes.v[i]); } else { - keep_hashes[ctx.num_multi_pack_indexes_before] = - xstrdup(hash_to_hex_algop(midx_hash, r->hash_algo)); + strvec_push(&keep_hashes, + hash_to_hex_algop(midx_hash, r->hash_algo)); } if (ctx.m || ctx.base_midx) @@ -1809,8 +1786,7 @@ static int write_midx_internal(struct write_midx_opts *opts) if (commit_lock_file(&lk) < 0) die_errno(_("could not write multi-pack-index")); - clear_midx_files(opts->source, keep_hashes, keep_hashes_nr, - ctx.incremental); + clear_midx_files(opts->source, &keep_hashes, ctx.incremental); result = 0; cleanup: @@ -1826,11 +1802,7 @@ cleanup: free(ctx.entries); free(ctx.pack_perm); free(ctx.pack_order); - if (keep_hashes) { - for (uint32_t i = 0; i < keep_hashes_nr; i++) - free((char *)keep_hashes[i]); - free(keep_hashes); - } + strvec_clear(&keep_hashes); strbuf_release(&midx_name); close_midx(midx_to_free); diff --git a/midx.c b/midx.c index f75e3c9fa6..bcb8c99901 100644 --- a/midx.c +++ b/midx.c @@ -12,6 +12,7 @@ #include "chunk-format.h" #include "pack-bitmap.h" #include "pack-revindex.h" +#include "strvec.h" #define MIDX_PACK_ERROR ((void *)(intptr_t)-1) @@ -19,8 +20,7 @@ int midx_checksum_valid(struct multi_pack_index *m); void clear_midx_files_ext(struct odb_source *source, const char *ext, const char *keep_hash); void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext, - char **keep_hashes, - uint32_t hashes_nr); + const struct strvec *keep_hashes); int cmp_idx_or_pack_name(const char *idx_or_pack_name, const char *idx_name); @@ -799,22 +799,22 @@ void clear_midx_files_ext(struct odb_source *source, const char *ext, } void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext, - char **keep_hashes, - uint32_t hashes_nr) + const struct strvec *keep_hashes) { struct clear_midx_data data = { .keep = STRSET_INIT, .ext = ext, }; struct strbuf buf = STRBUF_INIT; - uint32_t i; - for (i = 0; i < hashes_nr; i++) { - strbuf_reset(&buf); - strbuf_addf(&buf, "multi-pack-index-%s.%s", keep_hashes[i], - ext); + if (keep_hashes) { + for (size_t i = 0; i < keep_hashes->nr; i++) { + strbuf_reset(&buf); + strbuf_addf(&buf, "multi-pack-index-%s.%s", + keep_hashes->v[i], ext); - strset_add(&data.keep, buf.buf); + strset_add(&data.keep, buf.buf); + } } for_each_file_in_pack_subdir(source->path, "multi-pack-index.d", From 8d342ed4b5dfbaa16f8bbc4537907d1e1224358e Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:57:51 -0400 Subject: [PATCH 05/16] midx: introduce `--no-write-chain-file` for incremental MIDX writes When writing an incremental MIDX layer, the MIDX machinery writes the new layer into the multi-pack-index.d directory and then updates the multi-pack-index-chain file to include the freshly written layer. Future callers however may not wish to immediately update the MIDX chain itself, preferring instead to write out new layer(s) themselves before atomically updating the chain. Concretely, the new incremental MIDX-based repacking strategy will want to do exactly this (that is, assemble the new MIDX chain itself before writing a new chain file and atomically linking it into place). Introduce a `--no-write-chain-file` flag that: * writes the new MIDX layer into the multi-pack-index.d directory * prints its checksum * does not update the multi-pack-index-chain file. The MIDX chain file (and thus, the lock protecting it) remain untouched, allowing callers to assemble the chain themselves. This flag requires `--incremental`, since the notion of a separate layer only makes sense for incremental MIDXs. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-multi-pack-index.adoc | 17 ++++++++-- builtin/multi-pack-index.c | 28 +++++++++++++++-- midx-write.c | 42 ++++++++++++++++--------- midx.h | 1 + t/t5334-incremental-multi-pack-index.sh | 17 ++++++++++ t/t5335-compact-multi-pack-index.sh | 36 +++++++++++++++++++++ 6 files changed, 123 insertions(+), 18 deletions(-) diff --git a/Documentation/git-multi-pack-index.adoc b/Documentation/git-multi-pack-index.adoc index 3a5aa22778..c26196815e 100644 --- a/Documentation/git-multi-pack-index.adoc +++ b/Documentation/git-multi-pack-index.adoc @@ -11,9 +11,9 @@ SYNOPSIS [verse] 'git multi-pack-index' [] write [--preferred-pack=] [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs] - [--refs-snapshot=] + [--refs-snapshot=] [--[no-]write-chain-file] 'git multi-pack-index' [] compact [--[no-]incremental] - [--[no-]bitmap] + [--[no-]bitmap] [--[no-]write-chain-file] 'git multi-pack-index' [] verify 'git multi-pack-index' [] expire 'git multi-pack-index' [] repack [--batch-size=] @@ -83,6 +83,13 @@ marker). and packs not present in an existing MIDX layer. Migrates non-incremental MIDXs to incremental ones when necessary. + + --[no-]write-chain-file:: + When used with `--incremental`, write a new MIDX layer + but do not update the multi-pack-index-chain file. + The checksum of the new layer is printed to standard + output, allowing the caller to assemble and write the + chain itself. Requires `--incremental`. -- compact:: @@ -97,6 +104,12 @@ compact:: --[no-]bitmap:: Control whether or not a multi-pack bitmap is written. + + --[no-]write-chain-file:: + When used with `--incremental`, write a new compacted + MIDX layer but do not update the multi-pack-index-chain + file. The checksum of the new layer is printed to + standard output. Requires `--incremental`. -- + Note that the compact command requires writing a version-2 midx that diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index 0f72d96c02..f861b4b839 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -16,11 +16,11 @@ #define BUILTIN_MIDX_WRITE_USAGE \ N_("git multi-pack-index [] write [--preferred-pack=]\n" \ " [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs]\n" \ - " [--refs-snapshot=]") + " [--refs-snapshot=] [--[no-]write-chain-file]") #define BUILTIN_MIDX_COMPACT_USAGE \ N_("git multi-pack-index [] compact [--[no-]incremental]\n" \ - " [--[no-]bitmap] ") + " [--[no-]bitmap] [--[no-]write-chain-file] ") #define BUILTIN_MIDX_VERIFY_USAGE \ N_("git multi-pack-index [] verify") @@ -153,6 +153,9 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX), OPT_BIT(0, "incremental", &opts.flags, N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL), + OPT_NEGBIT(0, "write-chain-file", &opts.flags, + N_("write the multi-pack-index chain file"), + MIDX_WRITE_NO_CHAIN), OPT_BOOL(0, "stdin-packs", &opts.stdin_packs, N_("write multi-pack index containing only given indexes")), OPT_FILENAME(0, "refs-snapshot", &opts.refs_snapshot, @@ -178,6 +181,15 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, if (argc) usage_with_options(builtin_multi_pack_index_write_usage, options); + + if (opts.flags & MIDX_WRITE_NO_CHAIN && + !(opts.flags & MIDX_WRITE_INCREMENTAL)) { + error(_("cannot use %s without %s"), + "--no-write-chain-file", "--incremental"); + usage_with_options(builtin_multi_pack_index_write_usage, + options); + } + source = handle_object_dir_option(repo); FREE_AND_NULL(options); @@ -221,6 +233,9 @@ static int cmd_multi_pack_index_compact(int argc, const char **argv, MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX), OPT_BIT(0, "incremental", &opts.flags, N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL), + OPT_NEGBIT(0, "write-chain-file", &opts.flags, + N_("write the multi-pack-index chain file"), + MIDX_WRITE_NO_CHAIN), OPT_END(), }; @@ -239,6 +254,15 @@ static int cmd_multi_pack_index_compact(int argc, const char **argv, if (argc != 2) usage_with_options(builtin_multi_pack_index_compact_usage, options); + + if (opts.flags & MIDX_WRITE_NO_CHAIN && + !(opts.flags & MIDX_WRITE_INCREMENTAL)) { + error(_("cannot use %s without %s"), + "--no-write-chain-file", "--incremental"); + usage_with_options(builtin_multi_pack_index_compact_usage, + options); + } + source = handle_object_dir_option(the_repository); FREE_AND_NULL(options); diff --git a/midx-write.c b/midx-write.c index 5d9409a974..38c898e5ff 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1257,7 +1257,7 @@ static int write_midx_internal(struct write_midx_opts *opts) unsigned char midx_hash[GIT_MAX_RAWSZ]; uint32_t start_pack; struct hashfile *f = NULL; - struct lock_file lk; + struct lock_file lk = LOCK_INIT; struct tempfile *incr; struct write_midx_context ctx = { .preferred_pack_idx = NO_PREFERRED_PACK, @@ -1601,11 +1601,14 @@ static int write_midx_internal(struct write_midx_opts *opts) } if (ctx.incremental) { - struct strbuf lock_name = STRBUF_INIT; + if (!(opts->flags & MIDX_WRITE_NO_CHAIN)) { + struct strbuf lock_name = STRBUF_INIT; - get_midx_chain_filename(opts->source, &lock_name); - hold_lock_file_for_update(&lk, lock_name.buf, LOCK_DIE_ON_ERROR); - strbuf_release(&lock_name); + get_midx_chain_filename(opts->source, &lock_name); + hold_lock_file_for_update(&lk, lock_name.buf, + LOCK_DIE_ON_ERROR); + strbuf_release(&lock_name); + } incr = mks_tempfile_m(midx_name.buf, 0444); if (!incr) { @@ -1707,16 +1710,23 @@ static int write_midx_internal(struct write_midx_opts *opts) if (ctx.num_multi_pack_indexes_before == UINT32_MAX) die(_("too many multi-pack-indexes")); + if (!is_lock_file_locked(&lk)) + printf("%s\n", hash_to_hex_algop(midx_hash, r->hash_algo)); + else if (opts->flags & MIDX_WRITE_NO_CHAIN) + BUG("lockfile held with MIDX_WRITE_NO_CHAIN set?"); + if (ctx.incremental) { - FILE *chainf = fdopen_lock_file(&lk, "w"); struct strbuf final_midx_name = STRBUF_INIT; struct multi_pack_index *m = ctx.base_midx; struct multi_pack_index **layers = NULL; size_t layers_nr = 0, layers_alloc = 0; - if (!chainf) { - error_errno(_("unable to open multi-pack-index chain file")); - goto cleanup; + if (is_lock_file_locked(&lk)){ + FILE *chainf = fdopen_lock_file(&lk, "w"); + if (!chainf) { + error_errno(_("unable to open multi-pack-index chain file")); + goto cleanup; + } } if (link_midx_to_chain(ctx.base_midx) < 0) @@ -1773,8 +1783,10 @@ static int write_midx_internal(struct write_midx_opts *opts) free(layers); - for (size_t i = 0; i < keep_hashes.nr; i++) - fprintf(get_lock_file_fp(&lk), "%s\n", keep_hashes.v[i]); + if (is_lock_file_locked(&lk)) + for (size_t i = 0; i < keep_hashes.nr; i++) + fprintf(get_lock_file_fp(&lk), "%s\n", + keep_hashes.v[i]); } else { strvec_push(&keep_hashes, hash_to_hex_algop(midx_hash, r->hash_algo)); @@ -1783,10 +1795,12 @@ static int write_midx_internal(struct write_midx_opts *opts) if (ctx.m || ctx.base_midx) odb_close(ctx.repo->objects); - if (commit_lock_file(&lk) < 0) - die_errno(_("could not write multi-pack-index")); + if (is_lock_file_locked(&lk)) { + if (commit_lock_file(&lk) < 0) + die_errno(_("could not write multi-pack-index")); - clear_midx_files(opts->source, &keep_hashes, ctx.incremental); + clear_midx_files(opts->source, &keep_hashes, ctx.incremental); + } result = 0; cleanup: diff --git a/midx.h b/midx.h index 08f3728e52..5b193882dc 100644 --- a/midx.h +++ b/midx.h @@ -83,6 +83,7 @@ struct multi_pack_index { #define MIDX_WRITE_BITMAP_LOOKUP_TABLE (1 << 4) #define MIDX_WRITE_INCREMENTAL (1 << 5) #define MIDX_WRITE_COMPACT (1 << 6) +#define MIDX_WRITE_NO_CHAIN (1 << 7) #define MIDX_EXT_REV "rev" #define MIDX_EXT_BITMAP "bitmap" diff --git a/t/t5334-incremental-multi-pack-index.sh b/t/t5334-incremental-multi-pack-index.sh index c9f5b4e87a..66d6894761 100755 --- a/t/t5334-incremental-multi-pack-index.sh +++ b/t/t5334-incremental-multi-pack-index.sh @@ -96,6 +96,23 @@ test_expect_success 'show object from second pack' ' git cat-file -p 2.2 ' +test_expect_success 'write MIDX layer with --no-write-chain-file' ' + test_commit no-write-chain-file && + git repack -d && + + cp "$midx_chain" "$midx_chain.bak" && + layer="$(git multi-pack-index write --bitmap --incremental \ + --no-write-chain-file)" && + + test_cmp "$midx_chain.bak" "$midx_chain" && + test_path_is_file "$midxdir/multi-pack-index-$layer.midx" +' + +test_expect_success 'write non-incremental MIDX layer with --no-write-chain-file' ' + test_must_fail git multi-pack-index write --bitmap --no-write-chain-file 2>err && + test_grep "cannot use --no-write-chain-file without --incremental" err +' + for reuse in false single multi do test_expect_success "full clone (pack.allowPackReuse=$reuse)" ' diff --git a/t/t5335-compact-multi-pack-index.sh b/t/t5335-compact-multi-pack-index.sh index 40f3844282..1a65d48b62 100755 --- a/t/t5335-compact-multi-pack-index.sh +++ b/t/t5335-compact-multi-pack-index.sh @@ -290,4 +290,40 @@ test_expect_success 'MIDX compaction with bitmaps (non-trivial)' ' ) ' +test_expect_success 'MIDX compaction with --no-write-chain-file' ' + git init midx-compact-with--no-write-chain-file && + ( + cd midx-compact-with--no-write-chain-file && + + git config maintenance.auto false && + + write_packs A B C D && + + test_line_count = 4 $midx_chain && + cp "$midx_chain" "$midx_chain".bak && + + layer="$(git multi-pack-index compact --incremental \ + --no-write-chain-file \ + "$(nth_line 2 "$midx_chain")" \ + "$(nth_line 3 "$midx_chain")")" && + + test_cmp "$midx_chain.bak" "$midx_chain" && + + # After writing the new layer, insert it into the chain + # manually. This is done in order to make $layer visible + # to the read-midx test helper below, and matches what + # the MIDX command would do without --no-write-chain-file. + { + nth_line 1 "$midx_chain.bak" && + echo $layer && + nth_line 4 "$midx_chain.bak" + } >$midx_chain && + + test-tool read-midx $objdir $layer >midx.data && + grep "^pack-B-.*\.idx" midx.data && + grep "^pack-C-.*\.idx" midx.data + + ) +' + test_done From 0cd2255e64b4775520a6acbbb1868437fc26662d Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:57:54 -0400 Subject: [PATCH 06/16] midx: support custom `--base` for incremental MIDX writes Both `compact` and `write --incremental` fix the base of the resulting MIDX layer: `compact` always places the compacted result on top of "from's" immediate parent in the chain, and `write --incremental` always appends a new layer to the existing tip. In both cases the base is not configurable. Future callers need additional flexibility. For instance, the incremental MIDX-based repacking code may wish to write a layer based on some intermediate ancestor rather than the current tip, or produce a root layer when replacing the bottommost entries in the chain. Introduce a new `--base` option for both subcommands to specify the checksum of the MIDX layer to use as the base. The given checksum must refer to a valid layer in the MIDX chain that is an ancestor of the topmost layer being written or compacted. The special value "none" is accepted to produce a root layer with no parent. This will be needed when the incremental repacking machinery determines that the bottommost layers of the chain should be replaced. If no `--base` is given, behavior is unchanged: `compact` uses "from's" immediate parent in the chain, and `write` appends to the existing tip. For the `write` subcommand, `--base` requires `--no-write-chain-file`. A plain `write --incremental` appends a new layer to the live chain tip with no mechanism to atomically replace it; overriding the base would produce a layer that does not extend the tip, breaking chain invariants. With `--no-write-chain-file` the chain is left unmodified and the caller is responsible for assembling a valid chain. For `compact`, no such restriction applies. The compaction operation atomically replaces the compacted range in the chain file, so writing the result on top of any valid ancestor preserves chain invariants. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-multi-pack-index.adoc | 17 +++++- builtin/multi-pack-index.c | 24 ++++++-- midx-write.c | 34 ++++++++++- midx.h | 5 +- t/t5334-incremental-multi-pack-index.sh | 30 ++++++++++ t/t5335-compact-multi-pack-index.sh | 77 +++++++++++++++++++++++++ 6 files changed, 178 insertions(+), 9 deletions(-) diff --git a/Documentation/git-multi-pack-index.adoc b/Documentation/git-multi-pack-index.adoc index c26196815e..c6d23aeeb9 100644 --- a/Documentation/git-multi-pack-index.adoc +++ b/Documentation/git-multi-pack-index.adoc @@ -12,8 +12,10 @@ SYNOPSIS 'git multi-pack-index' [] write [--preferred-pack=] [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs] [--refs-snapshot=] [--[no-]write-chain-file] + [--base=] 'git multi-pack-index' [] compact [--[no-]incremental] - [--[no-]bitmap] [--[no-]write-chain-file] + [--[no-]bitmap] [--base=] [--[no-]write-chain-file] + 'git multi-pack-index' [] verify 'git multi-pack-index' [] expire 'git multi-pack-index' [] repack [--batch-size=] @@ -90,6 +92,13 @@ marker). The checksum of the new layer is printed to standard output, allowing the caller to assemble and write the chain itself. Requires `--incremental`. + + --base=:: + Specify the checksum of an existing MIDX layer to use + as the base when writing a new incremental layer. + The special value `none` indicates that the new layer + should have no base (i.e., it becomes a root layer). + Requires `--no-write-chain-file`. -- compact:: @@ -110,6 +119,12 @@ compact:: MIDX layer but do not update the multi-pack-index-chain file. The checksum of the new layer is printed to standard output. Requires `--incremental`. + + --base=:: + Specify the checksum of an existing MIDX layer to use + as the base for the compacted result, instead of using + the immediate parent of ``. The special value + `none` indicates that the result should have no base. -- + Note that the compact command requires writing a version-2 midx that diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index f861b4b839..00ffb36394 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -16,11 +16,13 @@ #define BUILTIN_MIDX_WRITE_USAGE \ N_("git multi-pack-index [] write [--preferred-pack=]\n" \ " [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs]\n" \ - " [--refs-snapshot=] [--[no-]write-chain-file]") + " [--refs-snapshot=] [--[no-]write-chain-file]\n" \ + " [--base=]") #define BUILTIN_MIDX_COMPACT_USAGE \ N_("git multi-pack-index [] compact [--[no-]incremental]\n" \ - " [--[no-]bitmap] [--[no-]write-chain-file] ") + " [--[no-]bitmap] [--base=] [--[no-]write-chain-file]\n" \ + " ") #define BUILTIN_MIDX_VERIFY_USAGE \ N_("git multi-pack-index [] verify") @@ -63,6 +65,7 @@ static char const * const builtin_multi_pack_index_usage[] = { static struct opts_multi_pack_index { char *object_dir; const char *preferred_pack; + const char *incremental_base; char *refs_snapshot; unsigned long batch_size; unsigned flags; @@ -151,6 +154,8 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, N_("pack for reuse when computing a multi-pack bitmap")), OPT_BIT(0, "bitmap", &opts.flags, N_("write multi-pack bitmap"), MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX), + OPT_STRING(0, "base", &opts.incremental_base, N_("checksum"), + N_("base MIDX for incremental writes")), OPT_BIT(0, "incremental", &opts.flags, N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL), OPT_NEGBIT(0, "write-chain-file", &opts.flags, @@ -190,6 +195,13 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, options); } + if (opts.incremental_base && + !(opts.flags & MIDX_WRITE_NO_CHAIN)) { + error(_("cannot use --base without --no-write-chain-file")); + usage_with_options(builtin_multi_pack_index_write_usage, + options); + } + source = handle_object_dir_option(repo); FREE_AND_NULL(options); @@ -201,7 +213,8 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, ret = write_midx_file_only(source, &packs, opts.preferred_pack, - opts.refs_snapshot, opts.flags); + opts.refs_snapshot, + opts.incremental_base, opts.flags); string_list_clear(&packs, 0); free(opts.refs_snapshot); @@ -229,6 +242,8 @@ static int cmd_multi_pack_index_compact(int argc, const char **argv, struct option *options; static struct option builtin_multi_pack_index_compact_options[] = { + OPT_STRING(0, "base", &opts.incremental_base, N_("checksum"), + N_("base MIDX for incremental writes")), OPT_BIT(0, "bitmap", &opts.flags, N_("write multi-pack bitmap"), MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX), OPT_BIT(0, "incremental", &opts.flags, @@ -290,7 +305,8 @@ static int cmd_multi_pack_index_compact(int argc, const char **argv, die(_("MIDX %s must be an ancestor of %s"), argv[0], argv[1]); } - ret = write_midx_file_compact(source, from_midx, to_midx, opts.flags); + ret = write_midx_file_compact(source, from_midx, to_midx, + opts.incremental_base, opts.flags); return ret; } diff --git a/midx-write.c b/midx-write.c index 38c898e5ff..561e9eedc0 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1247,6 +1247,7 @@ struct write_midx_opts { const char *preferred_pack_name; const char *refs_snapshot; + const char *incremental_base; unsigned flags; }; @@ -1330,11 +1331,32 @@ static int write_midx_internal(struct write_midx_opts *opts) /* * If compacting MIDX layer(s) in the range [from, to], then the - * compacted MIDX will share the same base MIDX as 'from'. + * compacted MIDX will share the same base MIDX as 'from', + * unless a custom --base is specified (see below). */ if (ctx.compact) ctx.base_midx = ctx.compact_from->base_midx; + if (opts->incremental_base) { + if (!strcmp(opts->incremental_base, "none")) { + ctx.base_midx = NULL; + } else { + while (ctx.base_midx) { + const char *cmp = midx_get_checksum_hex(ctx.base_midx); + if (!strcmp(opts->incremental_base, cmp)) + break; + + ctx.base_midx = ctx.base_midx->base_midx; + } + + if (!ctx.base_midx) { + error(_("could not find base MIDX '%s'"), + opts->incremental_base); + goto cleanup; + } + } + } + ctx.nr = 0; ctx.alloc = ctx.m ? ctx.m->num_packs + ctx.m->num_packs_in_base : 16; ctx.info = NULL; @@ -1827,7 +1849,8 @@ cleanup: int write_midx_file(struct odb_source *source, const char *preferred_pack_name, - const char *refs_snapshot, unsigned flags) + const char *refs_snapshot, + unsigned flags) { struct write_midx_opts opts = { .source = source, @@ -1842,13 +1865,16 @@ int write_midx_file(struct odb_source *source, int write_midx_file_only(struct odb_source *source, struct string_list *packs_to_include, const char *preferred_pack_name, - const char *refs_snapshot, unsigned flags) + const char *refs_snapshot, + const char *incremental_base, + unsigned flags) { struct write_midx_opts opts = { .source = source, .packs_to_include = packs_to_include, .preferred_pack_name = preferred_pack_name, .refs_snapshot = refs_snapshot, + .incremental_base = incremental_base, .flags = flags, }; @@ -1858,12 +1884,14 @@ int write_midx_file_only(struct odb_source *source, int write_midx_file_compact(struct odb_source *source, struct multi_pack_index *from, struct multi_pack_index *to, + const char *incremental_base, unsigned flags) { struct write_midx_opts opts = { .source = source, .compact_from = from, .compact_to = to, + .incremental_base = incremental_base, .flags = flags | MIDX_WRITE_COMPACT, }; diff --git a/midx.h b/midx.h index 5b193882dc..77dd66de02 100644 --- a/midx.h +++ b/midx.h @@ -132,10 +132,13 @@ int write_midx_file(struct odb_source *source, int write_midx_file_only(struct odb_source *source, struct string_list *packs_to_include, const char *preferred_pack_name, - const char *refs_snapshot, unsigned flags); + const char *refs_snapshot, + const char *incremental_base, + unsigned flags); int write_midx_file_compact(struct odb_source *source, struct multi_pack_index *from, struct multi_pack_index *to, + const char *incremental_base, unsigned flags); void clear_midx_file(struct repository *r); int verify_midx_file(struct odb_source *source, unsigned flags); diff --git a/t/t5334-incremental-multi-pack-index.sh b/t/t5334-incremental-multi-pack-index.sh index 66d6894761..68a103d13d 100755 --- a/t/t5334-incremental-multi-pack-index.sh +++ b/t/t5334-incremental-multi-pack-index.sh @@ -113,6 +113,36 @@ test_expect_success 'write non-incremental MIDX layer with --no-write-chain-file test_grep "cannot use --no-write-chain-file without --incremental" err ' +test_expect_success 'write MIDX layer with --base without --no-write-chain-file' ' + test_must_fail git multi-pack-index write --bitmap --incremental \ + --base=none 2>err && + test_grep "cannot use --base without --no-write-chain-file" err +' + +test_expect_success 'write MIDX layer with --base=none and --no-write-chain-file' ' + test_commit base-none && + git repack -d && + + cp "$midx_chain" "$midx_chain.bak" && + layer="$(git multi-pack-index write --bitmap --incremental \ + --no-write-chain-file --base=none)" && + + test_cmp "$midx_chain.bak" "$midx_chain" && + test_path_is_file "$midxdir/multi-pack-index-$layer.midx" +' + +test_expect_success 'write MIDX layer with --base= and --no-write-chain-file' ' + test_commit base-hash && + git repack -d && + + cp "$midx_chain" "$midx_chain.bak" && + layer="$(git multi-pack-index write --bitmap --incremental \ + --no-write-chain-file --base="$(nth_line 1 "$midx_chain")")" && + + test_cmp "$midx_chain.bak" "$midx_chain" && + test_path_is_file "$midxdir/multi-pack-index-$layer.midx" +' + for reuse in false single multi do test_expect_success "full clone (pack.allowPackReuse=$reuse)" ' diff --git a/t/t5335-compact-multi-pack-index.sh b/t/t5335-compact-multi-pack-index.sh index 1a65d48b62..ec1dafe89f 100755 --- a/t/t5335-compact-multi-pack-index.sh +++ b/t/t5335-compact-multi-pack-index.sh @@ -304,6 +304,7 @@ test_expect_success 'MIDX compaction with --no-write-chain-file' ' layer="$(git multi-pack-index compact --incremental \ --no-write-chain-file \ + --base="$(nth_line 1 "$midx_chain")" \ "$(nth_line 2 "$midx_chain")" \ "$(nth_line 3 "$midx_chain")")" && @@ -326,4 +327,80 @@ test_expect_success 'MIDX compaction with --no-write-chain-file' ' ) ' +test_expect_success 'MIDX compaction with --base' ' + git init midx-compact-with--base && + ( + cd midx-compact-with--base && + + git config maintenance.auto false && + + write_packs A B C D && + + test_line_count = 4 "$midx_chain" && + + cp "$midx_chain" "$midx_chain.bak" && + + git multi-pack-index compact --incremental \ + --base="$(nth_line 1 "$midx_chain")" \ + "$(nth_line 3 "$midx_chain")" \ + "$(nth_line 4 "$midx_chain")" && + test_line_count = 2 $midx_chain && + + nth_line 1 "$midx_chain.bak" >expect && + nth_line 1 "$midx_chain" >actual && + + test_cmp expect actual + ) +' + +test_expect_success 'MIDX compaction with --base=none' ' + git init midx-compact-base-none && + ( + cd midx-compact-base-none && + + git config maintenance.auto false && + + write_packs A B C D && + + test_line_count = 4 $midx_chain && + + cp "$midx_chain" "$midx_chain".bak && + + # Compact the two bottommost layers (A and B) into a new + # root layer with no parent. + git multi-pack-index compact --incremental \ + --base=none \ + "$(nth_line 1 "$midx_chain")" \ + "$(nth_line 2 "$midx_chain")" && + + test_line_count = 3 $midx_chain && + + # The upper layers (C and D) should be preserved + # unchanged. + nth_line 3 "$midx_chain.bak" >expect && + nth_line 4 "$midx_chain.bak" >>expect && + nth_line 2 "$midx_chain" >actual && + nth_line 3 "$midx_chain" >>actual && + + test_cmp expect actual + ) +' + +test_expect_success 'MIDX compaction with bogus --base checksum' ' + git init midx-compact-bogus-base && + ( + cd midx-compact-bogus-base && + + git config maintenance.auto false && + + write_packs A B C && + + test_must_fail git multi-pack-index compact --incremental \ + --base=deadbeef \ + "$(nth_line 2 "$midx_chain")" \ + "$(nth_line 3 "$midx_chain")" 2>err && + test_grep "could not find base MIDX" err + ) +' + test_done From f0ef2afb8be0aa37b80bc1cf1f1a9acfb208f00f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:57:57 -0400 Subject: [PATCH 07/16] repack: track the ODB source via existing_packs Store the ODB source in the `existing_packs` struct and use that in place of the raw `repo->objects->sources` access within `cmd_repack()`. The source used is still assigned from the first source in the list, so there are no functional changes in this commit. The changes instead serve two purposes (one immediate, one not): - The incremental MIDX-based repacking machinery will need to know what source is being used to read the existing MIDX/chain (should one exist). - In the future, if "git repack" is taught how to operate on other object sources, this field will serve as the authoritative value for that source. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 5 ++--- repack.c | 2 ++ repack.h | 1 + 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 4c5a82c2c8..24be147d39 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -417,7 +417,7 @@ int cmd_repack(int argc, * midx_has_unknown_packs() will make the decision for * us. */ - if (!get_multi_pack_index(repo->objects->sources)) + if (!get_multi_pack_index(existing.source)) midx_must_contain_cruft = 1; } @@ -564,8 +564,7 @@ int cmd_repack(int argc, unsigned flags = 0; if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL, 0)) flags |= MIDX_WRITE_INCREMENTAL; - write_midx_file(repo->objects->sources, - NULL, NULL, flags); + write_midx_file(existing.source, NULL, NULL, flags); } cleanup: diff --git a/repack.c b/repack.c index 596841027a..2ee6b51420 100644 --- a/repack.c +++ b/repack.c @@ -154,6 +154,8 @@ void existing_packs_collect(struct existing_packs *existing, string_list_append(&existing->non_kept_packs, buf.buf); } + existing->source = existing->repo->objects->sources; + string_list_sort(&existing->kept_packs); string_list_sort(&existing->non_kept_packs); string_list_sort(&existing->cruft_packs); diff --git a/repack.h b/repack.h index bc9f2e1a5d..c0e9f0ca64 100644 --- a/repack.h +++ b/repack.h @@ -56,6 +56,7 @@ struct packed_git; struct existing_packs { struct repository *repo; + struct odb_source *source; struct string_list kept_packs; struct string_list non_kept_packs; struct string_list cruft_packs; From ee6fb5823822bb03bd8dc5b7e7645e5b319033f0 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:58:00 -0400 Subject: [PATCH 08/16] midx: expose `midx_layer_contains_pack()` Rename the function `midx_contains_pack_1()` to instead be called `midx_layer_contains_pack()` and make it accessible. Unlike `midx_contains_pack()` (which recurses through the entire chain), this function checks only a single MIDX layer. This will be used by a subsequent commit to determine whether a given pack belongs to the tip MIDX layer specifically, rather than to any layer in the chain. No functional changes are present in this commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 6 +++--- midx.h | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/midx.c b/midx.c index bcb8c99901..dc86c8e7fe 100644 --- a/midx.c +++ b/midx.c @@ -667,8 +667,8 @@ static int midx_pack_names_cmp(const void *a, const void *b, void *m_) m->pack_names[*(const size_t *)b]); } -static int midx_contains_pack_1(struct multi_pack_index *m, - const char *idx_or_pack_name) +int midx_layer_contains_pack(struct multi_pack_index *m, + const char *idx_or_pack_name) { uint32_t first = 0, last = m->num_packs; @@ -709,7 +709,7 @@ static int midx_contains_pack_1(struct multi_pack_index *m, int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name) { for (; m; m = m->base_midx) - if (midx_contains_pack_1(m, idx_or_pack_name)) + if (midx_layer_contains_pack(m, idx_or_pack_name)) return 1; return 0; } diff --git a/midx.h b/midx.h index 77dd66de02..3ee12dd08e 100644 --- a/midx.h +++ b/midx.h @@ -119,6 +119,8 @@ struct object_id *nth_midxed_object_oid(struct object_id *oid, int fill_midx_entry(struct multi_pack_index *m, const struct object_id *oid, struct pack_entry *e); int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name); +int midx_layer_contains_pack(struct multi_pack_index *m, + const char *idx_or_pack_name); int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id); int prepare_multi_pack_index_one(struct odb_source *source); From 1505990d72585cbdf35cd596a2167c2a8a4edda1 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:58:03 -0400 Subject: [PATCH 09/16] repack-midx: factor out `repack_prepare_midx_command()` The `write_midx_included_packs()` function assembles and executes a `git multi-pack-index write` command, constructing the argument list inline. Future commits will introduce additional callers that need to construct similar `git multi-pack-index` commands (for both `write` and `compact` subcommands), so extract the common portions of the command setup into a reusable `repack_prepare_midx_command()` helper. The extracted helper sets `git_cmd`, pushes `multi-pack-index` and a subcommand, and handles `--progress`/`--no-progress` and `--bitmap` flags. The remaining arguments that are specific to the `write` subcommand (such as `--stdin-packs`) are left to the caller. No functional changes are included in this patch. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- repack-midx.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/repack-midx.c b/repack-midx.c index 0682b80c42..5634dc186d 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -275,6 +275,23 @@ static void remove_redundant_bitmaps(struct string_list *include, strbuf_release(&path); } +static void repack_prepare_midx_command(struct child_process *cmd, + struct repack_write_midx_opts *opts, + const char *subcommand) +{ + cmd->git_cmd = 1; + + strvec_pushl(&cmd->args, "multi-pack-index", subcommand, NULL); + + if (opts->show_progress) + strvec_push(&cmd->args, "--progress"); + else + strvec_push(&cmd->args, "--no-progress"); + + if (opts->write_bitmaps) + strvec_push(&cmd->args, "--bitmap"); +} + int write_midx_included_packs(struct repack_write_midx_opts *opts) { struct child_process cmd = CHILD_PROCESS_INIT; @@ -289,18 +306,9 @@ int write_midx_included_packs(struct repack_write_midx_opts *opts) goto done; cmd.in = -1; - cmd.git_cmd = 1; - strvec_push(&cmd.args, "multi-pack-index"); - strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL); - - if (opts->show_progress) - strvec_push(&cmd.args, "--progress"); - else - strvec_push(&cmd.args, "--no-progress"); - - if (opts->write_bitmaps) - strvec_push(&cmd.args, "--bitmap"); + repack_prepare_midx_command(&cmd, opts, "write"); + strvec_push(&cmd.args, "--stdin-packs"); if (preferred) strvec_pushf(&cmd.args, "--preferred-pack=%s", From 6e38bcc51014e89a430bbd4f708170f5f7795b76 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:58:07 -0400 Subject: [PATCH 10/16] repack-midx: extract `repack_fill_midx_stdin_packs()` The function `write_midx_included_packs()` manages the lifecycle of writing packs to stdin when running `git multi-pack-index write` as a child process. Extract a standalone `repack_fill_midx_stdin_packs()` helper, which handles `--stdin-packs` argument setup, starting the command, writing pack names to its standard input, and finishing the command. This simplifies `write_midx_included_packs()` and prepares for a subsequent commit where the same helper is called with `cmd->out = -1` to capture the MIDX's checksum from the command's standard output, which is needed when writing MIDX layers with `--no-write-chain-file`. No functional changes are included in this patch. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- repack-midx.c | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/repack-midx.c b/repack-midx.c index 5634dc186d..3fe83715da 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -292,23 +292,42 @@ static void repack_prepare_midx_command(struct child_process *cmd, strvec_push(&cmd->args, "--bitmap"); } +static int repack_fill_midx_stdin_packs(struct child_process *cmd, + struct string_list *include) +{ + struct string_list_item *item; + FILE *in; + int ret; + + cmd->in = -1; + + strvec_push(&cmd->args, "--stdin-packs"); + + ret = start_command(cmd); + if (ret) + return ret; + + in = xfdopen(cmd->in, "w"); + for_each_string_list_item(item, include) + fprintf(in, "%s\n", item->string); + fclose(in); + + return finish_command(cmd); +} + int write_midx_included_packs(struct repack_write_midx_opts *opts) { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list include = STRING_LIST_INIT_DUP; struct string_list_item *item; struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); - FILE *in; int ret = 0; midx_included_packs(&include, opts); if (!include.nr) goto done; - cmd.in = -1; - repack_prepare_midx_command(&cmd, opts, "write"); - strvec_push(&cmd.args, "--stdin-packs"); if (preferred) strvec_pushf(&cmd.args, "--preferred-pack=%s", @@ -350,16 +369,7 @@ int write_midx_included_packs(struct repack_write_midx_opts *opts) strvec_pushf(&cmd.args, "--refs-snapshot=%s", opts->refs_snapshot); - ret = start_command(&cmd); - if (ret) - goto done; - - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, &include) - fprintf(in, "%s\n", item->string); - fclose(in); - - ret = finish_command(&cmd); + ret = repack_fill_midx_stdin_packs(&cmd, &include); done: if (!ret && opts->write_bitmaps) remove_redundant_bitmaps(&include, opts->packdir); From d0ac3969f4b8a859d23c9f45cab873cbbf8cdfb8 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:58:10 -0400 Subject: [PATCH 11/16] repack-geometry: prepare for incremental MIDX repacking Teach `pack_geometry_init()` to optionally restrict the set of repacking candidates to only packs in the tip MIDX layer when a `midx_layer_threshold` is configured. If the tip layer has fewer packs than the threshold, those packs are excluded entirely; otherwise only packs in that layer participate in the geometric repack. Also track whether any tip-layer packs were included in the rollup (`midx_tip_rewritten`), which a subsequent commit will use to decide how to update the MIDX chain after repacking. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- repack-geometry.c | 35 +++++++++++++++++++++++++++++++++++ repack.h | 4 ++++ 2 files changed, 39 insertions(+) diff --git a/repack-geometry.c b/repack-geometry.c index 7cebd0cb45..2408b8a3cc 100644 --- a/repack-geometry.c +++ b/repack-geometry.c @@ -4,6 +4,7 @@ #include "repack.h" #include "repository.h" #include "hex.h" +#include "midx.h" #include "packfile.h" static uint32_t pack_geometry_weight(struct packed_git *p) @@ -31,8 +32,28 @@ void pack_geometry_init(struct pack_geometry *geometry, { struct packed_git *p; struct strbuf buf = STRBUF_INIT; + struct multi_pack_index *m = get_multi_pack_index(existing->source); repo_for_each_pack(existing->repo, p) { + if (geometry->midx_layer_threshold_set && m && + p->multi_pack_index) { + /* + * When writing MIDX layers incrementally, + * ignore packs unless they are in the most + * recent MIDX layer *and* there are at least + * 'midx_layer_threshold' packs in that layer. + * + * Otherwise 'p' is either in an older layer, or + * the youngest layer does not have enough packs + * to consider its packs as candidates for + * repacking. In either of those cases we want + * to ignore the pack. + */ + if (m->num_packs < geometry->midx_layer_threshold || + !midx_layer_contains_pack(m, pack_basename(p))) + continue; + } + if (args->local && !p->pack_local) /* * When asked to only repack local packfiles we skip @@ -173,6 +194,20 @@ void pack_geometry_split(struct pack_geometry *geometry) geometry->promisor_split = compute_pack_geometry_split(geometry->promisor_pack, geometry->promisor_pack_nr, geometry->split_factor); + for (uint32_t i = 0; i < geometry->split; i++) { + struct packed_git *p = geometry->pack[i]; + /* + * During incremental MIDX/bitmap repacking, any packs + * included in the rollup are either (a) not MIDX'd, or + * (b) contained in the tip layer iff it has at least + * the threshold number of packs. + * + * In the latter case, we can safely conclude that the + * tip of the MIDX chain will be rewritten. + */ + if (p->multi_pack_index) + geometry->midx_tip_rewritten = true; + } } struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry) diff --git a/repack.h b/repack.h index c0e9f0ca64..77d24ee45f 100644 --- a/repack.h +++ b/repack.h @@ -108,6 +108,10 @@ struct pack_geometry { uint32_t promisor_pack_nr, promisor_pack_alloc; uint32_t promisor_split; + uint32_t midx_layer_threshold; + bool midx_layer_threshold_set; + bool midx_tip_rewritten; + int split_factor; }; From d376967fbfb0b366c08be50a1c41d6b30c5e6d89 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:58:13 -0400 Subject: [PATCH 12/16] builtin/repack.c: convert `--write-midx` to an `OPT_CALLBACK` Change the --write-midx (-m) flag from an OPT_BOOL to an OPT_CALLBACK that accepts an optional mode argument. Introduce an enum with REPACK_WRITE_MIDX_NONE and REPACK_WRITE_MIDX_DEFAULT to distinguish between the two states, and update all existing boolean checks accordingly. For now, passing no argument (or just `-m`) selects the default mode, preserving existing behavior. A subsequent commit will add a new mode for writing incremental MIDXs. Extract repack_write_midx() as a dispatcher that selects the appropriate MIDX-writing implementation based on the mode. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 50 ++++++++++++++++++++++++++++++++++++------------ repack-midx.c | 14 +++++++++++++- repack.h | 8 +++++++- 3 files changed, 58 insertions(+), 14 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 24be147d39..5d366340c3 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -97,6 +97,24 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } +static int option_parse_write_midx(const struct option *opt, const char *arg, + int unset) +{ + enum repack_write_midx_mode *cfg = opt->value; + + if (unset) { + *cfg = REPACK_WRITE_MIDX_NONE; + return 0; + } + + if (!arg || !*arg) + *cfg = REPACK_WRITE_MIDX_DEFAULT; + else + return error(_("unknown value for %s: %s"), opt->long_name, arg); + + return 0; +} + int cmd_repack(int argc, const char **argv, const char *prefix, @@ -119,7 +137,7 @@ int cmd_repack(int argc, struct string_list keep_pack_list = STRING_LIST_INIT_NODUP; struct pack_objects_args po_args = PACK_OBJECTS_ARGS_INIT; struct pack_objects_args cruft_po_args = PACK_OBJECTS_ARGS_INIT; - int write_midx = 0; + enum repack_write_midx_mode write_midx = REPACK_WRITE_MIDX_NONE; const char *cruft_expiration = NULL; const char *expire_to = NULL; const char *filter_to = NULL; @@ -185,8 +203,14 @@ int cmd_repack(int argc, N_("do not repack this pack")), OPT_INTEGER('g', "geometric", &geometry.split_factor, N_("find a geometric progression with factor ")), - OPT_BOOL('m', "write-midx", &write_midx, - N_("write a multi-pack index of the resulting packs")), + OPT_CALLBACK_F(0, "write-midx", &write_midx, + N_("mode"), + N_("write a multi-pack index of the resulting packs"), + PARSE_OPT_OPTARG, option_parse_write_midx), + OPT_SET_INT_F('m', NULL, &write_midx, + N_("write a multi-pack index of the resulting packs"), + REPACK_WRITE_MIDX_DEFAULT, + PARSE_OPT_HIDDEN), OPT_STRING(0, "expire-to", &expire_to, N_("dir"), N_("pack prefix to store a pack containing pruned objects")), OPT_STRING(0, "filter-to", &filter_to, N_("dir"), @@ -221,14 +245,16 @@ int cmd_repack(int argc, pack_everything |= ALL_INTO_ONE; if (write_bitmaps < 0) { - if (!write_midx && + if (write_midx == REPACK_WRITE_MIDX_NONE && (!(pack_everything & ALL_INTO_ONE) || !is_bare_repository())) write_bitmaps = 0; } if (po_args.pack_kept_objects < 0) - po_args.pack_kept_objects = write_bitmaps > 0 && !write_midx; + po_args.pack_kept_objects = write_bitmaps > 0 && + write_midx == REPACK_WRITE_MIDX_NONE; - if (write_bitmaps && !(pack_everything & ALL_INTO_ONE) && !write_midx) + if (write_bitmaps && !(pack_everything & ALL_INTO_ONE) && + write_midx == REPACK_WRITE_MIDX_NONE) die(_(incremental_bitmap_conflict_error)); if (write_bitmaps && po_args.local && @@ -244,7 +270,7 @@ int cmd_repack(int argc, write_bitmaps = 0; } - if (write_midx && write_bitmaps) { + if (write_midx != REPACK_WRITE_MIDX_NONE && write_bitmaps) { struct strbuf path = STRBUF_INIT; strbuf_addf(&path, "%s/%s_XXXXXX", @@ -297,7 +323,7 @@ int cmd_repack(int argc, } if (repo_has_promisor_remote(repo)) strvec_push(&cmd.args, "--exclude-promisor-objects"); - if (!write_midx) { + if (write_midx == REPACK_WRITE_MIDX_NONE) { if (write_bitmaps > 0) strvec_push(&cmd.args, "--write-bitmap-index"); else if (write_bitmaps < 0) @@ -519,7 +545,7 @@ int cmd_repack(int argc, if (delete_redundant && pack_everything & ALL_INTO_ONE) existing_packs_mark_for_deletion(&existing, &names); - if (write_midx) { + if (write_midx != REPACK_WRITE_MIDX_NONE) { struct repack_write_midx_opts opts = { .existing = &existing, .geometry = &geometry, @@ -528,11 +554,11 @@ int cmd_repack(int argc, .packdir = packdir, .show_progress = show_progress, .write_bitmaps = write_bitmaps > 0, - .midx_must_contain_cruft = midx_must_contain_cruft + .midx_must_contain_cruft = midx_must_contain_cruft, + .mode = write_midx, }; - ret = write_midx_included_packs(&opts); - + ret = repack_write_midx(&opts); if (ret) goto cleanup; } diff --git a/repack-midx.c b/repack-midx.c index 3fe83715da..b1ca379708 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -315,7 +315,7 @@ static int repack_fill_midx_stdin_packs(struct child_process *cmd, return finish_command(cmd); } -int write_midx_included_packs(struct repack_write_midx_opts *opts) +static int write_midx_included_packs(struct repack_write_midx_opts *opts) { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list include = STRING_LIST_INIT_DUP; @@ -378,3 +378,15 @@ done: return ret; } + +int repack_write_midx(struct repack_write_midx_opts *opts) +{ + switch (opts->mode) { + case REPACK_WRITE_MIDX_NONE: + BUG("write_midx mode is NONE?"); + case REPACK_WRITE_MIDX_DEFAULT: + return write_midx_included_packs(opts); + default: + BUG("unhandled write_midx mode: %d", opts->mode); + } +} diff --git a/repack.h b/repack.h index 77d24ee45f..81907fcce7 100644 --- a/repack.h +++ b/repack.h @@ -134,6 +134,11 @@ void pack_geometry_release(struct pack_geometry *geometry); struct tempfile; +enum repack_write_midx_mode { + REPACK_WRITE_MIDX_NONE, + REPACK_WRITE_MIDX_DEFAULT, +}; + struct repack_write_midx_opts { struct existing_packs *existing; struct pack_geometry *geometry; @@ -143,10 +148,11 @@ struct repack_write_midx_opts { int show_progress; int write_bitmaps; int midx_must_contain_cruft; + enum repack_write_midx_mode mode; }; void midx_snapshot_refs(struct repository *repo, struct tempfile *f); -int write_midx_included_packs(struct repack_write_midx_opts *opts); +int repack_write_midx(struct repack_write_midx_opts *opts); int write_filtered_pack(const struct write_pack_opts *opts, struct existing_packs *existing, From b0d6e7b0d0b1c20fc847f371dcc261a0c7b27be1 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:58:16 -0400 Subject: [PATCH 13/16] packfile: ensure `close_pack_revindex()` frees in-memory revindex The following commit will introduce a case where we write a MIDX bitmap over packs that do not themselves have on-disk *.rev files. This case is supported within Git, and we will simply fall back to generating the revindex in memory. But we don't ever release that memory, causing a leak that is exposed by a test introduced in the following commit. (As far as I could find, we never free()'d memory allocated as a byproduct of creating an in-memory revindex, likely because that code predates the leak-checking niceties we have in the test suite now.) Rectify this by calling `FREE_AND_NULL()` on the `p->revindex` field when calling `close_pack_revindex()`. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- packfile.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packfile.c b/packfile.c index b012d648ad..a1e88fdb22 100644 --- a/packfile.c +++ b/packfile.c @@ -420,6 +420,8 @@ void close_pack_index(struct packed_git *p) static void close_pack_revindex(struct packed_git *p) { + FREE_AND_NULL(p->revindex); + if (!p->revindex_map) return; From 1da62fb5c868447640c3697e9f2ec0004e24951f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:58:19 -0400 Subject: [PATCH 14/16] repack: implement incremental MIDX repacking Implement the `write_midx_incremental()` function, which builds and maintains an incremental MIDX chain as part of the geometric repacking process. Unlike the default mode which writes a single flat MIDX, the incremental mode constructs a compaction plan that determines which MIDX layers to write, compact, or copy, and then executes each step using `git multi-pack-index` subcommands with the --no-write-chain-file flag. The repacking strategy works as follows: * Acquire the lock guarding the multi-pack-index-chain. * A new MIDX layer is always written containing the newly created pack(s). If the tip MIDX layer was rewritten during geometric repacking, any surviving packs from that layer are also included. * Starting from the new layer, adjacent MIDX layers are merged together as long as the accumulated object count exceeds half the object count of the next deeper layer (controlled by 'repack.midxSplitFactor'). * Remaining layers in the chain are evaluated pairwise and either compacted or copied as-is, following the same merging condition. * Write the contents of the new multi-pack-index chain, atomically move it into place, and then release the lock. * Delete any now-unused MIDX layers. After writing the new layer, the strategy is evaluated among the existing MIDX layers in order from oldest to newest. Each step that writes a new MIDX layer uses "--no-write-chain-file" to avoid updating the multi-pack-index-chain file. After all steps are complete, the new chain file is written and then atomically moved into place. At present, this functionality is exposed behind a new enum value, `REPACK_WRITE_MIDX_INCREMENTAL`, but has no external callers. A subsequent commit will expose this mode via `git repack --write-midx=incremental`. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 5 + repack-midx.c | 593 +++++++++++++++++++++++++++++++++++++++++++++-- repack.h | 3 + 3 files changed, 588 insertions(+), 13 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 5d366340c3..75c5773678 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -42,6 +42,9 @@ static const char incremental_bitmap_conflict_error[] = N_( "--no-write-bitmap-index or disable the pack.writeBitmaps configuration." ); +#define DEFAULT_MIDX_SPLIT_FACTOR 2 +#define DEFAULT_MIDX_NEW_LAYER_THRESHOLD 8 + struct repack_config_ctx { struct pack_objects_args *po_args; struct pack_objects_args *cruft_po_args; @@ -555,6 +558,8 @@ int cmd_repack(int argc, .show_progress = show_progress, .write_bitmaps = write_bitmaps > 0, .midx_must_contain_cruft = midx_must_contain_cruft, + .midx_split_factor = DEFAULT_MIDX_SPLIT_FACTOR, + .midx_new_layer_threshold = DEFAULT_MIDX_NEW_LAYER_THRESHOLD, .mode = write_midx, }; diff --git a/repack-midx.c b/repack-midx.c index b1ca379708..f97331fb1b 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -2,12 +2,16 @@ #include "repack.h" #include "hash.h" #include "hex.h" +#include "lockfile.h" +#include "midx.h" #include "odb.h" #include "oidset.h" #include "pack-bitmap.h" +#include "path.h" #include "refs.h" #include "run-command.h" #include "tempfile.h" +#include "trace2.h" struct midx_snapshot_ref_data { struct repository *repo; @@ -293,26 +297,30 @@ static void repack_prepare_midx_command(struct child_process *cmd, } static int repack_fill_midx_stdin_packs(struct child_process *cmd, - struct string_list *include) + struct string_list *include, + struct string_list *out) { + struct strbuf in_buf = STRBUF_INIT; + struct strbuf out_buf = STRBUF_INIT; struct string_list_item *item; - FILE *in; int ret; - cmd->in = -1; - strvec_push(&cmd->args, "--stdin-packs"); - ret = start_command(cmd); - if (ret) - return ret; - - in = xfdopen(cmd->in, "w"); for_each_string_list_item(item, include) - fprintf(in, "%s\n", item->string); - fclose(in); + strbuf_addf(&in_buf, "%s\n", item->string); - return finish_command(cmd); + ret = pipe_command(cmd, in_buf.buf, in_buf.len, + out ? &out_buf : NULL, 0, NULL, 0); + + if (out) + string_list_split_f(out, out_buf.buf, "\n", -1, + STRING_LIST_SPLIT_NONEMPTY); + + strbuf_release(&in_buf); + strbuf_release(&out_buf); + + return ret; } static int write_midx_included_packs(struct repack_write_midx_opts *opts) @@ -369,7 +377,7 @@ static int write_midx_included_packs(struct repack_write_midx_opts *opts) strvec_pushf(&cmd.args, "--refs-snapshot=%s", opts->refs_snapshot); - ret = repack_fill_midx_stdin_packs(&cmd, &include); + ret = repack_fill_midx_stdin_packs(&cmd, &include, NULL); done: if (!ret && opts->write_bitmaps) remove_redundant_bitmaps(&include, opts->packdir); @@ -379,6 +387,563 @@ done: return ret; } +struct midx_compaction_step { + union { + struct multi_pack_index *copy; + struct string_list write; + struct { + struct multi_pack_index *from; + struct multi_pack_index *to; + } compact; + } u; + + uint32_t objects_nr; + char *csum; + + enum { + MIDX_COMPACTION_STEP_UNKNOWN, + MIDX_COMPACTION_STEP_COPY, + MIDX_COMPACTION_STEP_WRITE, + MIDX_COMPACTION_STEP_COMPACT, + } type; +}; + +static const char *midx_compaction_step_base(const struct midx_compaction_step *step) +{ + switch (step->type) { + case MIDX_COMPACTION_STEP_UNKNOWN: + BUG("cannot use UNKNOWN step as a base"); + case MIDX_COMPACTION_STEP_COPY: + return midx_get_checksum_hex(step->u.copy); + case MIDX_COMPACTION_STEP_WRITE: + BUG("cannot use WRITE step as a base"); + case MIDX_COMPACTION_STEP_COMPACT: + return midx_get_checksum_hex(step->u.compact.to); + default: + BUG("unhandled midx compaction step type %d", step->type); + } +} + +static int midx_compaction_step_exec_copy(struct midx_compaction_step *step) +{ + step->csum = xstrdup(midx_get_checksum_hex(step->u.copy)); + return 0; +} + +static int midx_compaction_step_exec_write(struct midx_compaction_step *step, + struct repack_write_midx_opts *opts, + const char *base) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list hash = STRING_LIST_INIT_DUP; + struct string_list_item *item; + const char *preferred_pack = NULL; + int ret = 0; + + if (!step->u.write.nr) { + ret = error(_("no packs to write MIDX during compaction")); + goto out; + } + + for_each_string_list_item(item, &step->u.write) { + if (item->util) + preferred_pack = item->string; + } + + repack_prepare_midx_command(&cmd, opts, "write"); + strvec_pushl(&cmd.args, "--incremental", "--no-write-chain-file", NULL); + strvec_pushf(&cmd.args, "--base=%s", base ? base : "none"); + + if (preferred_pack) { + struct strbuf buf = STRBUF_INIT; + + strbuf_addstr(&buf, preferred_pack); + strbuf_strip_suffix(&buf, ".idx"); + strbuf_addstr(&buf, ".pack"); + + strvec_pushf(&cmd.args, "--preferred-pack=%s", buf.buf); + + strbuf_release(&buf); + } + + ret = repack_fill_midx_stdin_packs(&cmd, &step->u.write, &hash); + if (hash.nr != 1) { + ret = error(_("expected exactly one line during MIDX write, " + "got: %"PRIuMAX), + (uintmax_t)hash.nr); + goto out; + } + + step->csum = xstrdup(hash.items[0].string); + +out: + string_list_clear(&hash, 0); + + return ret; +} + +static int midx_compaction_step_exec_compact(struct midx_compaction_step *step, + struct repack_write_midx_opts *opts) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct strbuf buf = STRBUF_INIT; + FILE *out = NULL; + int ret; + + repack_prepare_midx_command(&cmd, opts, "compact"); + strvec_pushl(&cmd.args, "--incremental", "--no-write-chain-file", + midx_get_checksum_hex(step->u.compact.from), + midx_get_checksum_hex(step->u.compact.to), NULL); + + cmd.out = -1; + + ret = start_command(&cmd); + if (ret) + goto out; + + out = xfdopen(cmd.out, "r"); + while (strbuf_getline_lf(&buf, out) != EOF) { + if (step->csum) { + ret = error(_("unexpected MIDX output: '%s'"), buf.buf); + fclose(out); + out = NULL; + finish_command(&cmd); + goto out; + } + step->csum = strbuf_detach(&buf, NULL); + } + + ret = finish_command(&cmd); + +out: + if (out) + fclose(out); + strbuf_release(&buf); + + return ret; +} + +static int midx_compaction_step_exec(struct midx_compaction_step *step, + struct repack_write_midx_opts *opts, + const char *base) +{ + switch (step->type) { + case MIDX_COMPACTION_STEP_UNKNOWN: + BUG("cannot execute UNKNOWN midx compaction step"); + case MIDX_COMPACTION_STEP_COPY: + return midx_compaction_step_exec_copy(step); + case MIDX_COMPACTION_STEP_WRITE: + return midx_compaction_step_exec_write(step, opts, base); + case MIDX_COMPACTION_STEP_COMPACT: + return midx_compaction_step_exec_compact(step, opts); + default: + BUG("unhandled midx compaction step type %d", step->type); + } +} + +static void midx_compaction_step_release(struct midx_compaction_step *step) +{ + if (step->type == MIDX_COMPACTION_STEP_WRITE) + string_list_clear(&step->u.write, 0); + free(step->csum); +} + +static int repack_make_midx_compaction_plan(struct repack_write_midx_opts *opts, + struct midx_compaction_step **steps_p, + size_t *steps_nr_p) +{ + struct multi_pack_index *m; + struct midx_compaction_step *steps = NULL; + struct midx_compaction_step step = { 0 }; + struct strbuf buf = STRBUF_INIT; + size_t steps_nr = 0, steps_alloc = 0; + uint32_t i; + int ret = 0; + + trace2_region_enter("repack", "make_midx_compaction_plan", + opts->existing->repo); + + odb_reprepare(opts->existing->repo->objects); + m = get_multi_pack_index(opts->existing->source); + + for (i = 0; m && i < m->num_packs + m->num_packs_in_base; i++) { + if (prepare_midx_pack(m, i)) { + ret = error(_("could not load pack %"PRIu32" from MIDX"), + i); + goto out; + } + } + + trace2_region_enter("repack", "steps:write", opts->existing->repo); + + /* + * The first MIDX in the resulting chain is always going to be + * new. + * + * At a minimum, it will include all of the newly written packs. + * If there is an existing MIDX whose tip layer contains packs + * that were repacked, it will also include any of its packs + * which were *not* rolled up as part of the geometric repack + * (if any), and the previous tip will be replaced. + * + * It may grow to include the packs from zero or more MIDXs from + * the old chain, beginning either at the old tip (if the MIDX + * was *not* rewritten) or the old tip's base MIDX layer + * (otherwise). + */ + step.type = MIDX_COMPACTION_STEP_WRITE; + string_list_init_dup(&step.u.write); + + for (i = 0; i < opts->names->nr; i++) { + strbuf_reset(&buf); + strbuf_addf(&buf, "pack-%s.idx", opts->names->items[i].string); + string_list_append(&step.u.write, buf.buf); + + trace2_data_string("repack", opts->existing->repo, + "include:fresh", + step.u.write.items[step.u.write.nr - 1].string); + } + for (i = 0; i < opts->geometry->split; i++) { + struct packed_git *p = opts->geometry->pack[i]; + if (unsigned_add_overflows(step.objects_nr, p->num_objects)) { + ret = error(_("too many objects in MIDX compaction step")); + goto out; + } + + step.objects_nr += p->num_objects; + } + trace2_data_intmax("repack", opts->existing->repo, + "include:fresh:objects_nr", + (uintmax_t)step.objects_nr); + + /* + * Now handle any existing packs which were *not* rewritten. + * + * The list of packs in opts->geometry only contains MIDX'd + * packs from the newest layer when that layer has more than + * 'repack.midxNewLayerThreshold' number of packs. + * + * If the MIDX tip was rewritten (that is, one or more of those + * packs appear below the split line), then add all packs above + * the split line to the new layer, as the old one is no longer + * usable. + * + * If the MIDX tip was not rewritten (that is, all MIDX'd packs + * from the youngest layer appear below the split line, or were + * not included in the geometric repack at all because there + * were too few of them), ignore them since we'll retain the + * existing layer as-is. + */ + for (i = opts->geometry->split; i < opts->geometry->pack_nr; i++) { + struct packed_git *p = opts->geometry->pack[i]; + struct string_list_item *item; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + if (p->multi_pack_index && + !opts->geometry->midx_tip_rewritten) { + trace2_data_string("repack", opts->existing->repo, + "exclude:unmodified", buf.buf); + continue; + } + + trace2_data_string("repack", opts->existing->repo, + "include:unmodified", buf.buf); + trace2_data_string("repack", opts->existing->repo, + "include:unmodified:midx", + p->multi_pack_index ? "true" : "false"); + + item = string_list_append(&step.u.write, buf.buf); + if (p->multi_pack_index || i == opts->geometry->pack_nr - 1) + item->util = (void *)1; /* mark as preferred */ + + if (unsigned_add_overflows(step.objects_nr, p->num_objects)) { + ret = error(_("too many objects in MIDX compaction step")); + goto out; + } + + step.objects_nr += p->num_objects; + } + trace2_data_intmax("repack", opts->existing->repo, + "include:unmodified:objects_nr", + (uintmax_t)step.objects_nr); + + /* + * If the MIDX tip was rewritten, then we no longer consider it + * a candidate for compaction, since it will not exist in the + * MIDX chain being built. + */ + if (opts->geometry->midx_tip_rewritten) + m = m->base_midx; + + trace2_data_string("repack", opts->existing->repo, "midx:rewrote-tip", + opts->geometry->midx_tip_rewritten ? "true" : "false"); + + trace2_region_enter("repack", "compact", opts->existing->repo); + + /* + * Compact additional MIDX layers into this proposed one until + * the merging condition is violated. + */ + while (m) { + uint32_t preferred_pack_idx; + + trace2_data_string("repack", opts->existing->repo, + "candidate", midx_get_checksum_hex(m)); + + if (step.objects_nr < m->num_objects / opts->midx_split_factor) { + /* + * Stop compacting MIDX layer as soon as the + * merged size is less than half the size of the + * next layer in the chain. + */ + trace2_data_string("repack", opts->existing->repo, + "compact", "violated"); + trace2_data_intmax("repack", opts->existing->repo, + "objects_nr", + (uintmax_t)step.objects_nr); + trace2_data_intmax("repack", opts->existing->repo, + "next_objects_nr", + (uintmax_t)m->num_objects); + trace2_data_intmax("repack", opts->existing->repo, + "split_factor", + (uintmax_t)opts->midx_split_factor); + + break; + } + + if (midx_preferred_pack(m, &preferred_pack_idx) < 0) { + ret = error(_("could not find preferred pack for MIDX " + "%s"), midx_get_checksum_hex(m)); + goto out; + } + + for (i = 0; i < m->num_packs; i++) { + struct string_list_item *item; + uint32_t pack_int_id = i + m->num_packs_in_base; + struct packed_git *p = nth_midxed_pack(m, pack_int_id); + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + trace2_data_string("repack", opts->existing->repo, + "midx:pack", buf.buf); + + item = string_list_append(&step.u.write, buf.buf); + if (pack_int_id == preferred_pack_idx) + item->util = (void *)1; /* mark as preferred */ + } + + if (unsigned_add_overflows(step.objects_nr, m->num_objects)) { + ret = error(_("too many objects in MIDX compaction step")); + goto out; + } + step.objects_nr += m->num_objects; + + m = m->base_midx; + } + + if (step.u.write.nr > 0) { + /* + * As long as there is at least one new pack to write + * (and thus the MIDX is non-empty), add it to the plan. + */ + ALLOC_GROW(steps, steps_nr + 1, steps_alloc); + steps[steps_nr++] = step; + } + + trace2_data_intmax("repack", opts->existing->repo, + "step:objects_nr", (uintmax_t)step.objects_nr); + trace2_data_intmax("repack", opts->existing->repo, + "step:packs_nr", (uintmax_t)step.u.write.nr); + + trace2_region_leave("repack", "compact", opts->existing->repo); + trace2_region_leave("repack", "steps:write", opts->existing->repo); + + trace2_region_enter("repack", "steps:rest", opts->existing->repo); + + /* + * Then start over, repeat, and either compact or keep as-is + * each MIDX layer until we have exhausted the chain. + * + * Finally, evaluate the remainder of the chain (if any) and + * either compact a sequence of adjacent layers, or keep + * individual layers as-is according to the same merging + * condition as above. + */ + while (m) { + struct multi_pack_index *next = m; + + ALLOC_GROW(steps, steps_nr + 1, steps_alloc); + + memset(&step, 0, sizeof(step)); + step.type = MIDX_COMPACTION_STEP_UNKNOWN; + + trace2_region_enter("repack", "step", opts->existing->repo); + + trace2_data_string("repack", opts->existing->repo, + "from", midx_get_checksum_hex(m)); + + while (next) { + uint32_t proposed_objects_nr; + if (unsigned_add_overflows(step.objects_nr, next->num_objects)) { + ret = error(_("too many objects in MIDX compaction step")); + trace2_region_leave("repack", "step", opts->existing->repo); + goto out; + } + + proposed_objects_nr = step.objects_nr + next->num_objects; + + trace2_data_string("repack", opts->existing->repo, + "proposed", + midx_get_checksum_hex(next)); + trace2_data_intmax("repack", opts->existing->repo, + "proposed:objects_nr", + (uintmax_t)next->num_objects); + + if (!next->base_midx) { + /* + * If we are at the end of the MIDX + * chain, there is nothing to compact, + * so mark it and stop. + */ + step.objects_nr = proposed_objects_nr; + break; + } + + if (proposed_objects_nr < next->base_midx->num_objects / opts->midx_split_factor) { + /* + * If there is a MIDX following this + * one, but our accumulated size is less + * than half of its size, compacting + * them would violate the merging + * condition, so stop here. + */ + + trace2_data_string("repack", opts->existing->repo, + "compact:violated:at", + midx_get_checksum_hex(next->base_midx)); + trace2_data_intmax("repack", opts->existing->repo, + "compact:violated:at:objects_nr", + (uintmax_t)next->base_midx->num_objects); + break; + } + + /* + * Otherwise, it is OK to compact the next layer + * into this one. Do so, and then continue + * through the remainder of the chain. + */ + step.objects_nr = proposed_objects_nr; + trace2_data_intmax("repack", opts->existing->repo, + "step:objects_nr", + (uintmax_t)step.objects_nr); + next = next->base_midx; + } + + if (m == next) { + step.type = MIDX_COMPACTION_STEP_COPY; + step.u.copy = m; + + trace2_data_string("repack", opts->existing->repo, + "type", "copy"); + } else { + step.type = MIDX_COMPACTION_STEP_COMPACT; + step.u.compact.from = next; + step.u.compact.to = m; + + trace2_data_string("repack", opts->existing->repo, + "to", midx_get_checksum_hex(m)); + trace2_data_string("repack", opts->existing->repo, + "type", "compact"); + } + + m = next->base_midx; + steps[steps_nr++] = step; + trace2_region_leave("repack", "step", opts->existing->repo); + } + + trace2_region_leave("repack", "steps:rest", opts->existing->repo); + +out: + *steps_p = steps; + *steps_nr_p = steps_nr; + + strbuf_release(&buf); + + trace2_region_leave("repack", "make_midx_compaction_plan", + opts->existing->repo); + + return ret; +} + +static int write_midx_incremental(struct repack_write_midx_opts *opts) +{ + struct midx_compaction_step *steps = NULL; + struct strbuf lock_name = STRBUF_INIT; + struct lock_file lf; + size_t steps_nr = 0; + size_t i; + int ret = 0; + + get_midx_chain_filename(opts->existing->source, &lock_name); + if (safe_create_leading_directories(opts->existing->repo, + lock_name.buf)) + die_errno(_("unable to create leading directories of %s"), + lock_name.buf); + hold_lock_file_for_update(&lf, lock_name.buf, LOCK_DIE_ON_ERROR); + + if (!fdopen_lock_file(&lf, "w")) { + ret = error_errno(_("unable to open multi-pack-index chain file")); + goto done; + } + + if (repack_make_midx_compaction_plan(opts, &steps, &steps_nr) < 0) { + ret = error(_("unable to generate compaction plan")); + goto done; + } + + for (i = 0; i < steps_nr; i++) { + struct midx_compaction_step *step = &steps[i]; + char *base = NULL; + + if (i + 1 < steps_nr) + base = xstrdup(midx_compaction_step_base(&steps[i + 1])); + + if (midx_compaction_step_exec(step, opts, base) < 0) { + ret = error(_("unable to execute compaction step %"PRIuMAX), + (uintmax_t)i); + free(base); + goto done; + } + + free(base); + } + + i = steps_nr; + while (i--) { + struct midx_compaction_step *step = &steps[i]; + if (!step->csum) + BUG("missing result for compaction step %"PRIuMAX, + (uintmax_t)i); + fprintf(get_lock_file_fp(&lf), "%s\n", step->csum); + } + + commit_lock_file(&lf); + +done: + strbuf_release(&lock_name); + for (i = 0; i < steps_nr; i++) + midx_compaction_step_release(&steps[i]); + free(steps); + return ret; +} + int repack_write_midx(struct repack_write_midx_opts *opts) { switch (opts->mode) { @@ -386,6 +951,8 @@ int repack_write_midx(struct repack_write_midx_opts *opts) BUG("write_midx mode is NONE?"); case REPACK_WRITE_MIDX_DEFAULT: return write_midx_included_packs(opts); + case REPACK_WRITE_MIDX_INCREMENTAL: + return write_midx_incremental(opts); default: BUG("unhandled write_midx mode: %d", opts->mode); } diff --git a/repack.h b/repack.h index 81907fcce7..831ccfb1c6 100644 --- a/repack.h +++ b/repack.h @@ -137,6 +137,7 @@ struct tempfile; enum repack_write_midx_mode { REPACK_WRITE_MIDX_NONE, REPACK_WRITE_MIDX_DEFAULT, + REPACK_WRITE_MIDX_INCREMENTAL, }; struct repack_write_midx_opts { @@ -148,6 +149,8 @@ struct repack_write_midx_opts { int show_progress; int write_bitmaps; int midx_must_contain_cruft; + int midx_split_factor; + int midx_new_layer_threshold; enum repack_write_midx_mode mode; }; From 938af8926099882ff87f8ffa4115c34ed63d9e8b Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:58:22 -0400 Subject: [PATCH 15/16] repack: introduce `--write-midx=incremental` Expose the incremental MIDX repacking mode (implemented in an earlier commit) via a new --write-midx=incremental option for `git repack`. Add "incremental" as a recognized argument to the --write-midx OPT_CALLBACK, mapping it to REPACK_WRITE_MIDX_INCREMENTAL. When this mode is active and --geometric is in use, set the midx_layer_threshold on the pack geometry so that only packs in sufficiently large tip layers are considered for repacking. Two new configuration options control the compaction behavior: - repack.midxSplitFactor (default: 2): the factor used in the geometric merging condition for MIDX layers. - repack.midxNewLayerThreshold (default: 8): the minimum number of packs in the tip MIDX layer before its packs are considered as candidates for geometric repacking. Add tests exercising the new mode across a variety of scenarios including basic geometric violations, multi-round chain integrity, branching and merging histories, cross-layer object uniqueness, and threshold-based compaction. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/config/repack.adoc | 18 ++ Documentation/git-repack.adoc | 39 ++- builtin/repack.c | 49 ++- midx.c | 29 ++ midx.h | 3 + repack-geometry.c | 13 +- repack-midx.c | 5 + repack.c | 56 +++- repack.h | 10 +- t/meson.build | 1 + t/t7705-repack-incremental-midx.sh | 470 +++++++++++++++++++++++++++++ 11 files changed, 669 insertions(+), 24 deletions(-) create mode 100755 t/t7705-repack-incremental-midx.sh diff --git a/Documentation/config/repack.adoc b/Documentation/config/repack.adoc index e9e78dcb19..4c22a499f6 100644 --- a/Documentation/config/repack.adoc +++ b/Documentation/config/repack.adoc @@ -46,3 +46,21 @@ repack.midxMustContainCruft:: `--write-midx`. When false, cruft packs are only included in the MIDX when necessary (e.g., because they might be required to form a reachability closure with MIDX bitmaps). Defaults to true. + +repack.midxSplitFactor:: + The factor used in the geometric merging condition when + compacting incremental MIDX layers during `git repack` when + invoked with the `--write-midx=incremental` option. ++ +Adjacent layers are merged when the accumulated object count of the +newer layer exceeds `1/` of the object count of the next deeper +layer. Must be at least 2. Defaults to 2. + +repack.midxNewLayerThreshold:: + The minimum number of packs in the tip MIDX layer before those + packs are considered as candidates for geometric repacking + during `git repack --write-midx=incremental`. ++ +When the tip layer has fewer packs than this threshold, those packs are +excluded from the geometric repack entirely, and are thus left +unmodified. Must be at least 1. Defaults to 8. diff --git a/Documentation/git-repack.adoc b/Documentation/git-repack.adoc index 673ce91083..27a99cc46f 100644 --- a/Documentation/git-repack.adoc +++ b/Documentation/git-repack.adoc @@ -11,7 +11,7 @@ SYNOPSIS [verse] 'git repack' [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m] [--window=] [--depth=] [--threads=] [--keep-pack=] - [--write-midx] [--name-hash-version=] [--path-walk] + [--write-midx[=]] [--name-hash-version=] [--path-walk] DESCRIPTION ----------- @@ -250,9 +250,42 @@ pack as the preferred pack for object selection by the MIDX (see linkgit:git-multi-pack-index[1]). -m:: ---write-midx:: +--write-midx[=]:: Write a multi-pack index (see linkgit:git-multi-pack-index[1]) - containing the non-redundant packs. + containing the non-redundant packs. The following modes are + available: ++ +-- + `default`;; + Write a single MIDX covering all packs. This is the + default when `--write-midx` is given without an + explicit mode. + + `incremental`;; + Write an incremental MIDX chain instead of a single + flat MIDX. This mode requires `--geometric`. ++ +The incremental mode maintains a chain of MIDX layers that is compacted +over time using a geometric merging strategy. Each repack creates a new +tip layer containing the newly written pack(s). Adjacent layers are then +merged whenever the newer layer's object count exceeds +`1/repack.midxSplitFactor` of the next deeper layer's count. Layers +that do not meet this condition are retained as-is. ++ +The result is that newer (tip) layers tend to contain many small packs +with relatively few objects, while older (deeper) layers contain fewer, +larger packs covering more objects. Because compaction is driven by the +tip of the chain, newer layers are also rewritten more frequently than +older ones, which are only touched when enough objects have accumulated +to justify merging into them. This keeps the total number of layers +logarithmic relative to the total number of objects. ++ +Only packs in the tip MIDX layer are considered as candidates for the +geometric repack; packs in deeper layers are left untouched. If the tip +layer contains fewer packs than `repack.midxNewLayerThreshold`, those +packs are excluded from the geometry entirely, and a new layer is +created for any new pack(s) without disturbing the existing chain. +-- --name-hash-version=:: Provide this argument to the underlying `git pack-objects` process. diff --git a/builtin/repack.c b/builtin/repack.c index 75c5773678..5ffa18e085 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -33,7 +33,7 @@ static int midx_must_contain_cruft = 1; static const char *const git_repack_usage[] = { N_("git repack [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m]\n" "[--window=] [--depth=] [--threads=] [--keep-pack=]\n" - "[--write-midx] [--name-hash-version=] [--path-walk]"), + "[--write-midx[=]] [--name-hash-version=] [--path-walk]"), NULL }; @@ -48,6 +48,8 @@ static const char incremental_bitmap_conflict_error[] = N_( struct repack_config_ctx { struct pack_objects_args *po_args; struct pack_objects_args *cruft_po_args; + int midx_split_factor; + int midx_new_layer_threshold; }; static int repack_config(const char *var, const char *value, @@ -97,6 +99,16 @@ static int repack_config(const char *var, const char *value, midx_must_contain_cruft = git_config_bool(var, value); return 0; } + if (!strcmp(var, "repack.midxsplitfactor")) { + repack_ctx->midx_split_factor = git_config_int(var, value, + ctx->kvi); + return 0; + } + if (!strcmp(var, "repack.midxnewlayerthreshold")) { + repack_ctx->midx_new_layer_threshold = git_config_int(var, value, + ctx->kvi); + return 0; + } return git_default_config(var, value, ctx, cb); } @@ -112,6 +124,8 @@ static int option_parse_write_midx(const struct option *opt, const char *arg, if (!arg || !*arg) *cfg = REPACK_WRITE_MIDX_DEFAULT; + else if (!strcmp(arg, "incremental")) + *cfg = REPACK_WRITE_MIDX_INCREMENTAL; else return error(_("unknown value for %s: %s"), opt->long_name, arg); @@ -226,6 +240,8 @@ int cmd_repack(int argc, memset(&config_ctx, 0, sizeof(config_ctx)); config_ctx.po_args = &po_args; config_ctx.cruft_po_args = &cruft_po_args; + config_ctx.midx_split_factor = DEFAULT_MIDX_SPLIT_FACTOR; + config_ctx.midx_new_layer_threshold = DEFAULT_MIDX_NEW_LAYER_THRESHOLD; repo_config(repo, repack_config, &config_ctx); @@ -247,6 +263,9 @@ int cmd_repack(int argc, if (pack_everything & PACK_CRUFT) pack_everything |= ALL_INTO_ONE; + if (write_midx == REPACK_WRITE_MIDX_INCREMENTAL && !geometry.split_factor) + die(_("--write-midx=incremental requires --geometric")); + if (write_bitmaps < 0) { if (write_midx == REPACK_WRITE_MIDX_NONE && (!(pack_everything & ALL_INTO_ONE) || !is_bare_repository())) @@ -273,6 +292,13 @@ int cmd_repack(int argc, write_bitmaps = 0; } + if (config_ctx.midx_split_factor < 2) + die(_("invalid value for %s: %d"), "--midx-split-factor", + config_ctx.midx_split_factor); + if (config_ctx.midx_new_layer_threshold < 1) + die(_("invalid value for %s: %d"), "--midx-new-layer-threshold", + config_ctx.midx_new_layer_threshold); + if (write_midx != REPACK_WRITE_MIDX_NONE && write_bitmaps) { struct strbuf path = STRBUF_INIT; @@ -296,6 +322,10 @@ int cmd_repack(int argc, if (geometry.split_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); + if (write_midx == REPACK_WRITE_MIDX_INCREMENTAL) { + geometry.midx_layer_threshold = config_ctx.midx_new_layer_threshold; + geometry.midx_layer_threshold_set = true; + } pack_geometry_init(&geometry, &existing, &po_args); pack_geometry_split(&geometry); } @@ -545,8 +575,11 @@ int cmd_repack(int argc, packtmp); /* End of pack replacement. */ - if (delete_redundant && pack_everything & ALL_INTO_ONE) + if (delete_redundant && pack_everything & ALL_INTO_ONE) { + if (write_midx == REPACK_WRITE_MIDX_INCREMENTAL) + existing_packs_retain_midx_packs(&existing); existing_packs_mark_for_deletion(&existing, &names); + } if (write_midx != REPACK_WRITE_MIDX_NONE) { struct repack_write_midx_opts opts = { @@ -558,8 +591,8 @@ int cmd_repack(int argc, .show_progress = show_progress, .write_bitmaps = write_bitmaps > 0, .midx_must_contain_cruft = midx_must_contain_cruft, - .midx_split_factor = DEFAULT_MIDX_SPLIT_FACTOR, - .midx_new_layer_threshold = DEFAULT_MIDX_NEW_LAYER_THRESHOLD, + .midx_split_factor = config_ctx.midx_split_factor, + .midx_new_layer_threshold = config_ctx.midx_new_layer_threshold, .mode = write_midx, }; @@ -572,11 +605,15 @@ int cmd_repack(int argc, if (delete_redundant) { int opts = 0; - existing_packs_remove_redundant(&existing, packdir); + bool wrote_incremental_midx = write_midx == REPACK_WRITE_MIDX_INCREMENTAL; + + existing_packs_remove_redundant(&existing, packdir, + wrote_incremental_midx); if (geometry.split_factor) pack_geometry_remove_redundant(&geometry, &names, - &existing, packdir); + &existing, packdir, + wrote_incremental_midx); if (show_progress) opts |= PRUNE_PACKED_VERBOSE; prune_packed_objects(opts); diff --git a/midx.c b/midx.c index dc86c8e7fe..efbfbb13f4 100644 --- a/midx.c +++ b/midx.c @@ -850,6 +850,35 @@ void clear_midx_file(struct repository *r) strbuf_release(&midx); } +void clear_incremental_midx_files(struct repository *r, + const struct strvec *keep_hashes) +{ + struct odb_source *source = r->objects->sources; + struct strbuf chain = STRBUF_INIT; + + get_midx_chain_filename(source, &chain); + + for (; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + if (files->packed->midx) + close_midx(files->packed->midx); + files->packed->midx = NULL; + } + + if (!keep_hashes && remove_path(chain.buf)) + die(_("failed to clear multi-pack-index chain at %s"), + chain.buf); + + clear_incremental_midx_files_ext(r->objects->sources, MIDX_EXT_BITMAP, + keep_hashes); + clear_incremental_midx_files_ext(r->objects->sources, MIDX_EXT_REV, + keep_hashes); + clear_incremental_midx_files_ext(r->objects->sources, MIDX_EXT_MIDX, + keep_hashes); + + strbuf_release(&chain); +} + static int verify_midx_error; __attribute__((format (printf, 1, 2))) diff --git a/midx.h b/midx.h index 3ee12dd08e..63853a03a4 100644 --- a/midx.h +++ b/midx.h @@ -9,6 +9,7 @@ struct repository; struct bitmapped_pack; struct git_hash_algo; struct odb_source; +struct strvec; #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ #define MIDX_VERSION_V1 1 @@ -143,6 +144,8 @@ int write_midx_file_compact(struct odb_source *source, const char *incremental_base, unsigned flags); void clear_midx_file(struct repository *r); +void clear_incremental_midx_files(struct repository *r, + const struct strvec *keep_hashes); int verify_midx_file(struct odb_source *source, unsigned flags); int expire_midx_packs(struct odb_source *source, unsigned flags); int midx_repack(struct odb_source *source, size_t batch_size, unsigned flags); diff --git a/repack-geometry.c b/repack-geometry.c index 2408b8a3cc..2064683dcf 100644 --- a/repack-geometry.c +++ b/repack-geometry.c @@ -249,7 +249,8 @@ static void remove_redundant_packs(struct packed_git **pack, uint32_t pack_nr, struct string_list *names, struct existing_packs *existing, - const char *packdir) + const char *packdir, + bool wrote_incremental_midx) { const struct git_hash_algo *algop = existing->repo->hash_algo; struct strbuf buf = STRBUF_INIT; @@ -269,7 +270,8 @@ static void remove_redundant_packs(struct packed_git **pack, (string_list_has_string(&existing->kept_packs, buf.buf))) continue; - repack_remove_redundant_pack(existing->repo, packdir, buf.buf); + repack_remove_redundant_pack(existing->repo, packdir, buf.buf, + wrote_incremental_midx); } strbuf_release(&buf); @@ -278,12 +280,13 @@ static void remove_redundant_packs(struct packed_git **pack, void pack_geometry_remove_redundant(struct pack_geometry *geometry, struct string_list *names, struct existing_packs *existing, - const char *packdir) + const char *packdir, + bool wrote_incremental_midx) { remove_redundant_packs(geometry->pack, geometry->split, - names, existing, packdir); + names, existing, packdir, wrote_incremental_midx); remove_redundant_packs(geometry->promisor_pack, geometry->promisor_split, - names, existing, packdir); + names, existing, packdir, wrote_incremental_midx); } void pack_geometry_release(struct pack_geometry *geometry) diff --git a/repack-midx.c b/repack-midx.c index f97331fb1b..4f5deeb97b 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -887,6 +887,7 @@ static int write_midx_incremental(struct repack_write_midx_opts *opts) struct midx_compaction_step *steps = NULL; struct strbuf lock_name = STRBUF_INIT; struct lock_file lf; + struct strvec keep_hashes = STRVEC_INIT; size_t steps_nr = 0; size_t i; int ret = 0; @@ -932,11 +933,15 @@ static int write_midx_incremental(struct repack_write_midx_opts *opts) BUG("missing result for compaction step %"PRIuMAX, (uintmax_t)i); fprintf(get_lock_file_fp(&lf), "%s\n", step->csum); + strvec_push(&keep_hashes, step->csum); } commit_lock_file(&lf); + clear_incremental_midx_files(opts->existing->repo, &keep_hashes); + done: + strvec_clear(&keep_hashes); strbuf_release(&lock_name); for (i = 0; i < steps_nr; i++) midx_compaction_step_release(&steps[i]); diff --git a/repack.c b/repack.c index 2ee6b51420..571dabb665 100644 --- a/repack.c +++ b/repack.c @@ -55,14 +55,18 @@ void pack_objects_args_release(struct pack_objects_args *args) } void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, - const char *base_name) + const char *base_name, + bool wrote_incremental_midx) { struct strbuf buf = STRBUF_INIT; struct odb_source *source = repo->objects->sources; struct multi_pack_index *m = get_multi_pack_index(source); strbuf_addf(&buf, "%s.pack", base_name); - if (m && source->local && midx_contains_pack(m, buf.buf)) + if (m && source->local && midx_contains_pack(m, buf.buf)) { clear_midx_file(repo); + if (!wrote_incremental_midx) + clear_incremental_midx_files(repo, NULL); + } strbuf_insertf(&buf, 0, "%s/", dir_name); unlink_pack_path(buf.buf, 1); strbuf_release(&buf); @@ -250,25 +254,63 @@ void existing_packs_mark_for_deletion(struct existing_packs *existing, &existing->cruft_packs); } +/* + * Mark every pack that is referenced by the existing MIDX chain as + * retained, so that a subsequent call to + * existing_packs_mark_for_deletion() will not mark them for deletion. + * + * This is used when writing an incremental MIDX layer on top of an + * existing chain: retained layers continue to reference the same + * packs on disk, so those packs must not be unlinked even if the + * freshly-written pack supersedes them. + */ +void existing_packs_retain_midx_packs(struct existing_packs *existing) +{ + struct string_list_item *item; + struct strbuf buf = STRBUF_INIT; + + for_each_string_list_item(item, &existing->midx_packs) { + struct string_list_item *found; + + strbuf_reset(&buf); + strbuf_addstr(&buf, item->string); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_strip_suffix(&buf, ".idx"); + + found = string_list_lookup(&existing->non_kept_packs, buf.buf); + if (found) + existing_packs_mark_retained(found); + + found = string_list_lookup(&existing->cruft_packs, buf.buf); + if (found) + existing_packs_mark_retained(found); + } + + strbuf_release(&buf); +} + static void remove_redundant_packs_1(struct repository *repo, struct string_list *packs, - const char *packdir) + const char *packdir, + bool wrote_incremental_midx) { struct string_list_item *item; for_each_string_list_item(item, packs) { if (!existing_pack_is_marked_for_deletion(item)) continue; - repack_remove_redundant_pack(repo, packdir, item->string); + repack_remove_redundant_pack(repo, packdir, item->string, + wrote_incremental_midx); } } void existing_packs_remove_redundant(struct existing_packs *existing, - const char *packdir) + const char *packdir, + bool wrote_incremental_midx) { remove_redundant_packs_1(existing->repo, &existing->non_kept_packs, - packdir); + packdir, wrote_incremental_midx); remove_redundant_packs_1(existing->repo, &existing->cruft_packs, - packdir); + packdir, wrote_incremental_midx); } void existing_packs_release(struct existing_packs *existing) diff --git a/repack.h b/repack.h index 831ccfb1c6..f9fbc895f0 100644 --- a/repack.h +++ b/repack.h @@ -34,7 +34,8 @@ void prepare_pack_objects(struct child_process *cmd, void pack_objects_args_release(struct pack_objects_args *args); void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, - const char *base_name); + const char *base_name, + bool wrote_incremental_midx); struct write_pack_opts { struct pack_objects_args *po_args; @@ -83,8 +84,10 @@ void existing_packs_retain_cruft(struct existing_packs *existing, struct packed_git *cruft); void existing_packs_mark_for_deletion(struct existing_packs *existing, struct string_list *names); +void existing_packs_retain_midx_packs(struct existing_packs *existing); void existing_packs_remove_redundant(struct existing_packs *existing, - const char *packdir); + const char *packdir, + bool wrote_incremental_midx); void existing_packs_release(struct existing_packs *existing); struct generated_pack; @@ -129,7 +132,8 @@ struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry); void pack_geometry_remove_redundant(struct pack_geometry *geometry, struct string_list *names, struct existing_packs *existing, - const char *packdir); + const char *packdir, + bool wrote_incremental_midx); void pack_geometry_release(struct pack_geometry *geometry); struct tempfile; diff --git a/t/meson.build b/t/meson.build index 7528e5cda5..25f0d823d8 100644 --- a/t/meson.build +++ b/t/meson.build @@ -951,6 +951,7 @@ integration_tests = [ 't7702-repack-cyclic-alternate.sh', 't7703-repack-geometric.sh', 't7704-repack-cruft.sh', + 't7705-repack-incremental-midx.sh', 't7800-difftool.sh', 't7810-grep.sh', 't7811-grep-open.sh', diff --git a/t/t7705-repack-incremental-midx.sh b/t/t7705-repack-incremental-midx.sh new file mode 100755 index 0000000000..9e317ff6e8 --- /dev/null +++ b/t/t7705-repack-incremental-midx.sh @@ -0,0 +1,470 @@ +#!/bin/sh + +test_description='git repack --write-midx=incremental' + +. ./test-lib.sh + +GIT_TEST_MULTI_PACK_INDEX=0 +GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 +GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL=0 + +objdir=.git/objects +packdir=$objdir/pack +midxdir=$packdir/multi-pack-index.d +midx_chain=$midxdir/multi-pack-index-chain + +# incrementally_repack N +# +# Make "N" new commits, each stored in their own pack, and then repacked +# with the --write-midx=incremental strategy. +incrementally_repack () { + for i in $(test_seq 1 "$1") + do + test_commit "$i" && + + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + git multi-pack-index verify || return 1 + done +} + +# Create packs with geometrically increasing sizes so that they +# satisfy the geometric progression and survive a --geometric=2 +# repack without being rolled up. Creates 3 packs containing 1, +# 2, and 6 commits (3, 6, and 18 objects) respectively. +create_geometric_packs () { + test_commit "small" && + git repack -d && + + test_commit_bulk --message="medium" 2 && + test_commit_bulk --message="large" 6 && + + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index +} + +# create_layer +# +# Creates a new MIDX layer with the contents of "test_commit_bulk $@". +create_layer () { + test_commit_bulk "$@" && + + git multi-pack-index write --incremental --bitmap +} + +# create_layers +# +# Reads lines of " " from stdin and creates a new MIDX +# layer for each line. See create_layer above for more. +create_layers () { + while read msg nr + do + create_layer --message="$msg" "$nr" || return 1 + done +} + +test_expect_success '--write-midx=incremental requires --geometric' ' + test_must_fail git repack --write-midx=incremental 2>err && + + test_grep -- "--write-midx=incremental requires --geometric" err +' + +test_expect_success 'below layer threshold, tip packs excluded' ' + git init below-layer-threshold-tip-packs-excluded && + ( + cd below-layer-threshold-tip-packs-excluded && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 4 && + git config repack.midxsplitfactor 2 && + + # Create 3 packs forming a geometric progression by + # object count such that they are unmodified by the + # initial repack. The MIDX chain thusly contains a + # single layer with three packs. + create_geometric_packs && + ls $packdir/pack-*.idx | sort >packs.before && + test_line_count = 1 $midx_chain && + cp $midx_chain $midx_chain.before && + + # Repack a new commit. Since the layer threshold is + # unmet, a new MIDX layer is added on top of the + # existing one. + test_commit extra && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + git multi-pack-index verify && + + ls $packdir/pack-*.idx | sort >packs.after && + comm -13 packs.before packs.after >packs.new && + test_line_count = 1 packs.new && + + test_line_count = 2 "$midx_chain" && + head -n 1 "$midx_chain.before" >expect && + head -n 1 "$midx_chain" >actual && + test_cmp expect actual + ) +' + +test_expect_success 'above layer threshold, tip packs repacked' ' + git init above-layer-threshold-tip-packs-repacked && + ( + cd above-layer-threshold-tip-packs-repacked && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 2 && + git config repack.midxsplitfactor 2 && + + # Same setup, but with the layer threshold set to 2. + # Since the tip MIDX layer meets that threshold, its + # packs are considered repack candidates. + create_geometric_packs && + cp $midx_chain $midx_chain.before && + + # Perturb the existing progression such that it is + # rolled up into a single new pack, invalidating the + # existing MIDX layer and replacing it with a new one. + test_commit extra && + git repack -d && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + ! test_cmp $midx_chain.before $midx_chain && + test_line_count = 1 $midx_chain && + + git multi-pack-index verify + ) +' + +test_expect_success 'above layer threshold, tip layer preserved' ' + git init above-layer-threshold-tip-layer-preserved && + ( + cd above-layer-threshold-tip-layer-preserved && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 2 && + git config repack.midxsplitfactor 2 && + + test_commit_bulk --message="medium" 2 && + test_commit_bulk --message="large" 6 && + + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + test_line_count = 1 "$midx_chain" && + ls $packdir/pack-*.idx | sort >packs.before && + cp $midx_chain $midx_chain.before && + + # Create objects to form a pack satisfying the geometric + # progression (thus preserving the tip layer), but not + # so large that it meets the layer merging condition. + test_commit_bulk --message="small" 1 && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + ls $packdir/pack-*.idx | sort >packs.after && + comm -13 packs.before packs.after >packs.new && + + test_line_count = 1 packs.new && + test_line_count = 3 packs.after && + test_line_count = 2 "$midx_chain" && + head -n 1 "$midx_chain.before" >expect && + head -n 1 "$midx_chain" >actual && + test_cmp expect actual && + + git multi-pack-index verify + ) +' + +test_expect_success 'above layer threshold, tip packs preserved' ' + git init above-layer-threshold-tip-packs-preserved && + ( + cd above-layer-threshold-tip-packs-preserved && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 2 && + git config repack.midxsplitfactor 2 && + + create_geometric_packs && + ls $packdir/pack-*.idx | sort >packs.before && + cp $midx_chain $midx_chain.before && + + # Same setup as above, but this time the new objects do + # not satisfy the new layer merging condition, resulting + # in a new tip layer. + test_commit_bulk --message="huge" 18 && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + ls $packdir/pack-*.idx | sort >packs.after && + comm -13 packs.before packs.after >packs.new && + + ! test_cmp $midx_chain.before $midx_chain && + test_line_count = 1 $midx_chain && + test_line_count = 1 packs.new && + + git multi-pack-index verify + ) +' + +test_expect_success 'new tip absorbs multiple layers' ' + git init new-tip-absorbs-multiple-layers && + ( + cd new-tip-absorbs-multiple-layers && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + # Build a 4-layer chain where each layer is too small to + # absorb the one below it. The sizes must satisfy L(n) < + # L(n-1)/2 for each adjacent pair: + # + # L0 (oldest): 75 obj (25 commits) + # L1: 21 obj (7 commits, 21 < 75/2) + # L2: 9 obj (3 commits, 9 < 21/2) + # L3 (tip): 3 obj (1 commit, 3 < 9/2) + create_layers <<-\EOF && + L0 25 + L1 7 + L2 3 + L3 1 + EOF + + test_line_count = 4 "$midx_chain" && + cp $midx_chain $midx_chain.before && + + # Now add a new commit. The merging condition is + # satisfied between L3-L1, but violated at L0, which is + # too large relative to the accumulated size. + # + # As a result, the chain shrinks from 4 to 2 layers. + test_commit new && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + ! test_cmp $midx_chain.before $midx_chain && + test_line_count = 2 "$midx_chain" && + git multi-pack-index verify + ) +' + +test_expect_success 'compaction of older layers' ' + git init compaction-of-older-layers && + ( + cd compaction-of-older-layers && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + # Build a chain with two small layers at the bottom + # and a larger barrier layer on top, producing a + # chain that violates the compaction invariant, since + # the two small layers would normally have been merged. + create_layers <<-\EOF && + one 2 + two 4 + barrier 54 + EOF + + cp $midx_chain $midx_chain.before && + + # Running an incremental repack compacts the two + # small layers at the bottom of the chain as a + # separate step in the compaction plan. + test_commit another && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + test_line_count = 2 "$midx_chain" && + git multi-pack-index verify + ) +' + +test_expect_success 'geometric rollup with surviving tip packs' ' + git init geometric-rollup-with-surviving-tip-packs && + ( + cd geometric-rollup-with-surviving-tip-packs && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + # Create a pack large enough to anchor the geometric + # progression when small packs are added alongside it. + create_layer --message="big" 5 && + + test_line_count = 1 "$midx_chain" && + cp $midx_chain $midx_chain.before && + + # Repack a small number of objects such that the + # progression is unbothered. Note that the existing pack + # is considered a repack candidate as the new layer + # threshold is set to 1. + test_commit small-1 && + git repack -d && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + ! test_cmp $midx_chain.before $midx_chain && + cp $midx_chain $midx_chain.before + ) +' + +test_expect_success 'kept packs are excluded from repack' ' + git init kept-packs-excluded-from-repack && + ( + cd kept-packs-excluded-from-repack && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + # Create two equal-sized packs, marking one as kept. + for i in A B + do + test_commit "$i" && git repack -d || return 1 + done && + + keep=$(ls $packdir/pack-*.idx | head -n 1) && + touch "${keep%.idx}.keep" && + + # The kept pack is excluded as a repacking candidate + # entirely, so no rollup occurs as there is only one + # non-kept pack. A new MIDX layer is written containing + # that pack. + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + test-tool read-midx $objdir >actual && + grep "^pack-.*\.idx$" actual >actual.packs && + test_line_count = 1 actual.packs && + test_grep ! "$keep" actual.packs && + + git multi-pack-index verify && + + # All objects (from both kept and non-kept packs) + # must still be accessible. + git fsck + ) +' + +test_expect_success 'incremental MIDX with --max-pack-size' ' + git init incremental-midx-with--max-pack-size && + ( + cd incremental-midx-with--max-pack-size && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + create_layer --message="base" 1 && + + # Now add enough data that a small --max-pack-size will + # cause pack-objects to split its output. Create objects + # large enough to fill multiple packs. + test-tool genrandom foo 1M >big1 && + test-tool genrandom bar 1M >big2 && + git add big1 big2 && + test_tick && + git commit -a -m "big blobs" && + git repack -d && + + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index --max-pack-size=1M && + + test_line_count = 1 "$midx_chain" && + test-tool read-midx $objdir >actual && + grep "^pack-.*\.idx$" actual >actual.packs && + test_line_count -gt 1 actual.packs && + + git multi-pack-index verify + ) +' + +test_expect_success 'noop repack preserves valid MIDX chain' ' + git init noop-repack-preserves-valid-midx-chain && + ( + cd noop-repack-preserves-valid-midx-chain && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + create_layer --message="base" 1 && + + git multi-pack-index verify && + cp $midx_chain $midx_chain.before && + + # Running again with no new objects should not break + # the MIDX chain. It produces "Nothing new to pack." + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + test_cmp $midx_chain.before $midx_chain && + + git multi-pack-index verify && + git fsck + ) +' + +test_expect_success 'repack -ad removes stale incremental chain' ' + git init repack--ad-removes-stale-incremental-chain && + ( + cd repack--ad-removes-stale-incremental-chain && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + create_layers <<-\EOF && + one 1 + two 1 + EOF + + test_path_is_file $midx_chain && + test_line_count = 2 $midx_chain && + + git repack -ad && + + test_path_is_missing $packdir/multi-pack-index && + test_dir_is_empty $midxdir + ) +' + +test_expect_success 'repack rejects invalid midxSplitFactor' ' + test_when_finished "rm -fr bad-split-factor" && + git init bad-split-factor && + ( + cd bad-split-factor && + test_commit base && + + for v in 0 1 -1 + do + test_must_fail git -c repack.midxSplitFactor=$v \ + repack -d --geometric=2 --write-midx=incremental 2>err && + test_grep "invalid value for --midx-split-factor" err || + return 1 + done + ) +' + +test_expect_success 'repack rejects invalid midxNewLayerThreshold' ' + test_when_finished "rm -fr bad-layer-threshold" && + git init bad-layer-threshold && + ( + cd bad-layer-threshold && + test_commit base && + + for v in 0 -1 + do + test_must_fail git -c repack.midxNewLayerThreshold=$v \ + repack -d --geometric=2 --write-midx=incremental 2>err && + test_grep "invalid value for --midx-new-layer-threshold" err || + return 1 + done + ) +' + +test_done From 06733a50eeec4205011d210d3932c5b708a665e9 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:58:25 -0400 Subject: [PATCH 16/16] repack: allow `--write-midx=incremental` without `--geometric` Previously, `--write-midx=incremental` required `--geometric` and would die() without it. Relax this restriction so that incremental MIDX repacking can be used independently. Without `--geometric`, the behavior is append-only: a single new MIDX layer is created containing whatever packs were written by the repack and appended to the existing chain (or a new chain is started). Existing layers are preserved as-is with no compaction or merging. Implement this via a new repack_make_midx_append_plan() that builds a plan consisting of a WRITE step for the freshly written packs followed by COPY steps for every existing MIDX layer. The existing compaction plan (repack_make_midx_compaction_plan) is used only when `--geometric` is active. Update the documentation to describe the behavior with and without `--geometric`, and replace the test that enforced the old restriction with one exercising append-only incremental MIDX repacking. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-repack.adoc | 19 +++++---- builtin/repack.c | 3 -- repack-midx.c | 64 +++++++++++++++++++++++++++-- t/t7705-repack-incremental-midx.sh | 65 +++++++++++++++++++++++++++--- 4 files changed, 133 insertions(+), 18 deletions(-) diff --git a/Documentation/git-repack.adoc b/Documentation/git-repack.adoc index 27a99cc46f..72c42015e2 100644 --- a/Documentation/git-repack.adoc +++ b/Documentation/git-repack.adoc @@ -263,14 +263,19 @@ linkgit:git-multi-pack-index[1]). `incremental`;; Write an incremental MIDX chain instead of a single - flat MIDX. This mode requires `--geometric`. + flat MIDX. + -The incremental mode maintains a chain of MIDX layers that is compacted -over time using a geometric merging strategy. Each repack creates a new -tip layer containing the newly written pack(s). Adjacent layers are then -merged whenever the newer layer's object count exceeds -`1/repack.midxSplitFactor` of the next deeper layer's count. Layers -that do not meet this condition are retained as-is. +Without `--geometric`, a new MIDX layer is appended to the existing +chain (or a new chain is started) containing whatever packs were written +by the repack. Existing layers are preserved as-is. ++ +When combined with `--geometric`, the incremental mode maintains a chain +of MIDX layers that is compacted over time using a geometric merging +strategy. Each repack creates a new tip layer containing the newly +written pack(s). Adjacent layers are then merged whenever the newer +layer's object count exceeds `1/repack.midxSplitFactor` of the next +deeper layer's count. Layers that do not meet this condition are +retained as-is. + The result is that newer (tip) layers tend to contain many small packs with relatively few objects, while older (deeper) layers contain fewer, diff --git a/builtin/repack.c b/builtin/repack.c index 5ffa18e085..1524a9c13a 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -263,9 +263,6 @@ int cmd_repack(int argc, if (pack_everything & PACK_CRUFT) pack_everything |= ALL_INTO_ONE; - if (write_midx == REPACK_WRITE_MIDX_INCREMENTAL && !geometry.split_factor) - die(_("--write-midx=incremental requires --geometric")); - if (write_bitmaps < 0) { if (write_midx == REPACK_WRITE_MIDX_NONE && (!(pack_everything & ALL_INTO_ONE) || !is_bare_repository())) diff --git a/repack-midx.c b/repack-midx.c index 4f5deeb97b..b6b1de7180 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -548,6 +548,60 @@ static void midx_compaction_step_release(struct midx_compaction_step *step) free(step->csum); } +/* + * Build an append-only MIDX plan: a single WRITE step for the freshly + * written packs, plus COPY steps for every existing layer. No + * compaction or merging is performed. + */ +static void repack_make_midx_append_plan(struct repack_write_midx_opts *opts, + struct midx_compaction_step **steps_p, + size_t *steps_nr_p) +{ + struct multi_pack_index *m; + struct midx_compaction_step *steps = NULL; + struct midx_compaction_step *step; + size_t steps_nr = 0, steps_alloc = 0; + + odb_reprepare(opts->existing->repo->objects); + m = get_multi_pack_index(opts->existing->source); + + if (opts->names->nr) { + struct strbuf buf = STRBUF_INIT; + uint32_t i; + + ALLOC_GROW(steps, st_add(steps_nr, 1), steps_alloc); + + step = &steps[steps_nr++]; + memset(step, 0, sizeof(*step)); + + step->type = MIDX_COMPACTION_STEP_WRITE; + string_list_init_dup(&step->u.write); + + for (i = 0; i < opts->names->nr; i++) { + strbuf_reset(&buf); + strbuf_addf(&buf, "pack-%s.idx", + opts->names->items[i].string); + string_list_append(&step->u.write, buf.buf); + } + + strbuf_release(&buf); + } + + for (; m; m = m->base_midx) { + ALLOC_GROW(steps, st_add(steps_nr, 1), steps_alloc); + + step = &steps[steps_nr++]; + memset(step, 0, sizeof(*step)); + + step->type = MIDX_COMPACTION_STEP_COPY; + step->u.copy = m; + step->objects_nr = m->num_objects; + } + + *steps_p = steps; + *steps_nr_p = steps_nr; +} + static int repack_make_midx_compaction_plan(struct repack_write_midx_opts *opts, struct midx_compaction_step **steps_p, size_t *steps_nr_p) @@ -904,9 +958,13 @@ static int write_midx_incremental(struct repack_write_midx_opts *opts) goto done; } - if (repack_make_midx_compaction_plan(opts, &steps, &steps_nr) < 0) { - ret = error(_("unable to generate compaction plan")); - goto done; + if (opts->geometry->split_factor) { + if (repack_make_midx_compaction_plan(opts, &steps, &steps_nr) < 0) { + ret = error(_("unable to generate compaction plan")); + goto done; + } + } else { + repack_make_midx_append_plan(opts, &steps, &steps_nr); } for (i = 0; i < steps_nr; i++) { diff --git a/t/t7705-repack-incremental-midx.sh b/t/t7705-repack-incremental-midx.sh index 9e317ff6e8..25a8c40e8e 100755 --- a/t/t7705-repack-incremental-midx.sh +++ b/t/t7705-repack-incremental-midx.sh @@ -63,10 +63,36 @@ create_layers () { done } -test_expect_success '--write-midx=incremental requires --geometric' ' - test_must_fail git repack --write-midx=incremental 2>err && +test_expect_success '--write-midx=incremental without --geometric' ' + git init incremental-without-geometric && + ( + cd incremental-without-geometric && - test_grep -- "--write-midx=incremental requires --geometric" err + git config maintenance.auto false && + + test_commit first && + git repack -d && + + test_commit second && + git repack --write-midx=incremental && + + git multi-pack-index verify && + test_line_count = 1 $midx_chain && + cp $midx_chain $midx_chain.before && + + # A second repack appends a new layer without + # disturbing the existing one. + test_commit third && + git repack --write-midx=incremental && + + git multi-pack-index verify && + test_line_count = 2 $midx_chain && + head -n 1 $midx_chain.before >expect && + head -n 1 $midx_chain >actual && + test_cmp expect actual && + + git fsck + ) ' test_expect_success 'below layer threshold, tip packs excluded' ' @@ -334,8 +360,7 @@ test_expect_success 'kept packs are excluded from repack' ' # entirely, so no rollup occurs as there is only one # non-kept pack. A new MIDX layer is written containing # that pack. - git repack --geometric=2 -d --write-midx=incremental \ - --write-bitmap-index && + git repack --geometric=2 -d --write-midx=incremental && test-tool read-midx $objdir >actual && grep "^pack-.*\.idx$" actual >actual.packs && @@ -433,6 +458,36 @@ test_expect_success 'repack -ad removes stale incremental chain' ' ) ' +test_expect_success 'repack -ad --write-midx=incremental is safe' ' + git init ad-incremental-midx && + ( + cd ad-incremental-midx && + + git config maintenance.auto false && + + # Build a MIDX chain with multiple layers referencing + # distinct packs. + test_commit first && + git repack -d && + + test_commit second && + git repack -d --write-midx=incremental && + + git multi-pack-index verify && + test_line_count = 1 $midx_chain && + + # Now do a full -ad repack. The new pack contains all + # objects, but any retained MIDX layers still reference + # the now-deleted packs. + test_commit third && + git repack -ad --write-midx=incremental && + + git multi-pack-index verify && + git fsck && + git rev-list --all --objects >/dev/null + ) +' + test_expect_success 'repack rejects invalid midxSplitFactor' ' test_when_finished "rm -fr bad-split-factor" && git init bad-split-factor &&