From af1ddcb2b55e7e91894fb911c04a5a4a509e2dc7 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:14 -0400 Subject: [PATCH 01/50] builtin/repack.c: avoid "the_repository" in `cmd_repack()` Reduce builtin/repack.c's reliance on `the_repository` by using the currently-UNUSED "repo" parameter within cmd_repack(). The following commits will continue to reduce the usage of the_repository in other places within builtin/repack.c. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index e8730808c5..305782b2c9 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -1247,7 +1247,7 @@ static const char *find_pack_prefix(const char *packdir, const char *packtmp) int cmd_repack(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; @@ -1344,7 +1344,7 @@ int cmd_repack(int argc, list_objects_filter_init(&po_args.filter_options); - repo_config(the_repository, repack_config, &cruft_po_args); + repo_config(repo, repack_config, &cruft_po_args); argc = parse_options(argc, argv, prefix, builtin_repack_options, git_repack_usage, 0); @@ -1354,7 +1354,7 @@ int cmd_repack(int argc, po_args.depth = xstrdup_or_null(opt_depth); po_args.threads = xstrdup_or_null(opt_threads); - if (delete_redundant && the_repository->repository_format_precious_objects) + if (delete_redundant && repo->repository_format_precious_objects) die(_("cannot delete packs in a precious-objects repo")); die_for_incompatible_opt3(unpack_unreachable || (pack_everything & LOOSEN_UNREACHABLE), "-A", @@ -1376,7 +1376,7 @@ int cmd_repack(int argc, die(_(incremental_bitmap_conflict_error)); if (write_bitmaps && po_args.local && - odb_has_alternates(the_repository->objects)) { + odb_has_alternates(repo->objects)) { /* * When asked to do a local repack, but we have * packfiles that are inherited from an alternate, then @@ -1391,7 +1391,8 @@ int cmd_repack(int argc, if (write_midx && write_bitmaps) { struct strbuf path = STRBUF_INIT; - strbuf_addf(&path, "%s/%s_XXXXXX", repo_get_object_directory(the_repository), + strbuf_addf(&path, "%s/%s_XXXXXX", + repo_get_object_directory(repo), "bitmap-ref-tips"); refs_snapshot = xmks_tempfile(path.buf); @@ -1400,7 +1401,7 @@ int cmd_repack(int argc, strbuf_release(&path); } - packdir = mkpathdup("%s/pack", repo_get_object_directory(the_repository)); + packdir = mkpathdup("%s/pack", repo_get_object_directory(repo)); packtmp_name = xstrfmt(".tmp-%d-pack", (int)getpid()); packtmp = mkpathdup("%s/%s", packdir, packtmp_name); @@ -1439,7 +1440,7 @@ int cmd_repack(int argc, strvec_push(&cmd.args, "--reflog"); strvec_push(&cmd.args, "--indexed-objects"); } - if (repo_has_promisor_remote(the_repository)) + if (repo_has_promisor_remote(repo)) strvec_push(&cmd.args, "--exclude-promisor-objects"); if (!write_midx) { if (write_bitmaps > 0) @@ -1535,7 +1536,7 @@ int cmd_repack(int argc, * midx_has_unknown_packs() will make the decision for * us. */ - if (!get_multi_pack_index(the_repository->objects->sources)) + if (!get_multi_pack_index(repo->objects->sources)) midx_must_contain_cruft = 1; } @@ -1618,9 +1619,9 @@ int cmd_repack(int argc, string_list_sort(&names); - if (get_multi_pack_index(the_repository->objects->sources)) { + if (get_multi_pack_index(repo->objects->sources)) { struct multi_pack_index *m = - get_multi_pack_index(the_repository->objects->sources); + get_multi_pack_index(repo->objects->sources); ALLOC_ARRAY(midx_pack_names, m->num_packs + m->num_packs_in_base); @@ -1631,7 +1632,7 @@ int cmd_repack(int argc, xstrdup(m->pack_names[i]); } - close_object_store(the_repository->objects); + close_object_store(repo->objects); /* * Ok we have prepared all new packfiles. @@ -1688,7 +1689,7 @@ int cmd_repack(int argc, goto cleanup; } - odb_reprepare(the_repository->objects); + odb_reprepare(repo->objects); if (delete_redundant) { int opts = 0; @@ -1704,18 +1705,18 @@ int cmd_repack(int argc, if (!keep_unreachable && (!(pack_everything & LOOSEN_UNREACHABLE) || unpack_unreachable) && - is_repository_shallow(the_repository)) + is_repository_shallow(repo)) prune_shallow(PRUNE_QUICK); } if (run_update_server_info) - update_server_info(the_repository, 0); + update_server_info(repo, 0); if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0)) { unsigned flags = 0; if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL, 0)) flags |= MIDX_WRITE_INCREMENTAL; - write_midx_file(the_repository->objects->sources, + write_midx_file(repo->objects->sources, NULL, NULL, flags); } From 73ba2c9b39e56829fd9a114f7bbe425e69ebf7d8 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:18 -0400 Subject: [PATCH 02/50] builtin/repack.c: avoid "the_repository" in existing packs API There are a number of spots within builtin/repack.c which refer to "the_repository", and either make use of the "existing packs" API or otherwise have a 'struct existing_packs *' in scope. Add a "repo" member to "struct existing_packs" and use that instead of "the_repository" in such locations. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 305782b2c9..7223553bed 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -126,6 +126,7 @@ static void pack_objects_args_release(struct pack_objects_args *args) } struct existing_packs { + struct repository *repo; struct string_list kept_packs; struct string_list non_kept_packs; struct string_list cruft_packs; @@ -265,7 +266,7 @@ static void existing_packs_release(struct existing_packs *existing) static void collect_pack_filenames(struct existing_packs *existing, const struct string_list *extra_keep) { - struct packfile_store *packs = the_repository->objects->packfiles; + struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; @@ -498,7 +499,7 @@ static void init_pack_geometry(struct pack_geometry *geometry, struct existing_packs *existing, const struct pack_objects_args *args) { - struct packfile_store *packs = the_repository->objects->packfiles; + struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; @@ -1139,7 +1140,7 @@ static int write_filtered_pack(const struct pack_objects_args *args, static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, struct existing_packs *existing) { - struct packfile_store *packs = the_repository->objects->packfiles; + struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; size_t i; @@ -1405,6 +1406,7 @@ int cmd_repack(int argc, packtmp_name = xstrfmt(".tmp-%d-pack", (int)getpid()); packtmp = mkpathdup("%s/%s", packdir, packtmp_name); + existing.repo = repo; collect_pack_filenames(&existing, &keep_pack_list); if (geometry.split_factor) { From 2f425fe2fcc6aef33576acc99335e57ffd3a217f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:22 -0400 Subject: [PATCH 03/50] builtin/repack.c: avoid "the_repository" when taking a ref snapshot Avoid using "the_repository" in various MIDX-related ref snapshotting functions. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 7223553bed..113f5fc67f 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -771,6 +771,7 @@ static int midx_has_unknown_packs(char **midx_pack_names, } struct midx_snapshot_ref_data { + struct repository *repo; struct tempfile *f; struct oidset seen; int preferred; @@ -784,13 +785,13 @@ static int midx_snapshot_ref_one(const char *refname UNUSED, struct midx_snapshot_ref_data *data = _data; struct object_id peeled; - if (!peel_iterated_oid(the_repository, oid, &peeled)) + if (!peel_iterated_oid(data->repo, oid, &peeled)) oid = &peeled; if (oidset_insert(&data->seen, oid)) return 0; /* already seen */ - if (odb_read_object_info(the_repository->objects, oid, NULL) != OBJ_COMMIT) + if (odb_read_object_info(data->repo->objects, oid, NULL) != OBJ_COMMIT) return 0; fprintf(data->f->fp, "%s%s\n", data->preferred ? "+" : "", @@ -799,11 +800,12 @@ static int midx_snapshot_ref_one(const char *refname UNUSED, return 0; } -static void midx_snapshot_refs(struct tempfile *f) +static void midx_snapshot_refs(struct repository *repo, struct tempfile *f) { struct midx_snapshot_ref_data data; - const struct string_list *preferred = bitmap_preferred_tips(the_repository); + const struct string_list *preferred = bitmap_preferred_tips(repo); + data.repo = repo; data.f = f; data.preferred = 0; oidset_init(&data.seen, 0); @@ -817,13 +819,13 @@ static void midx_snapshot_refs(struct tempfile *f) data.preferred = 1; for_each_string_list_item(item, preferred) - refs_for_each_ref_in(get_main_ref_store(the_repository), + refs_for_each_ref_in(get_main_ref_store(repo), item->string, midx_snapshot_ref_one, &data); data.preferred = 0; } - refs_for_each_ref(get_main_ref_store(the_repository), + refs_for_each_ref(get_main_ref_store(repo), midx_snapshot_ref_one, &data); if (close_tempfile_gently(f)) { @@ -1397,7 +1399,7 @@ int cmd_repack(int argc, "bitmap-ref-tips"); refs_snapshot = xmks_tempfile(path.buf); - midx_snapshot_refs(refs_snapshot); + midx_snapshot_refs(repo, refs_snapshot); strbuf_release(&path); } From c97715584399e494245dd25c3012dc210a02be8b Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:26 -0400 Subject: [PATCH 04/50] builtin/repack.c: avoid "the_repository" when removing packs The 'remove_redundant_pack()' function uses "the_repository" to obtain, and optionally remove, the repository's MIDX. Instead of relying on "the_repository", pass around a "struct repository *" parameter through its callers, and use that instead. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 113f5fc67f..93802531e1 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -221,33 +221,35 @@ static void mark_packs_for_deletion(struct existing_packs *existing, mark_packs_for_deletion_1(names, &existing->cruft_packs); } -static void remove_redundant_pack(const char *dir_name, const char *base_name) +static void remove_redundant_pack(struct repository *repo, + const char *dir_name, const char *base_name) { struct strbuf buf = STRBUF_INIT; - struct odb_source *source = the_repository->objects->sources; + struct odb_source *source = repo->objects->sources; struct multi_pack_index *m = get_multi_pack_index(source); strbuf_addf(&buf, "%s.pack", base_name); if (m && source->local && midx_contains_pack(m, buf.buf)) - clear_midx_file(the_repository); + clear_midx_file(repo); strbuf_insertf(&buf, 0, "%s/", dir_name); unlink_pack_path(buf.buf, 1); strbuf_release(&buf); } -static void remove_redundant_packs_1(struct string_list *packs) +static void remove_redundant_packs_1(struct repository *repo, + struct string_list *packs) { struct string_list_item *item; for_each_string_list_item(item, packs) { if (!pack_is_marked_for_deletion(item)) continue; - remove_redundant_pack(packdir, item->string); + remove_redundant_pack(repo, packdir, item->string); } } static void remove_redundant_existing_packs(struct existing_packs *existing) { - remove_redundant_packs_1(&existing->non_kept_packs); - remove_redundant_packs_1(&existing->cruft_packs); + remove_redundant_packs_1(existing->repo, &existing->non_kept_packs); + remove_redundant_packs_1(existing->repo, &existing->cruft_packs); } static void existing_packs_release(struct existing_packs *existing) @@ -685,7 +687,7 @@ static void geometry_remove_redundant_packs(struct pack_geometry *geometry, (string_list_has_string(&existing->kept_packs, buf.buf))) continue; - remove_redundant_pack(packdir, buf.buf); + remove_redundant_pack(existing->repo, packdir, buf.buf); } strbuf_release(&buf); From 14cae948432bae0f753b9ffd8afc84dd73063991 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:30 -0400 Subject: [PATCH 05/50] builtin/repack.c: avoid "the_repository" when repacking promisor objects Pass a "struct repository" pointer to the 'repack_promisor_objects()' function to avoid using "the_repository". Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 93802531e1..4f08b57ddb 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -407,7 +407,8 @@ static int has_pack_ext(const struct generated_pack_data *data, BUG("unknown pack extension: '%s'", ext); } -static void repack_promisor_objects(const struct pack_objects_args *args, +static void repack_promisor_objects(struct repository *repo, + const struct pack_objects_args *args, struct string_list *names) { struct child_process cmd = CHILD_PROCESS_INIT; @@ -424,7 +425,7 @@ static void repack_promisor_objects(const struct pack_objects_args *args, * {type -> existing pack order} ordering when computing deltas instead * of a {type -> size} ordering, which may produce better deltas. */ - for_each_packed_object(the_repository, write_oid, &cmd, + for_each_packed_object(repo, write_oid, &cmd, FOR_EACH_OBJECT_PROMISOR_ONLY); if (cmd.in == -1) { @@ -1458,7 +1459,7 @@ int cmd_repack(int argc, strvec_push(&cmd.args, "--delta-islands"); if (pack_everything & ALL_INTO_ONE) { - repack_promisor_objects(&po_args, &names); + repack_promisor_objects(repo, &po_args, &names); if (has_existing_non_kept_packs(&existing) && delete_redundant && From d5649bbff5b9a2b00bd8a325af1420ce3246fd9a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:33 -0400 Subject: [PATCH 06/50] builtin/repack.c: avoid "the_hash_algo" when deleting packs The "mark_packs_for_deletion_1" function uses "the_hash_algo->hexsz" to isolate a pack's checksum before deleting it to avoid deleting a newly written pack having the same checksum (that is, some generated pack wound up identical to an existing pack). Avoid this by passing down a "struct git_hash_algo" pointer, and refer to the hash algorithm through it instead. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 4f08b57ddb..094f5a0cc2 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -168,11 +168,12 @@ static int pack_is_retained(struct string_list_item *item) return (uintptr_t)item->util & RETAIN_PACK; } -static void mark_packs_for_deletion_1(struct string_list *names, +static void mark_packs_for_deletion_1(const struct git_hash_algo *algop, + struct string_list *names, struct string_list *list) { struct string_list_item *item; - const int hexsz = the_hash_algo->hexsz; + const int hexsz = algop->hexsz; for_each_string_list_item(item, list) { char *sha1; @@ -217,8 +218,9 @@ static void mark_packs_for_deletion(struct existing_packs *existing, struct string_list *names) { - mark_packs_for_deletion_1(names, &existing->non_kept_packs); - mark_packs_for_deletion_1(names, &existing->cruft_packs); + const struct git_hash_algo *algop = existing->repo->hash_algo; + mark_packs_for_deletion_1(algop, names, &existing->non_kept_packs); + mark_packs_for_deletion_1(algop, names, &existing->cruft_packs); } static void remove_redundant_pack(struct repository *repo, From 9347979fb0f4c68ef27c8360096161ed69b5715c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:37 -0400 Subject: [PATCH 07/50] builtin/repack.c: avoid "the_hash_algo" in `write_oid()` In a similar spirit as the previous commit, avoid referring directly to "the_hash_algo" within builtin/repack.c::write_oid(). Unlike the previous commit, we are within a callback function, so must introduce a new struct to pass additional data through its "data" pointer. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 094f5a0cc2..7d62959dc2 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -339,6 +339,11 @@ static void prepare_pack_objects(struct child_process *cmd, cmd->out = -1; } +struct write_oid_context { + struct child_process *cmd; + const struct git_hash_algo *algop; +}; + /* * Write oid to the given struct child_process's stdin, starting it first if * necessary. @@ -347,14 +352,15 @@ static int write_oid(const struct object_id *oid, struct packed_git *pack UNUSED, uint32_t pos UNUSED, void *data) { - struct child_process *cmd = data; + struct write_oid_context *ctx = data; + struct child_process *cmd = ctx->cmd; if (cmd->in == -1) { if (start_command(cmd)) die(_("could not start pack-objects to repack promisor objects")); } - if (write_in_full(cmd->in, oid_to_hex(oid), the_hash_algo->hexsz) < 0 || + if (write_in_full(cmd->in, oid_to_hex(oid), ctx->algop->hexsz) < 0 || write_in_full(cmd->in, "\n", 1) < 0) die(_("failed to feed promisor objects to pack-objects")); return 0; @@ -413,6 +419,7 @@ static void repack_promisor_objects(struct repository *repo, const struct pack_objects_args *args, struct string_list *names) { + struct write_oid_context ctx; struct child_process cmd = CHILD_PROCESS_INIT; FILE *out; struct strbuf line = STRBUF_INIT; @@ -427,7 +434,9 @@ static void repack_promisor_objects(struct repository *repo, * {type -> existing pack order} ordering when computing deltas instead * of a {type -> size} ordering, which may produce better deltas. */ - for_each_packed_object(repo, write_oid, &cmd, + ctx.cmd = &cmd; + ctx.algop = repo->hash_algo; + for_each_packed_object(repo, write_oid, &ctx, FOR_EACH_OBJECT_PROMISOR_ONLY); if (cmd.in == -1) { From 62b7efe90300ef718d6e89dfae9394b86ba72b81 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:41 -0400 Subject: [PATCH 08/50] builtin/repack: avoid "the_hash_algo" in `repack_promisor_objects()` In a similar spirit as the previous commits, avoid referring directly to "the_hash_algo" within builtin/repack.c::repack_promisor_objects(). Since there is already a repository pointer in scope, use its hash_algo value instead. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/repack.c b/builtin/repack.c index 7d62959dc2..a7e94ed03c 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -452,7 +452,7 @@ static void repack_promisor_objects(struct repository *repo, struct string_list_item *item; char *promisor_name; - if (line.len != the_hash_algo->hexsz) + if (line.len != repo->hash_algo->hexsz) die(_("repack: Expecting full hex object ID lines only from pack-objects.")); item = string_list_append(names, line.buf); From 29b820b234127270d9cb021ba511039dd701df74 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:45 -0400 Subject: [PATCH 09/50] builtin/repack.c: avoid "the_hash_algo" in `finish_pack_objects_cmd()` In a similar spirit as previous commits, avoid referring directly to "the_hash_algo" in builtin/repack.c::finish_pack_objects_cmd() and instead accept one as a parameter to the function. Since this function has a number of callers throughout the builtin, the diff is a little noisier than previous commits. However, each hunk is limited to passing the hash_algo parameter from a repository pointer that is already in scope. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index a7e94ed03c..a043704aa8 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -1073,7 +1073,8 @@ static void remove_redundant_bitmaps(struct string_list *include, strbuf_release(&path); } -static int finish_pack_objects_cmd(struct child_process *cmd, +static int finish_pack_objects_cmd(const struct git_hash_algo *algop, + struct child_process *cmd, struct string_list *names, int local) { @@ -1084,7 +1085,7 @@ static int finish_pack_objects_cmd(struct child_process *cmd, while (strbuf_getline_lf(&line, out) != EOF) { struct string_list_item *item; - if (line.len != the_hash_algo->hexsz) + if (line.len != algop->hexsz) die(_("repack: Expecting full hex object ID lines only " "from pack-objects.")); /* @@ -1150,7 +1151,8 @@ static int write_filtered_pack(const struct pack_objects_args *args, fprintf(in, "%s%s.pack\n", caret, item->string); fclose(in); - return finish_pack_objects_cmd(&cmd, names, local); + return finish_pack_objects_cmd(existing->repo->hash_algo, &cmd, names, + local); } static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, @@ -1247,7 +1249,8 @@ static int write_cruft_pack(const struct pack_objects_args *args, fprintf(in, "%s.pack\n", item->string); fclose(in); - return finish_pack_objects_cmd(&cmd, names, local); + return finish_pack_objects_cmd(existing->repo->hash_algo, &cmd, names, + local); } static const char *find_pack_prefix(const char *packdir, const char *packtmp) @@ -1534,7 +1537,7 @@ int cmd_repack(int argc, fclose(in); } - ret = finish_pack_objects_cmd(&cmd, &names, 1); + ret = finish_pack_objects_cmd(repo->hash_algo, &cmd, &names, 1); if (ret) goto cleanup; From b908981ce8047c92b2229943412c28c12c1b7aee Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:49 -0400 Subject: [PATCH 10/50] builtin/repack.c: avoid using `hash_to_hex()` in pack geometry In previous commits, we started passing either repository or git_hash_algo pointers around to various spots within builtin/repack.c to reduce our dependency on the_repository in the hope of undef'ing USE_THE_REPOSITORY_VARIABLE. This commit takes us as far as we can (easily) go in that direction by removing the only use of a convenience function that only exists when USE_THE_REPOSITORY_VARIABLE is defined. Unfortunately, the only other such function is "is_bare_repository()", which is less than straightforward to convert into, say, "repo_is_bare()", the latter of the two accepting a repository pointer. Punt on that for now, and declare this commit as the stopping point for our efforts in the direction of undef'ing USE_THE_REPOSITORY_VARIABLE. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/builtin/repack.c b/builtin/repack.c index a043704aa8..0d35f15b4b 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -683,12 +683,14 @@ static void geometry_remove_redundant_packs(struct pack_geometry *geometry, struct string_list *names, struct existing_packs *existing) { + const struct git_hash_algo *algop = existing->repo->hash_algo; struct strbuf buf = STRBUF_INIT; uint32_t i; for (i = 0; i < geometry->split; i++) { struct packed_git *p = geometry->pack[i]; - if (string_list_has_string(names, hash_to_hex(p->hash))) + if (string_list_has_string(names, hash_to_hex_algop(p->hash, + algop))) continue; strbuf_reset(&buf); From 0ec811ac1868845e1dea1dac3b0de8f3a4c64f14 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:53 -0400 Subject: [PATCH 11/50] repack: introduce new compilation unit Over the years, builtin/repack.c has turned into a grab-bag of functionality powering the 'git repack' builtin. Among its many capabilities, it: - can build and spawn 'git pack-objects' commands, which in turn generate new packs - has infrastructure to manage the set of existing packs in a repository - has infrastructure to split a sequence of packs into a geometric progression based on object size - can manage both generating and combining cruft packs together - can write new MIDXs to name a few. As a result, this builtin has accumulated a lot of code, making adding new functionality difficult. In the future, 'repack' will learn how to manage a chain of incremental MIDXs, adding yet more functionality into the builtin. As a prerequisite step, let's first move some of the functionality in the builtin into its own repack.[ch]. This will be done over the course of many steps, since there are many individual components, some of which will end up in other, yet-to-exist compilation units of their own. Some of the code movement here is also non-trivial, so performing it in individual steps will make it easier to verify. Let's start by migrating 'struct pack_objects_args' (and the related corresponding pack_objects_args_release() function) into repack.h, and teach both the Makefile and Meson how to build the new compilation unit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 25 +------------------------ meson.build | 1 + repack.c | 11 +++++++++++ repack.h | 23 +++++++++++++++++++++++ 5 files changed, 37 insertions(+), 24 deletions(-) create mode 100644 repack.c create mode 100644 repack.h diff --git a/Makefile b/Makefile index 4c95affadb..c0df6da237 100644 --- a/Makefile +++ b/Makefile @@ -1136,6 +1136,7 @@ LIB_OBJS += refs/packed-backend.o LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o +LIB_OBJS += repack.o LIB_OBJS += replace-object.o LIB_OBJS += repo-settings.o LIB_OBJS += repository.o diff --git a/builtin/repack.c b/builtin/repack.c index 0d35f15b4b..6dfcb3327e 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -19,6 +19,7 @@ #include "prune-packed.h" #include "odb.h" #include "promisor-remote.h" +#include "repack.h" #include "shallow.h" #include "pack.h" #include "pack-bitmap.h" @@ -53,21 +54,6 @@ static const char incremental_bitmap_conflict_error[] = N_( "--no-write-bitmap-index or disable the pack.writeBitmaps configuration." ); -struct pack_objects_args { - char *window; - char *window_memory; - char *depth; - char *threads; - unsigned long max_pack_size; - int no_reuse_delta; - int no_reuse_object; - int quiet; - int local; - int name_hash_version; - int path_walk; - struct list_objects_filter_options filter_options; -}; - static int repack_config(const char *var, const char *value, const struct config_context *ctx, void *cb) { @@ -116,15 +102,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -static void pack_objects_args_release(struct pack_objects_args *args) -{ - free(args->window); - free(args->window_memory); - free(args->depth); - free(args->threads); - list_objects_filter_release(&args->filter_options); -} - struct existing_packs { struct repository *repo; struct string_list kept_packs; diff --git a/meson.build b/meson.build index b3dfcc0497..993e8f368f 100644 --- a/meson.build +++ b/meson.build @@ -462,6 +462,7 @@ libgit_sources = [ 'reftable/tree.c', 'reftable/writer.c', 'remote.c', + 'repack.c', 'replace-object.c', 'repo-settings.c', 'repository.c', diff --git a/repack.c b/repack.c new file mode 100644 index 0000000000..a1f5b796fb --- /dev/null +++ b/repack.c @@ -0,0 +1,11 @@ +#include "git-compat-util.h" +#include "repack.h" + +void pack_objects_args_release(struct pack_objects_args *args) +{ + free(args->window); + free(args->window_memory); + free(args->depth); + free(args->threads); + list_objects_filter_release(&args->filter_options); +} diff --git a/repack.h b/repack.h new file mode 100644 index 0000000000..421d439d5a --- /dev/null +++ b/repack.h @@ -0,0 +1,23 @@ +#ifndef REPACK_H +#define REPACK_H + +#include "list-objects-filter-options.h" + +struct pack_objects_args { + char *window; + char *window_memory; + char *depth; + char *threads; + unsigned long max_pack_size; + int no_reuse_delta; + int no_reuse_object; + int quiet; + int local; + int name_hash_version; + int path_walk; + struct list_objects_filter_options filter_options; +}; + +void pack_objects_args_release(struct pack_objects_args *args); + +#endif /* REPACK_H */ From a953609865ac4d0875cc878146c6b8e0c7c28643 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:07:56 -0400 Subject: [PATCH 12/50] builtin/repack.c: pass both pack_objects args to repack_config A subsequent commit will remove 'delta_base_offset' as a static variable within builtin/repack.c, and reintroduce it as a member of the 'struct pack_objects_args'. As a result, the repack_config callback will need to have both the cruft- and non-cruft 'struct pack_objects_args's in scope. Introduce a new 'struct repack_config_ctx' to allow the callee to provide both pointers to the callback. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 6dfcb3327e..af6de8d77a 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -54,10 +54,16 @@ static const char incremental_bitmap_conflict_error[] = N_( "--no-write-bitmap-index or disable the pack.writeBitmaps configuration." ); +struct repack_config_ctx { + struct pack_objects_args *po_args; + struct pack_objects_args *cruft_po_args; +}; + static int repack_config(const char *var, const char *value, const struct config_context *ctx, void *cb) { - struct pack_objects_args *cruft_po_args = cb; + struct repack_config_ctx *repack_ctx = cb; + struct pack_objects_args *cruft_po_args = repack_ctx->cruft_po_args; if (!strcmp(var, "repack.usedeltabaseoffset")) { delta_base_offset = git_config_bool(var, value); return 0; @@ -1260,6 +1266,7 @@ int cmd_repack(int argc, size_t midx_pack_names_nr = 0; /* variables to be filled by option parsing */ + struct repack_config_ctx config_ctx; int delete_redundant = 0; const char *unpack_unreachable = NULL; int keep_unreachable = 0; @@ -1343,7 +1350,11 @@ int cmd_repack(int argc, list_objects_filter_init(&po_args.filter_options); - repo_config(repo, repack_config, &cruft_po_args); + memset(&config_ctx, 0, sizeof(config_ctx)); + config_ctx.po_args = &po_args; + config_ctx.cruft_po_args = &cruft_po_args; + + repo_config(repo, repack_config, &config_ctx); argc = parse_options(argc, argv, prefix, builtin_repack_options, git_repack_usage, 0); From 17ee881425c68c58f08a5bc62b8074b3e04c6296 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:00 -0400 Subject: [PATCH 13/50] repack: move 'delta_base_offset' to 'struct pack_objects_args' The static variable 'delta_base_offset' determines whether or not we pass the "--delta-base-offset" command-line argument when spawning pack-objects as a child process. Its introduction dates back to when repack was rewritten in C, all the way back in a1bbc6c017 (repack: rewrite the shell script in C, 2013-09-15). 'struct pack_objects_args' was introduced much later on in 4571324b99 (builtin/repack.c: allow configuring cruft pack generation, 2022-05-20), but did not move the 'delta_base_offset' variable. Since the 'delta_base_offset' is a property of an individual pack-objects command, re-introduce that variable as a member of 'struct pack_objects_args', which will enable further code movement in the subsequent commits. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 11 ++++++----- repack.h | 3 +++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index af6de8d77a..f4af830353 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -34,7 +34,6 @@ #define RETAIN_PACK 2 static int pack_everything; -static int delta_base_offset = 1; static int pack_kept_objects = -1; static int write_bitmaps = -1; static int use_delta_islands; @@ -63,9 +62,10 @@ static int repack_config(const char *var, const char *value, const struct config_context *ctx, void *cb) { struct repack_config_ctx *repack_ctx = cb; + struct pack_objects_args *po_args = repack_ctx->po_args; struct pack_objects_args *cruft_po_args = repack_ctx->cruft_po_args; if (!strcmp(var, "repack.usedeltabaseoffset")) { - delta_base_offset = git_config_bool(var, value); + po_args->delta_base_offset = git_config_bool(var, value); return 0; } if (!strcmp(var, "repack.packkeptobjects")) { @@ -315,7 +315,7 @@ static void prepare_pack_objects(struct child_process *cmd, strvec_push(&cmd->args, "--local"); if (args->quiet) strvec_push(&cmd->args, "--quiet"); - if (delta_base_offset) + if (args->delta_base_offset) strvec_push(&cmd->args, "--delta-base-offset"); strvec_push(&cmd->args, out); cmd->git_cmd = 1; @@ -1271,8 +1271,8 @@ int cmd_repack(int argc, const char *unpack_unreachable = NULL; int keep_unreachable = 0; struct string_list keep_pack_list = STRING_LIST_INIT_NODUP; - struct pack_objects_args po_args = { 0 }; - struct pack_objects_args cruft_po_args = { 0 }; + struct pack_objects_args po_args = PACK_OBJECTS_ARGS_INIT; + struct pack_objects_args cruft_po_args = PACK_OBJECTS_ARGS_INIT; int write_midx = 0; const char *cruft_expiration = NULL; const char *expire_to = NULL; @@ -1567,6 +1567,7 @@ int cmd_repack(int argc, cruft_po_args.local = po_args.local; cruft_po_args.quiet = po_args.quiet; + cruft_po_args.delta_base_offset = po_args.delta_base_offset; ret = write_cruft_pack(&cruft_po_args, packtmp, pack_prefix, cruft_expiration, diff --git a/repack.h b/repack.h index 421d439d5a..12632d7fec 100644 --- a/repack.h +++ b/repack.h @@ -15,9 +15,12 @@ struct pack_objects_args { int local; int name_hash_version; int path_walk; + int delta_base_offset; struct list_objects_filter_options filter_options; }; +#define PACK_OBJECTS_ARGS_INIT { .delta_base_offset = 1 } + void pack_objects_args_release(struct pack_objects_args *args); #endif /* REPACK_H */ From 1ebaa9376eea424a9bd955a0782547a040a8b44e Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:04 -0400 Subject: [PATCH 14/50] repack: remove 'prepare_pack_objects' from the builtin Now that the 'prepare_pack_objects' function no longer refers to external, static variables, move it out to repack.h as generic functionality. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 34 ---------------------------------- repack.c | 35 +++++++++++++++++++++++++++++++++++ repack.h | 5 +++++ 3 files changed, 40 insertions(+), 34 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index f4af830353..ff93654cfe 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -288,40 +288,6 @@ static void collect_pack_filenames(struct existing_packs *existing, strbuf_release(&buf); } -static void prepare_pack_objects(struct child_process *cmd, - const struct pack_objects_args *args, - const char *out) -{ - strvec_push(&cmd->args, "pack-objects"); - if (args->window) - strvec_pushf(&cmd->args, "--window=%s", args->window); - if (args->window_memory) - strvec_pushf(&cmd->args, "--window-memory=%s", args->window_memory); - if (args->depth) - strvec_pushf(&cmd->args, "--depth=%s", args->depth); - if (args->threads) - strvec_pushf(&cmd->args, "--threads=%s", args->threads); - if (args->max_pack_size) - strvec_pushf(&cmd->args, "--max-pack-size=%lu", args->max_pack_size); - if (args->no_reuse_delta) - strvec_pushf(&cmd->args, "--no-reuse-delta"); - if (args->no_reuse_object) - strvec_pushf(&cmd->args, "--no-reuse-object"); - if (args->name_hash_version) - strvec_pushf(&cmd->args, "--name-hash-version=%d", args->name_hash_version); - if (args->path_walk) - strvec_pushf(&cmd->args, "--path-walk"); - if (args->local) - strvec_push(&cmd->args, "--local"); - if (args->quiet) - strvec_push(&cmd->args, "--quiet"); - if (args->delta_base_offset) - strvec_push(&cmd->args, "--delta-base-offset"); - strvec_push(&cmd->args, out); - cmd->git_cmd = 1; - cmd->out = -1; -} - struct write_oid_context { struct child_process *cmd; const struct git_hash_algo *algop; diff --git a/repack.c b/repack.c index a1f5b796fb..91b6e1cc09 100644 --- a/repack.c +++ b/repack.c @@ -1,5 +1,40 @@ #include "git-compat-util.h" #include "repack.h" +#include "run-command.h" + +void prepare_pack_objects(struct child_process *cmd, + const struct pack_objects_args *args, + const char *out) +{ + strvec_push(&cmd->args, "pack-objects"); + if (args->window) + strvec_pushf(&cmd->args, "--window=%s", args->window); + if (args->window_memory) + strvec_pushf(&cmd->args, "--window-memory=%s", args->window_memory); + if (args->depth) + strvec_pushf(&cmd->args, "--depth=%s", args->depth); + if (args->threads) + strvec_pushf(&cmd->args, "--threads=%s", args->threads); + if (args->max_pack_size) + strvec_pushf(&cmd->args, "--max-pack-size=%lu", args->max_pack_size); + if (args->no_reuse_delta) + strvec_pushf(&cmd->args, "--no-reuse-delta"); + if (args->no_reuse_object) + strvec_pushf(&cmd->args, "--no-reuse-object"); + if (args->name_hash_version) + strvec_pushf(&cmd->args, "--name-hash-version=%d", args->name_hash_version); + if (args->path_walk) + strvec_pushf(&cmd->args, "--path-walk"); + if (args->local) + strvec_push(&cmd->args, "--local"); + if (args->quiet) + strvec_push(&cmd->args, "--quiet"); + if (args->delta_base_offset) + strvec_push(&cmd->args, "--delta-base-offset"); + strvec_push(&cmd->args, out); + cmd->git_cmd = 1; + cmd->out = -1; +} void pack_objects_args_release(struct pack_objects_args *args) { diff --git a/repack.h b/repack.h index 12632d7fec..3f7ec20735 100644 --- a/repack.h +++ b/repack.h @@ -21,6 +21,11 @@ struct pack_objects_args { #define PACK_OBJECTS_ARGS_INIT { .delta_base_offset = 1 } +struct child_process; + +void prepare_pack_objects(struct child_process *cmd, + const struct pack_objects_args *args, + const char *out); void pack_objects_args_release(struct pack_objects_args *args); #endif /* REPACK_H */ From f989b706d1fafd60e2f37dfc2d3f99e093b980be Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:08 -0400 Subject: [PATCH 15/50] builtin/repack.c: rename many 'struct existing_packs' functions Rename many of the 'struct existing_packs'-related functions according to the convention introduced in and described by 541204aabe (Documentation: document naming schema for structs and their functions, 2024-07-30). Note that some functions which operate over an individual entry in the list of existing packs are prefixed with "existing_pack_" instead of the plural form. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 66 +++++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index ff93654cfe..f82e6c3930 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -121,39 +121,39 @@ struct existing_packs { .cruft_packs = STRING_LIST_INIT_DUP, \ } -static int has_existing_non_kept_packs(const struct existing_packs *existing) +static int existing_packs_has_non_kept(const struct existing_packs *existing) { return existing->non_kept_packs.nr || existing->cruft_packs.nr; } -static void pack_mark_for_deletion(struct string_list_item *item) +static void existing_pack_mark_for_deletion(struct string_list_item *item) { item->util = (void*)((uintptr_t)item->util | DELETE_PACK); } -static void pack_unmark_for_deletion(struct string_list_item *item) +static void existing_pack_unmark_for_deletion(struct string_list_item *item) { item->util = (void*)((uintptr_t)item->util & ~DELETE_PACK); } -static int pack_is_marked_for_deletion(struct string_list_item *item) +static int existing_pack_is_marked_for_deletion(struct string_list_item *item) { return (uintptr_t)item->util & DELETE_PACK; } -static void pack_mark_retained(struct string_list_item *item) +static void existing_packs_mark_retained(struct string_list_item *item) { item->util = (void*)((uintptr_t)item->util | RETAIN_PACK); } -static int pack_is_retained(struct string_list_item *item) +static int existing_pack_is_retained(struct string_list_item *item) { return (uintptr_t)item->util & RETAIN_PACK; } -static void mark_packs_for_deletion_1(const struct git_hash_algo *algop, - struct string_list *names, - struct string_list *list) +static void existing_packs_mark_for_deletion_1(const struct git_hash_algo *algop, + struct string_list *names, + struct string_list *list) { struct string_list_item *item; const int hexsz = algop->hexsz; @@ -165,8 +165,8 @@ static void mark_packs_for_deletion_1(const struct git_hash_algo *algop, continue; sha1 = item->string + len - hexsz; - if (pack_is_retained(item)) { - pack_unmark_for_deletion(item); + if (existing_pack_is_retained(item)) { + existing_pack_unmark_for_deletion(item); } else if (!string_list_has_string(names, sha1)) { /* * Mark this pack for deletion, which ensures @@ -175,13 +175,13 @@ static void mark_packs_for_deletion_1(const struct git_hash_algo *algop, * will actually delete this pack (if `-d` was * given). */ - pack_mark_for_deletion(item); + existing_pack_mark_for_deletion(item); } } } -static void retain_cruft_pack(struct existing_packs *existing, - struct packed_git *cruft) +static void existing_packs_retain_cruft(struct existing_packs *existing, + struct packed_git *cruft) { struct strbuf buf = STRBUF_INIT; struct string_list_item *item; @@ -193,17 +193,19 @@ static void retain_cruft_pack(struct existing_packs *existing, if (!item) BUG("could not find cruft pack '%s'", pack_basename(cruft)); - pack_mark_retained(item); + existing_packs_mark_retained(item); strbuf_release(&buf); } -static void mark_packs_for_deletion(struct existing_packs *existing, - struct string_list *names) +static void existing_packs_mark_for_deletion(struct existing_packs *existing, + struct string_list *names) { const struct git_hash_algo *algop = existing->repo->hash_algo; - mark_packs_for_deletion_1(algop, names, &existing->non_kept_packs); - mark_packs_for_deletion_1(algop, names, &existing->cruft_packs); + existing_packs_mark_for_deletion_1(algop, names, + &existing->non_kept_packs); + existing_packs_mark_for_deletion_1(algop, names, + &existing->cruft_packs); } static void remove_redundant_pack(struct repository *repo, @@ -225,13 +227,13 @@ static void remove_redundant_packs_1(struct repository *repo, { struct string_list_item *item; for_each_string_list_item(item, packs) { - if (!pack_is_marked_for_deletion(item)) + if (!existing_pack_is_marked_for_deletion(item)) continue; remove_redundant_pack(repo, packdir, item->string); } } -static void remove_redundant_existing_packs(struct existing_packs *existing) +static void existing_packs_remove_redundant(struct existing_packs *existing) { remove_redundant_packs_1(existing->repo, &existing->non_kept_packs); remove_redundant_packs_1(existing->repo, &existing->cruft_packs); @@ -250,7 +252,7 @@ static void existing_packs_release(struct existing_packs *existing) * .keep file or not. Packs without a .keep file are not to be kept * if we are going to pack everything into one file. */ -static void collect_pack_filenames(struct existing_packs *existing, +static void existing_packs_collect(struct existing_packs *existing, const struct string_list *extra_keep) { struct packfile_store *packs = existing->repo->objects->packfiles; @@ -721,7 +723,7 @@ static int midx_has_unknown_packs(char **midx_pack_names, item = string_list_lookup(&existing->non_kept_packs, pack_name); - if (item && !pack_is_marked_for_deletion(item)) + if (item && !existing_pack_is_marked_for_deletion(item)) continue; } @@ -851,7 +853,7 @@ static void midx_included_packs(struct string_list *include, } } else { for_each_string_list_item(item, &existing->non_kept_packs) { - if (pack_is_marked_for_deletion(item)) + if (existing_pack_is_marked_for_deletion(item)) continue; strbuf_reset(&buf); @@ -888,10 +890,10 @@ static void midx_included_packs(struct string_list *include, * --geometric case, but doing so is unnecessary * since no packs are marked as pending * deletion (since we only call - * `mark_packs_for_deletion()` when doing an - * all-into-one repack). + * `existing_packs_mark_for_deletion()` when + * doing an all-into-one repack). */ - if (pack_is_marked_for_deletion(item)) + if (existing_pack_is_marked_for_deletion(item)) continue; strbuf_reset(&buf); @@ -1128,7 +1130,7 @@ static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, if (p->pack_size < combine_cruft_below_size) { fprintf(in, "-%s\n", pack_basename(p)); } else { - retain_cruft_pack(existing, p); + existing_packs_retain_cruft(existing, p); fprintf(in, "%s\n", pack_basename(p)); } } @@ -1382,7 +1384,7 @@ int cmd_repack(int argc, packtmp = mkpathdup("%s/%s", packdir, packtmp_name); existing.repo = repo; - collect_pack_filenames(&existing, &keep_pack_list); + existing_packs_collect(&existing, &keep_pack_list); if (geometry.split_factor) { if (pack_everything) @@ -1431,7 +1433,7 @@ int cmd_repack(int argc, if (pack_everything & ALL_INTO_ONE) { repack_promisor_objects(repo, &po_args, &names); - if (has_existing_non_kept_packs(&existing) && + if (existing_packs_has_non_kept(&existing) && delete_redundant && !(pack_everything & PACK_CRUFT)) { for_each_string_list_item(item, &names) { @@ -1647,7 +1649,7 @@ int cmd_repack(int argc, /* End of pack replacement. */ if (delete_redundant && pack_everything & ALL_INTO_ONE) - mark_packs_for_deletion(&existing, &names); + existing_packs_mark_for_deletion(&existing, &names); if (write_midx) { struct string_list include = STRING_LIST_INIT_DUP; @@ -1671,7 +1673,7 @@ int cmd_repack(int argc, if (delete_redundant) { int opts = 0; - remove_redundant_existing_packs(&existing); + existing_packs_remove_redundant(&existing); if (geometry.split_factor) geometry_remove_redundant_packs(&geometry, &names, From 7be7f73cf21356a2feb701e5968a82e2a762879c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:12 -0400 Subject: [PATCH 16/50] repack: remove 'remove_redundant_pack' from the builtin Extract "remove_redundant_pack()" as generic repack-related functionality by moving its implementation to the repack.[ch] compilation unit. This is a prerequisite to moving the "existing_packs" API, which is one of the callers of this function. (The remaining caller in the pack geometry code will eventually move to its own compilation unit as well, and will likewise rely on this function.) While moving it over, prefix the function name with "repack_" to indicate that it belongs to the repack-subsystem. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 18 ++---------------- repack.c | 18 ++++++++++++++++++ repack.h | 3 +++ 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index f82e6c3930..31137cf711 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -208,20 +208,6 @@ static void existing_packs_mark_for_deletion(struct existing_packs *existing, &existing->cruft_packs); } -static void remove_redundant_pack(struct repository *repo, - const char *dir_name, const char *base_name) -{ - struct strbuf buf = STRBUF_INIT; - struct odb_source *source = repo->objects->sources; - struct multi_pack_index *m = get_multi_pack_index(source); - strbuf_addf(&buf, "%s.pack", base_name); - if (m && source->local && midx_contains_pack(m, buf.buf)) - clear_midx_file(repo); - strbuf_insertf(&buf, 0, "%s/", dir_name); - unlink_pack_path(buf.buf, 1); - strbuf_release(&buf); -} - static void remove_redundant_packs_1(struct repository *repo, struct string_list *packs) { @@ -229,7 +215,7 @@ static void remove_redundant_packs_1(struct repository *repo, for_each_string_list_item(item, packs) { if (!existing_pack_is_marked_for_deletion(item)) continue; - remove_redundant_pack(repo, packdir, item->string); + repack_remove_redundant_pack(repo, packdir, item->string); } } @@ -652,7 +638,7 @@ static void geometry_remove_redundant_packs(struct pack_geometry *geometry, (string_list_has_string(&existing->kept_packs, buf.buf))) continue; - remove_redundant_pack(existing->repo, packdir, buf.buf); + repack_remove_redundant_pack(existing->repo, packdir, buf.buf); } strbuf_release(&buf); diff --git a/repack.c b/repack.c index 91b6e1cc09..3aaa351b5b 100644 --- a/repack.c +++ b/repack.c @@ -1,5 +1,9 @@ #include "git-compat-util.h" +#include "midx.h" +#include "odb.h" +#include "packfile.h" #include "repack.h" +#include "repository.h" #include "run-command.h" void prepare_pack_objects(struct child_process *cmd, @@ -44,3 +48,17 @@ void pack_objects_args_release(struct pack_objects_args *args) free(args->threads); list_objects_filter_release(&args->filter_options); } + +void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, + const char *base_name) +{ + struct strbuf buf = STRBUF_INIT; + struct odb_source *source = repo->objects->sources; + struct multi_pack_index *m = get_multi_pack_index(source); + strbuf_addf(&buf, "%s.pack", base_name); + if (m && source->local && midx_contains_pack(m, buf.buf)) + clear_midx_file(repo); + strbuf_insertf(&buf, 0, "%s/", dir_name); + unlink_pack_path(buf.buf, 1); + strbuf_release(&buf); +} diff --git a/repack.h b/repack.h index 3f7ec20735..a62bfa2ff9 100644 --- a/repack.h +++ b/repack.h @@ -28,4 +28,7 @@ void prepare_pack_objects(struct child_process *cmd, const char *out); void pack_objects_args_release(struct pack_objects_args *args); +void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, + const char *base_name); + #endif /* REPACK_H */ From fc808b39a40939714e26d355f2a01e995e65f871 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:16 -0400 Subject: [PATCH 17/50] builtin/repack.c: pass "packdir" when removing packs builtin/repack.c defines a static "packdir" to instruct pack-objects on where to write any new packfiles. This is also the directory scanned when removing any packfiles which were made redundant by the latest repack. Prepare to move the "existing_packs_remove_redundant" function to its own compilation unit by passing in this information as a parameter to that function. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 31137cf711..c5a88eda12 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -209,7 +209,8 @@ static void existing_packs_mark_for_deletion(struct existing_packs *existing, } static void remove_redundant_packs_1(struct repository *repo, - struct string_list *packs) + struct string_list *packs, + const char *packdir) { struct string_list_item *item; for_each_string_list_item(item, packs) { @@ -219,10 +220,13 @@ static void remove_redundant_packs_1(struct repository *repo, } } -static void existing_packs_remove_redundant(struct existing_packs *existing) +static void existing_packs_remove_redundant(struct existing_packs *existing, + const char *packdir) { - remove_redundant_packs_1(existing->repo, &existing->non_kept_packs); - remove_redundant_packs_1(existing->repo, &existing->cruft_packs); + remove_redundant_packs_1(existing->repo, &existing->non_kept_packs, + packdir); + remove_redundant_packs_1(existing->repo, &existing->cruft_packs, + packdir); } static void existing_packs_release(struct existing_packs *existing) @@ -1659,7 +1663,7 @@ int cmd_repack(int argc, if (delete_redundant) { int opts = 0; - existing_packs_remove_redundant(&existing); + existing_packs_remove_redundant(&existing, packdir); if (geometry.split_factor) geometry_remove_redundant_packs(&geometry, &names, From 92e1e725f1d165a5cfd94fcab98e359b86388a4c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:19 -0400 Subject: [PATCH 18/50] builtin/repack.c: avoid unnecessary numeric casts in existing_packs There are a couple of spots that cause warnings within the existing_packs API without DISABLE_SIGN_COMPARE_WARNINGS under DEVELOPER=1 mode. In both cases, we have int values that are being compared against size_t ones. Neither of these two cases are incorrect, and the cast is completely OK in practice. But both are unnecessary, since: - in existing_packs_mark_for_deletion_1(), 'hexsz' should be defined as a size_t anyway, since algop->hexsz is. - in existing_packs_collect(), 'i' should be defined as a size_t since it is counting up to the value of a string_list's 'nr' field. (This patch is a little bit of noise, but I would rather see us squelch these warnings ahead of moving the existing_packs API into a separate compilation unit to avoid having to define DISABLE_SIGN_COMPARE_WARNINGS in repack.c.) Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index c5a88eda12..e13943b637 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -156,7 +156,7 @@ static void existing_packs_mark_for_deletion_1(const struct git_hash_algo *algop struct string_list *list) { struct string_list_item *item; - const int hexsz = algop->hexsz; + const size_t hexsz = algop->hexsz; for_each_string_list_item(item, list) { char *sha1; @@ -250,7 +250,7 @@ static void existing_packs_collect(struct existing_packs *existing, struct strbuf buf = STRBUF_INIT; for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - int i; + size_t i; const char *base; if (!p->pack_local) From f2bfe705d9cbffa61f9fd129d0a7b913b832440e Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:23 -0400 Subject: [PATCH 19/50] repack: remove 'existing_packs' API from the builtin The repack builtin defines an API for keeping track of which packs were found in the repository at the beginning of the repack operation. This is used to classify what state a pack was in (kept, non-kept, or cruft), and is also used to mark which packs to delete (or keep) at the end of a repack operation. Now that the prerequisite refactoring is complete, this API is isolated enough that it can be moved out to repack.ch and removed from the builtin entirely. As a result, some of its functions become static within repack.c, cleaning up the visible API. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 173 ----------------------------------------------- repack.c | 157 ++++++++++++++++++++++++++++++++++++++++++ repack.h | 35 ++++++++++ 3 files changed, 192 insertions(+), 173 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index e13943b637..a168c88791 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -3,7 +3,6 @@ #include "builtin.h" #include "config.h" -#include "dir.h" #include "environment.h" #include "gettext.h" #include "hex.h" @@ -108,178 +107,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -struct existing_packs { - struct repository *repo; - struct string_list kept_packs; - struct string_list non_kept_packs; - struct string_list cruft_packs; -}; - -#define EXISTING_PACKS_INIT { \ - .kept_packs = STRING_LIST_INIT_DUP, \ - .non_kept_packs = STRING_LIST_INIT_DUP, \ - .cruft_packs = STRING_LIST_INIT_DUP, \ -} - -static int existing_packs_has_non_kept(const struct existing_packs *existing) -{ - return existing->non_kept_packs.nr || existing->cruft_packs.nr; -} - -static void existing_pack_mark_for_deletion(struct string_list_item *item) -{ - item->util = (void*)((uintptr_t)item->util | DELETE_PACK); -} - -static void existing_pack_unmark_for_deletion(struct string_list_item *item) -{ - item->util = (void*)((uintptr_t)item->util & ~DELETE_PACK); -} - -static int existing_pack_is_marked_for_deletion(struct string_list_item *item) -{ - return (uintptr_t)item->util & DELETE_PACK; -} - -static void existing_packs_mark_retained(struct string_list_item *item) -{ - item->util = (void*)((uintptr_t)item->util | RETAIN_PACK); -} - -static int existing_pack_is_retained(struct string_list_item *item) -{ - return (uintptr_t)item->util & RETAIN_PACK; -} - -static void existing_packs_mark_for_deletion_1(const struct git_hash_algo *algop, - struct string_list *names, - struct string_list *list) -{ - struct string_list_item *item; - const size_t hexsz = algop->hexsz; - - for_each_string_list_item(item, list) { - char *sha1; - size_t len = strlen(item->string); - if (len < hexsz) - continue; - sha1 = item->string + len - hexsz; - - if (existing_pack_is_retained(item)) { - existing_pack_unmark_for_deletion(item); - } else if (!string_list_has_string(names, sha1)) { - /* - * Mark this pack for deletion, which ensures - * that this pack won't be included in a MIDX - * (if `--write-midx` was given) and that we - * will actually delete this pack (if `-d` was - * given). - */ - existing_pack_mark_for_deletion(item); - } - } -} - -static void existing_packs_retain_cruft(struct existing_packs *existing, - struct packed_git *cruft) -{ - struct strbuf buf = STRBUF_INIT; - struct string_list_item *item; - - strbuf_addstr(&buf, pack_basename(cruft)); - strbuf_strip_suffix(&buf, ".pack"); - - item = string_list_lookup(&existing->cruft_packs, buf.buf); - if (!item) - BUG("could not find cruft pack '%s'", pack_basename(cruft)); - - existing_packs_mark_retained(item); - strbuf_release(&buf); -} - -static void existing_packs_mark_for_deletion(struct existing_packs *existing, - struct string_list *names) - -{ - const struct git_hash_algo *algop = existing->repo->hash_algo; - existing_packs_mark_for_deletion_1(algop, names, - &existing->non_kept_packs); - existing_packs_mark_for_deletion_1(algop, names, - &existing->cruft_packs); -} - -static void remove_redundant_packs_1(struct repository *repo, - struct string_list *packs, - const char *packdir) -{ - struct string_list_item *item; - for_each_string_list_item(item, packs) { - if (!existing_pack_is_marked_for_deletion(item)) - continue; - repack_remove_redundant_pack(repo, packdir, item->string); - } -} - -static void existing_packs_remove_redundant(struct existing_packs *existing, - const char *packdir) -{ - remove_redundant_packs_1(existing->repo, &existing->non_kept_packs, - packdir); - remove_redundant_packs_1(existing->repo, &existing->cruft_packs, - packdir); -} - -static void existing_packs_release(struct existing_packs *existing) -{ - string_list_clear(&existing->kept_packs, 0); - string_list_clear(&existing->non_kept_packs, 0); - string_list_clear(&existing->cruft_packs, 0); -} - -/* - * Adds all packs hex strings (pack-$HASH) to either packs->non_kept - * or packs->kept based on whether each pack has a corresponding - * .keep file or not. Packs without a .keep file are not to be kept - * if we are going to pack everything into one file. - */ -static void existing_packs_collect(struct existing_packs *existing, - const struct string_list *extra_keep) -{ - struct packfile_store *packs = existing->repo->objects->packfiles; - struct packed_git *p; - struct strbuf buf = STRBUF_INIT; - - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - size_t i; - const char *base; - - if (!p->pack_local) - continue; - - base = pack_basename(p); - - for (i = 0; i < extra_keep->nr; i++) - if (!fspathcmp(base, extra_keep->items[i].string)) - break; - - strbuf_reset(&buf); - strbuf_addstr(&buf, base); - strbuf_strip_suffix(&buf, ".pack"); - - if ((extra_keep->nr > 0 && i < extra_keep->nr) || p->pack_keep) - string_list_append(&existing->kept_packs, buf.buf); - else if (p->is_cruft) - string_list_append(&existing->cruft_packs, buf.buf); - else - string_list_append(&existing->non_kept_packs, buf.buf); - } - - string_list_sort(&existing->kept_packs); - string_list_sort(&existing->non_kept_packs); - string_list_sort(&existing->cruft_packs); - strbuf_release(&buf); -} - struct write_oid_context { struct child_process *cmd; const struct git_hash_algo *algop; diff --git a/repack.c b/repack.c index 3aaa351b5b..9182e1c50b 100644 --- a/repack.c +++ b/repack.c @@ -1,4 +1,5 @@ #include "git-compat-util.h" +#include "dir.h" #include "midx.h" #include "odb.h" #include "packfile.h" @@ -62,3 +63,159 @@ void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, unlink_pack_path(buf.buf, 1); strbuf_release(&buf); } + +#define DELETE_PACK 1 +#define RETAIN_PACK 2 + +void existing_packs_collect(struct existing_packs *existing, + const struct string_list *extra_keep) +{ + struct packfile_store *packs = existing->repo->objects->packfiles; + struct packed_git *p; + struct strbuf buf = STRBUF_INIT; + + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + size_t i; + const char *base; + + if (!p->pack_local) + continue; + + base = pack_basename(p); + + for (i = 0; i < extra_keep->nr; i++) + if (!fspathcmp(base, extra_keep->items[i].string)) + break; + + strbuf_reset(&buf); + strbuf_addstr(&buf, base); + strbuf_strip_suffix(&buf, ".pack"); + + if ((extra_keep->nr > 0 && i < extra_keep->nr) || p->pack_keep) + string_list_append(&existing->kept_packs, buf.buf); + else if (p->is_cruft) + string_list_append(&existing->cruft_packs, buf.buf); + else + string_list_append(&existing->non_kept_packs, buf.buf); + } + + string_list_sort(&existing->kept_packs); + string_list_sort(&existing->non_kept_packs); + string_list_sort(&existing->cruft_packs); + strbuf_release(&buf); +} + +int existing_packs_has_non_kept(const struct existing_packs *existing) +{ + return existing->non_kept_packs.nr || existing->cruft_packs.nr; +} + +static void existing_pack_mark_for_deletion(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util | DELETE_PACK); +} + +static void existing_pack_unmark_for_deletion(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util & ~DELETE_PACK); +} + +int existing_pack_is_marked_for_deletion(struct string_list_item *item) +{ + return (uintptr_t)item->util & DELETE_PACK; +} + +static void existing_packs_mark_retained(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util | RETAIN_PACK); +} + +static int existing_pack_is_retained(struct string_list_item *item) +{ + return (uintptr_t)item->util & RETAIN_PACK; +} + +static void existing_packs_mark_for_deletion_1(const struct git_hash_algo *algop, + struct string_list *names, + struct string_list *list) +{ + struct string_list_item *item; + const size_t hexsz = algop->hexsz; + + for_each_string_list_item(item, list) { + char *sha1; + size_t len = strlen(item->string); + if (len < hexsz) + continue; + sha1 = item->string + len - hexsz; + + if (existing_pack_is_retained(item)) { + existing_pack_unmark_for_deletion(item); + } else if (!string_list_has_string(names, sha1)) { + /* + * Mark this pack for deletion, which ensures + * that this pack won't be included in a MIDX + * (if `--write-midx` was given) and that we + * will actually delete this pack (if `-d` was + * given). + */ + existing_pack_mark_for_deletion(item); + } + } +} + +void existing_packs_retain_cruft(struct existing_packs *existing, + struct packed_git *cruft) +{ + struct strbuf buf = STRBUF_INIT; + struct string_list_item *item; + + strbuf_addstr(&buf, pack_basename(cruft)); + strbuf_strip_suffix(&buf, ".pack"); + + item = string_list_lookup(&existing->cruft_packs, buf.buf); + if (!item) + BUG("could not find cruft pack '%s'", pack_basename(cruft)); + + existing_packs_mark_retained(item); + strbuf_release(&buf); +} + +void existing_packs_mark_for_deletion(struct existing_packs *existing, + struct string_list *names) + +{ + const struct git_hash_algo *algop = existing->repo->hash_algo; + existing_packs_mark_for_deletion_1(algop, names, + &existing->non_kept_packs); + existing_packs_mark_for_deletion_1(algop, names, + &existing->cruft_packs); +} + +static void remove_redundant_packs_1(struct repository *repo, + struct string_list *packs, + const char *packdir) +{ + struct string_list_item *item; + for_each_string_list_item(item, packs) { + if (!existing_pack_is_marked_for_deletion(item)) + continue; + repack_remove_redundant_pack(repo, packdir, item->string); + } +} + +void existing_packs_remove_redundant(struct existing_packs *existing, + const char *packdir) +{ + remove_redundant_packs_1(existing->repo, &existing->non_kept_packs, + packdir); + remove_redundant_packs_1(existing->repo, &existing->cruft_packs, + packdir); +} + +void existing_packs_release(struct existing_packs *existing) +{ + string_list_clear(&existing->kept_packs, 0); + string_list_clear(&existing->non_kept_packs, 0); + string_list_clear(&existing->cruft_packs, 0); +} diff --git a/repack.h b/repack.h index a62bfa2ff9..19796e2243 100644 --- a/repack.h +++ b/repack.h @@ -2,6 +2,7 @@ #define REPACK_H #include "list-objects-filter-options.h" +#include "string-list.h" struct pack_objects_args { char *window; @@ -31,4 +32,38 @@ void pack_objects_args_release(struct pack_objects_args *args); void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, const char *base_name); +struct repository; +struct packed_git; + +struct existing_packs { + struct repository *repo; + struct string_list kept_packs; + struct string_list non_kept_packs; + struct string_list cruft_packs; +}; + +#define EXISTING_PACKS_INIT { \ + .kept_packs = STRING_LIST_INIT_DUP, \ + .non_kept_packs = STRING_LIST_INIT_DUP, \ + .cruft_packs = STRING_LIST_INIT_DUP, \ +} + +/* + * Adds all packs hex strings (pack-$HASH) to either packs->non_kept + * or packs->kept based on whether each pack has a corresponding + * .keep file or not. Packs without a .keep file are not to be kept + * if we are going to pack everything into one file. + */ +void existing_packs_collect(struct existing_packs *existing, + const struct string_list *extra_keep); +int existing_packs_has_non_kept(const struct existing_packs *existing); +int existing_pack_is_marked_for_deletion(struct string_list_item *item); +void existing_packs_retain_cruft(struct existing_packs *existing, + struct packed_git *cruft); +void existing_packs_mark_for_deletion(struct existing_packs *existing, + struct string_list *names); +void existing_packs_remove_redundant(struct existing_packs *existing, + const char *packdir); +void existing_packs_release(struct existing_packs *existing); + #endif /* REPACK_H */ From 93876709017308bfe5fbdd026778425018aa3a7f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:27 -0400 Subject: [PATCH 20/50] builtin/repack.c: rename "struct generated_pack_data" The name "generated_pack_data" is somewhat redundant, since the contents of the struct *is* the data associated with the generated pack. Rename the structure to just "generated_pack", resulting in less awkward function names, like "generated_pack_has_ext()" which is preferable to "generated_pack_data_has_ext()". Rename a few related functions to align with the convention that functions to do with a struct "S" should be prefixed with "S_". Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index a168c88791..a4d80b6b04 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -146,15 +146,15 @@ static struct { {".idx"}, }; -struct generated_pack_data { +struct generated_pack { struct tempfile *tempfiles[ARRAY_SIZE(exts)]; }; -static struct generated_pack_data *populate_pack_exts(const char *name) +static struct generated_pack *generated_pack_populate(const char *name) { struct stat statbuf; struct strbuf path = STRBUF_INIT; - struct generated_pack_data *data = xcalloc(1, sizeof(*data)); + struct generated_pack *pack = xcalloc(1, sizeof(*pack)); int i; for (i = 0; i < ARRAY_SIZE(exts); i++) { @@ -164,21 +164,21 @@ static struct generated_pack_data *populate_pack_exts(const char *name) if (stat(path.buf, &statbuf)) continue; - data->tempfiles[i] = register_tempfile(path.buf); + pack->tempfiles[i] = register_tempfile(path.buf); } strbuf_release(&path); - return data; + return pack; } -static int has_pack_ext(const struct generated_pack_data *data, - const char *ext) +static int generated_pack_has_ext(const struct generated_pack *pack, + const char *ext) { int i; for (i = 0; i < ARRAY_SIZE(exts); i++) { if (strcmp(exts[i].name, ext)) continue; - return !!data->tempfiles[i]; + return !!pack->tempfiles[i]; } BUG("unknown pack extension: '%s'", ext); } @@ -239,7 +239,7 @@ static void repack_promisor_objects(struct repository *repo, line.buf); write_promisor_file(promisor_name, NULL, 0); - item->util = populate_pack_exts(item->string); + item->util = generated_pack_populate(item->string); free(promisor_name); } @@ -780,8 +780,8 @@ static int write_midx_included_packs(struct string_list *include, * will suffice, so pick the first one.) */ for_each_string_list_item(item, names) { - struct generated_pack_data *data = item->util; - if (has_pack_ext(data, ".mtimes")) + struct generated_pack *pack = item->util; + if (generated_pack_has_ext(pack, ".mtimes")) continue; strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack", @@ -864,7 +864,7 @@ static int finish_pack_objects_cmd(const struct git_hash_algo *algop, */ if (local) { item = string_list_append(names, line.buf); - item->util = populate_pack_exts(line.buf); + item->util = generated_pack_populate(line.buf); } } fclose(out); @@ -1435,7 +1435,7 @@ int cmd_repack(int argc, * Ok we have prepared all new packfiles. */ for_each_string_list_item(item, &names) { - struct generated_pack_data *data = item->util; + struct generated_pack *pack = item->util; for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { char *fname; @@ -1443,8 +1443,8 @@ int cmd_repack(int argc, fname = mkpathdup("%s/pack-%s%s", packdir, item->string, exts[ext].name); - if (data->tempfiles[ext]) { - const char *fname_old = get_tempfile_path(data->tempfiles[ext]); + if (pack->tempfiles[ext]) { + const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); struct stat statbuffer; if (!stat(fname_old, &statbuffer)) { @@ -1452,7 +1452,7 @@ int cmd_repack(int argc, chmod(fname_old, statbuffer.st_mode); } - if (rename_tempfile(&data->tempfiles[ext], fname)) + if (rename_tempfile(&pack->tempfiles[ext], fname)) die_errno(_("renaming pack to '%s' failed"), fname); } else if (!exts[ext].optional) die(_("pack-objects did not write a '%s' file for pack %s-%s"), From 59f0759d260d705a0e3ad0397c2412d2c16cddb3 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:31 -0400 Subject: [PATCH 21/50] builtin/repack.c: factor our "generated_pack_install" Once all new packs are known to exist, 'repack' installs their contents from their temporary location into their permanent one. This is a semi-involved procedure for each pack, since for each extension (e.g., ".idx", ".pack", ".mtimes", and so on) we have to either: - adjust the filemode of the temporary file before renaming it into place, or - die() if we are missing a non-optional extension, or - unlink() any existing file for extensions that we did not generate (e.g., if a non-cruft pack we generated was identical to, say, a cruft pack which existed at the beginning of the process, we have to remove the ".mtimes" file). Extract this procedure into its own function, and call it "generated_pack_install"(). This will set us up for pulling this function out of the builtin entirely and making it part of the repack.h API, which will be done in a future commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 66 ++++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index a4d80b6b04..8c3a5f4f80 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -183,6 +183,38 @@ static int generated_pack_has_ext(const struct generated_pack *pack, BUG("unknown pack extension: '%s'", ext); } +static void generated_pack_install(struct generated_pack *pack, + const char *name) +{ + int ext; + for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { + char *fname; + + fname = mkpathdup("%s/pack-%s%s", packdir, name, + exts[ext].name); + + if (pack->tempfiles[ext]) { + const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); + struct stat statbuffer; + + if (!stat(fname_old, &statbuffer)) { + statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); + chmod(fname_old, statbuffer.st_mode); + } + + if (rename_tempfile(&pack->tempfiles[ext], fname)) + die_errno(_("renaming pack to '%s' failed"), + fname); + } else if (!exts[ext].optional) + die(_("pack-objects did not write a '%s' file for pack %s-%s"), + exts[ext].name, packtmp, name); + else if (unlink(fname) < 0 && errno != ENOENT) + die_errno(_("could not unlink: %s"), fname); + + free(fname); + } +} + static void repack_promisor_objects(struct repository *repo, const struct pack_objects_args *args, struct string_list *names) @@ -1045,7 +1077,7 @@ int cmd_repack(int argc, struct existing_packs existing = EXISTING_PACKS_INIT; struct pack_geometry geometry = { 0 }; struct tempfile *refs_snapshot = NULL; - int i, ext, ret; + int i, ret; int show_progress; char **midx_pack_names = NULL; size_t midx_pack_names_nr = 0; @@ -1434,35 +1466,9 @@ int cmd_repack(int argc, /* * Ok we have prepared all new packfiles. */ - for_each_string_list_item(item, &names) { - struct generated_pack *pack = item->util; - - for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { - char *fname; - - fname = mkpathdup("%s/pack-%s%s", - packdir, item->string, exts[ext].name); - - if (pack->tempfiles[ext]) { - const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); - struct stat statbuffer; - - if (!stat(fname_old, &statbuffer)) { - statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); - chmod(fname_old, statbuffer.st_mode); - } - - if (rename_tempfile(&pack->tempfiles[ext], fname)) - die_errno(_("renaming pack to '%s' failed"), fname); - } else if (!exts[ext].optional) - die(_("pack-objects did not write a '%s' file for pack %s-%s"), - exts[ext].name, packtmp, item->string); - else if (unlink(fname) < 0 && errno != ENOENT) - die_errno(_("could not unlink: %s"), fname); - - free(fname); - } - } + for_each_string_list_item(item, &names) + generated_pack_install((struct generated_pack *)item->util, + item->string); /* End of pack replacement. */ if (delete_redundant && pack_everything & ALL_INTO_ONE) From d99a1faa9d99d9c6d3be80fd52cb59c136b477bb Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:35 -0400 Subject: [PATCH 22/50] builtin/repack.c: pass "packtmp" to `generated_pack_populate()` In a similar spirit as previous commits, this function needs to know the temporary pack prefix, which it currently accesses through the static "packtmp" variable within builtin/repack.c. Pass it explicitly as a function parameter to facilitate moving this function out of builtin/repack.c entirely. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 8c3a5f4f80..2141c43bd2 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -150,7 +150,8 @@ struct generated_pack { struct tempfile *tempfiles[ARRAY_SIZE(exts)]; }; -static struct generated_pack *generated_pack_populate(const char *name) +static struct generated_pack *generated_pack_populate(const char *name, + const char *packtmp) { struct stat statbuf; struct strbuf path = STRBUF_INIT; @@ -271,7 +272,7 @@ static void repack_promisor_objects(struct repository *repo, line.buf); write_promisor_file(promisor_name, NULL, 0); - item->util = generated_pack_populate(item->string); + item->util = generated_pack_populate(item->string, packtmp); free(promisor_name); } @@ -896,7 +897,7 @@ static int finish_pack_objects_cmd(const struct git_hash_algo *algop, */ if (local) { item = string_list_append(names, line.buf); - item->util = generated_pack_populate(line.buf); + item->util = generated_pack_populate(line.buf, packtmp); } } fclose(out); From 4bf53de93eedaf33fd9937da68b6e9d487a3b6d7 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:39 -0400 Subject: [PATCH 23/50] builtin/repack.c: provide pack locations to `generated_pack_install()` Repeat what was done in the preceding commit for the `generated_pack_install()` function, which needs both "packdir" and "packtmp". (As an aside, it is somewhat unfortunate that the final three parameters to this function are all "const char *", making errors like passing "packdir" and "packtmp" in the wrong order easy. We could define a new structure here, but that may be too heavy-handed.) Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 2141c43bd2..a4f0a19453 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -185,7 +185,8 @@ static int generated_pack_has_ext(const struct generated_pack *pack, } static void generated_pack_install(struct generated_pack *pack, - const char *name) + const char *name, + const char *packdir, const char *packtmp) { int ext; for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { @@ -1469,7 +1470,7 @@ int cmd_repack(int argc, */ for_each_string_list_item(item, &names) generated_pack_install((struct generated_pack *)item->util, - item->string); + item->string, packdir, packtmp); /* End of pack replacement. */ if (delete_redundant && pack_everything & ALL_INTO_ONE) From 494fee74dc4ea9561d5bbda63026bb1eb641c85c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:43 -0400 Subject: [PATCH 24/50] repack: remove 'generated_pack' API from the builtin Now that we have factored the "generated_pack" API, we can move it to repack.ch, further slimming down builtin/repack.c. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 83 ------------------------------------------------ repack.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 8 +++++ 3 files changed, 91 insertions(+), 83 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index a4f0a19453..b7826e676b 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -134,89 +134,6 @@ static int write_oid(const struct object_id *oid, return 0; } -static struct { - const char *name; - unsigned optional:1; -} exts[] = { - {".pack"}, - {".rev", 1}, - {".mtimes", 1}, - {".bitmap", 1}, - {".promisor", 1}, - {".idx"}, -}; - -struct generated_pack { - struct tempfile *tempfiles[ARRAY_SIZE(exts)]; -}; - -static struct generated_pack *generated_pack_populate(const char *name, - const char *packtmp) -{ - struct stat statbuf; - struct strbuf path = STRBUF_INIT; - struct generated_pack *pack = xcalloc(1, sizeof(*pack)); - int i; - - for (i = 0; i < ARRAY_SIZE(exts); i++) { - strbuf_reset(&path); - strbuf_addf(&path, "%s-%s%s", packtmp, name, exts[i].name); - - if (stat(path.buf, &statbuf)) - continue; - - pack->tempfiles[i] = register_tempfile(path.buf); - } - - strbuf_release(&path); - return pack; -} - -static int generated_pack_has_ext(const struct generated_pack *pack, - const char *ext) -{ - int i; - for (i = 0; i < ARRAY_SIZE(exts); i++) { - if (strcmp(exts[i].name, ext)) - continue; - return !!pack->tempfiles[i]; - } - BUG("unknown pack extension: '%s'", ext); -} - -static void generated_pack_install(struct generated_pack *pack, - const char *name, - const char *packdir, const char *packtmp) -{ - int ext; - for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { - char *fname; - - fname = mkpathdup("%s/pack-%s%s", packdir, name, - exts[ext].name); - - if (pack->tempfiles[ext]) { - const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); - struct stat statbuffer; - - if (!stat(fname_old, &statbuffer)) { - statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); - chmod(fname_old, statbuffer.st_mode); - } - - if (rename_tempfile(&pack->tempfiles[ext], fname)) - die_errno(_("renaming pack to '%s' failed"), - fname); - } else if (!exts[ext].optional) - die(_("pack-objects did not write a '%s' file for pack %s-%s"), - exts[ext].name, packtmp, name); - else if (unlink(fname) < 0 && errno != ENOENT) - die_errno(_("could not unlink: %s"), fname); - - free(fname); - } -} - static void repack_promisor_objects(struct repository *repo, const struct pack_objects_args *args, struct string_list *names) diff --git a/repack.c b/repack.c index 9182e1c50b..d8afdd352d 100644 --- a/repack.c +++ b/repack.c @@ -3,9 +3,11 @@ #include "midx.h" #include "odb.h" #include "packfile.h" +#include "path.h" #include "repack.h" #include "repository.h" #include "run-command.h" +#include "tempfile.h" void prepare_pack_objects(struct child_process *cmd, const struct pack_objects_args *args, @@ -219,3 +221,84 @@ void existing_packs_release(struct existing_packs *existing) string_list_clear(&existing->non_kept_packs, 0); string_list_clear(&existing->cruft_packs, 0); } + +static struct { + const char *name; + unsigned optional:1; +} exts[] = { + {".pack"}, + {".rev", 1}, + {".mtimes", 1}, + {".bitmap", 1}, + {".promisor", 1}, + {".idx"}, +}; + +struct generated_pack { + struct tempfile *tempfiles[ARRAY_SIZE(exts)]; +}; + +struct generated_pack *generated_pack_populate(const char *name, + const char *packtmp) +{ + struct stat statbuf; + struct strbuf path = STRBUF_INIT; + struct generated_pack *pack = xcalloc(1, sizeof(*pack)); + size_t i; + + for (i = 0; i < ARRAY_SIZE(exts); i++) { + strbuf_reset(&path); + strbuf_addf(&path, "%s-%s%s", packtmp, name, exts[i].name); + + if (stat(path.buf, &statbuf)) + continue; + + pack->tempfiles[i] = register_tempfile(path.buf); + } + + strbuf_release(&path); + return pack; +} + +int generated_pack_has_ext(const struct generated_pack *pack, const char *ext) +{ + size_t i; + for (i = 0; i < ARRAY_SIZE(exts); i++) { + if (strcmp(exts[i].name, ext)) + continue; + return !!pack->tempfiles[i]; + } + BUG("unknown pack extension: '%s'", ext); +} + +void generated_pack_install(struct generated_pack *pack, const char *name, + const char *packdir, const char *packtmp) +{ + size_t ext; + for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { + char *fname; + + fname = mkpathdup("%s/pack-%s%s", packdir, name, + exts[ext].name); + + if (pack->tempfiles[ext]) { + const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); + struct stat statbuffer; + + if (!stat(fname_old, &statbuffer)) { + statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); + chmod(fname_old, statbuffer.st_mode); + } + + if (rename_tempfile(&pack->tempfiles[ext], fname)) + die_errno(_("renaming pack to '%s' failed"), + fname); + } else if (!exts[ext].optional) + die(_("pack-objects did not write a '%s' file for pack %s-%s"), + exts[ext].name, packtmp, name); + else if (unlink(fname) < 0 && errno != ENOENT) + die_errno(_("could not unlink: %s"), fname); + + free(fname); + } +} diff --git a/repack.h b/repack.h index 19796e2243..f37eb49524 100644 --- a/repack.h +++ b/repack.h @@ -66,4 +66,12 @@ void existing_packs_remove_redundant(struct existing_packs *existing, const char *packdir); void existing_packs_release(struct existing_packs *existing); +struct generated_pack; + +struct generated_pack *generated_pack_populate(const char *name, + const char *packtmp); +int generated_pack_has_ext(const struct generated_pack *pack, const char *ext); +void generated_pack_install(struct generated_pack *pack, const char *name, + const char *packdir, const char *packtmp); + #endif /* REPACK_H */ From f4f36b2990f15806d499fa6aaaed5459694e87a9 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:47 -0400 Subject: [PATCH 25/50] builtin/repack.c: pass "packtmp" to `repack_promisor_objects()` In a similar spirit as previous commit(s), pass the "packtmp" variable to "repack_promisor_objects()" as an explicit parameter of the function, preparing us to move this function in a following commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index b7826e676b..ca7658e38f 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -136,7 +136,8 @@ static int write_oid(const struct object_id *oid, static void repack_promisor_objects(struct repository *repo, const struct pack_objects_args *args, - struct string_list *names) + struct string_list *names, + const char *packtmp) { struct write_oid_context ctx; struct child_process cmd = CHILD_PROCESS_INIT; @@ -1199,7 +1200,7 @@ int cmd_repack(int argc, strvec_push(&cmd.args, "--delta-islands"); if (pack_everything & ALL_INTO_ONE) { - repack_promisor_objects(repo, &po_args, &names); + repack_promisor_objects(repo, &po_args, &names, packtmp); if (existing_packs_has_non_kept(&existing) && delete_redundant && From 9b063b13e28a1ac153c1c54a283164900bb09624 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:51 -0400 Subject: [PATCH 26/50] builtin/repack.c: remove "repack_promisor_objects()" from the builtin Now that we have properly factored the portion of the builtin which is responsible for repacking promisor objects, we can move that function (and associated dependencies) out of the builtin entirely. Similar to previous extractions, this function is declared in repack.h, but implemented in a separate repack-promisor.c file. This is done to separate promisor-specific repacking functionality from generic repack utilities (like "existing_packs", and "generated_pack" APIs). Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 95 ------------------------------------------ meson.build | 1 + repack-promisor.c | 102 ++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 4 ++ 5 files changed, 108 insertions(+), 95 deletions(-) create mode 100644 repack-promisor.c diff --git a/Makefile b/Makefile index c0df6da237..2a01bd92dc 100644 --- a/Makefile +++ b/Makefile @@ -1137,6 +1137,7 @@ LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o LIB_OBJS += repack.o +LIB_OBJS += repack-promisor.o LIB_OBJS += replace-object.o LIB_OBJS += repo-settings.o LIB_OBJS += repository.o diff --git a/builtin/repack.c b/builtin/repack.c index ca7658e38f..18c3df7200 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -107,101 +107,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -struct write_oid_context { - struct child_process *cmd; - const struct git_hash_algo *algop; -}; - -/* - * Write oid to the given struct child_process's stdin, starting it first if - * necessary. - */ -static int write_oid(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, void *data) -{ - struct write_oid_context *ctx = data; - struct child_process *cmd = ctx->cmd; - - if (cmd->in == -1) { - if (start_command(cmd)) - die(_("could not start pack-objects to repack promisor objects")); - } - - if (write_in_full(cmd->in, oid_to_hex(oid), ctx->algop->hexsz) < 0 || - write_in_full(cmd->in, "\n", 1) < 0) - die(_("failed to feed promisor objects to pack-objects")); - return 0; -} - -static void repack_promisor_objects(struct repository *repo, - const struct pack_objects_args *args, - struct string_list *names, - const char *packtmp) -{ - struct write_oid_context ctx; - struct child_process cmd = CHILD_PROCESS_INIT; - FILE *out; - struct strbuf line = STRBUF_INIT; - - prepare_pack_objects(&cmd, args, packtmp); - cmd.in = -1; - - /* - * NEEDSWORK: Giving pack-objects only the OIDs without any ordering - * hints may result in suboptimal deltas in the resulting pack. See if - * the OIDs can be sent with fake paths such that pack-objects can use a - * {type -> existing pack order} ordering when computing deltas instead - * of a {type -> size} ordering, which may produce better deltas. - */ - ctx.cmd = &cmd; - ctx.algop = repo->hash_algo; - for_each_packed_object(repo, write_oid, &ctx, - FOR_EACH_OBJECT_PROMISOR_ONLY); - - if (cmd.in == -1) { - /* No packed objects; cmd was never started */ - child_process_clear(&cmd); - return; - } - - close(cmd.in); - - out = xfdopen(cmd.out, "r"); - while (strbuf_getline_lf(&line, out) != EOF) { - struct string_list_item *item; - char *promisor_name; - - if (line.len != repo->hash_algo->hexsz) - die(_("repack: Expecting full hex object ID lines only from pack-objects.")); - item = string_list_append(names, line.buf); - - /* - * pack-objects creates the .pack and .idx files, but not the - * .promisor file. Create the .promisor file, which is empty. - * - * NEEDSWORK: fetch-pack sometimes generates non-empty - * .promisor files containing the ref names and associated - * hashes at the point of generation of the corresponding - * packfile, but this would not preserve their contents. Maybe - * concatenate the contents of all .promisor files instead of - * just creating a new empty file. - */ - promisor_name = mkpathdup("%s-%s.promisor", packtmp, - line.buf); - write_promisor_file(promisor_name, NULL, 0); - - item->util = generated_pack_populate(item->string, packtmp); - - free(promisor_name); - } - - fclose(out); - if (finish_command(&cmd)) - die(_("could not finish pack-objects to repack promisor objects")); - strbuf_release(&line); -} - struct pack_geometry { struct packed_git **pack; uint32_t pack_nr, pack_alloc; diff --git a/meson.build b/meson.build index 993e8f368f..1fbb8c52a6 100644 --- a/meson.build +++ b/meson.build @@ -463,6 +463,7 @@ libgit_sources = [ 'reftable/writer.c', 'remote.c', 'repack.c', + 'repack-promisor.c', 'replace-object.c', 'repo-settings.c', 'repository.c', diff --git a/repack-promisor.c b/repack-promisor.c new file mode 100644 index 0000000000..8bf42fc715 --- /dev/null +++ b/repack-promisor.c @@ -0,0 +1,102 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "run-command.h" +#include "hex.h" +#include "repository.h" +#include "packfile.h" +#include "path.h" +#include "pack.h" + +struct write_oid_context { + struct child_process *cmd; + const struct git_hash_algo *algop; +}; + +/* + * Write oid to the given struct child_process's stdin, starting it first if + * necessary. + */ +static int write_oid(const struct object_id *oid, + struct packed_git *pack UNUSED, + uint32_t pos UNUSED, void *data) +{ + struct write_oid_context *ctx = data; + struct child_process *cmd = ctx->cmd; + + if (cmd->in == -1) { + if (start_command(cmd)) + die(_("could not start pack-objects to repack promisor objects")); + } + + if (write_in_full(cmd->in, oid_to_hex(oid), ctx->algop->hexsz) < 0 || + write_in_full(cmd->in, "\n", 1) < 0) + die(_("failed to feed promisor objects to pack-objects")); + return 0; +} + +void repack_promisor_objects(struct repository *repo, + const struct pack_objects_args *args, + struct string_list *names, const char *packtmp) +{ + struct write_oid_context ctx; + struct child_process cmd = CHILD_PROCESS_INIT; + FILE *out; + struct strbuf line = STRBUF_INIT; + + prepare_pack_objects(&cmd, args, packtmp); + cmd.in = -1; + + /* + * NEEDSWORK: Giving pack-objects only the OIDs without any ordering + * hints may result in suboptimal deltas in the resulting pack. See if + * the OIDs can be sent with fake paths such that pack-objects can use a + * {type -> existing pack order} ordering when computing deltas instead + * of a {type -> size} ordering, which may produce better deltas. + */ + ctx.cmd = &cmd; + ctx.algop = repo->hash_algo; + for_each_packed_object(repo, write_oid, &ctx, + FOR_EACH_OBJECT_PROMISOR_ONLY); + + if (cmd.in == -1) { + /* No packed objects; cmd was never started */ + child_process_clear(&cmd); + return; + } + + close(cmd.in); + + out = xfdopen(cmd.out, "r"); + while (strbuf_getline_lf(&line, out) != EOF) { + struct string_list_item *item; + char *promisor_name; + + if (line.len != repo->hash_algo->hexsz) + die(_("repack: Expecting full hex object ID lines only from pack-objects.")); + item = string_list_append(names, line.buf); + + /* + * pack-objects creates the .pack and .idx files, but not the + * .promisor file. Create the .promisor file, which is empty. + * + * NEEDSWORK: fetch-pack sometimes generates non-empty + * .promisor files containing the ref names and associated + * hashes at the point of generation of the corresponding + * packfile, but this would not preserve their contents. Maybe + * concatenate the contents of all .promisor files instead of + * just creating a new empty file. + */ + promisor_name = mkpathdup("%s-%s.promisor", packtmp, + line.buf); + write_promisor_file(promisor_name, NULL, 0); + + item->util = generated_pack_populate(item->string, packtmp); + + free(promisor_name); + } + + fclose(out); + if (finish_command(&cmd)) + die(_("could not finish pack-objects to repack promisor objects")); + strbuf_release(&line); +} diff --git a/repack.h b/repack.h index f37eb49524..19dc4fd738 100644 --- a/repack.h +++ b/repack.h @@ -74,4 +74,8 @@ int generated_pack_has_ext(const struct generated_pack *pack, const char *ext); void generated_pack_install(struct generated_pack *pack, const char *name, const char *packdir, const char *packtmp); +void repack_promisor_objects(struct repository *repo, + const struct pack_objects_args *args, + struct string_list *names, const char *packtmp); + #endif /* REPACK_H */ From ee590bc1f23df2a0034c09bb0779c3c0e8fe25b5 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:54 -0400 Subject: [PATCH 27/50] builtin/repack.c: rename various pack_geometry functions Rename functions which work with 'struct pack_geometry' to begin with "pack_geometry_". Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 52 ++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 18c3df7200..2ce1ae3364 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -115,17 +115,17 @@ struct pack_geometry { int split_factor; }; -static uint32_t geometry_pack_weight(struct packed_git *p) +static uint32_t pack_geometry_weight(struct packed_git *p) { if (open_pack_index(p)) die(_("cannot open index for %s"), p->pack_name); return p->num_objects; } -static int geometry_cmp(const void *va, const void *vb) +static int pack_geometry_cmp(const void *va, const void *vb) { - uint32_t aw = geometry_pack_weight(*(struct packed_git **)va), - bw = geometry_pack_weight(*(struct packed_git **)vb); + uint32_t aw = pack_geometry_weight(*(struct packed_git **)va), + bw = pack_geometry_weight(*(struct packed_git **)vb); if (aw < bw) return -1; @@ -134,7 +134,7 @@ static int geometry_cmp(const void *va, const void *vb) return 0; } -static void init_pack_geometry(struct pack_geometry *geometry, +static void pack_geometry_init(struct pack_geometry *geometry, struct existing_packs *existing, const struct pack_objects_args *args) { @@ -184,11 +184,11 @@ static void init_pack_geometry(struct pack_geometry *geometry, geometry->pack_nr++; } - QSORT(geometry->pack, geometry->pack_nr, geometry_cmp); + QSORT(geometry->pack, geometry->pack_nr, pack_geometry_cmp); strbuf_release(&buf); } -static void split_pack_geometry(struct pack_geometry *geometry) +static void pack_geometry_split(struct pack_geometry *geometry) { uint32_t i; uint32_t split; @@ -208,13 +208,13 @@ static void split_pack_geometry(struct pack_geometry *geometry) struct packed_git *prev = geometry->pack[i - 1]; if (unsigned_mult_overflows(geometry->split_factor, - geometry_pack_weight(prev))) + pack_geometry_weight(prev))) die(_("pack %s too large to consider in geometric " "progression"), prev->pack_name); - if (geometry_pack_weight(ours) < - geometry->split_factor * geometry_pack_weight(prev)) + if (pack_geometry_weight(ours) < + geometry->split_factor * pack_geometry_weight(prev)) break; } @@ -242,9 +242,9 @@ static void split_pack_geometry(struct pack_geometry *geometry) for (i = 0; i < split; i++) { struct packed_git *p = geometry->pack[i]; - if (unsigned_add_overflows(total_size, geometry_pack_weight(p))) + if (unsigned_add_overflows(total_size, pack_geometry_weight(p))) die(_("pack %s too large to roll up"), p->pack_name); - total_size += geometry_pack_weight(p); + total_size += pack_geometry_weight(p); } for (i = split; i < geometry->pack_nr; i++) { struct packed_git *ours = geometry->pack[i]; @@ -253,15 +253,15 @@ static void split_pack_geometry(struct pack_geometry *geometry) total_size)) die(_("pack %s too large to roll up"), ours->pack_name); - if (geometry_pack_weight(ours) < + if (pack_geometry_weight(ours) < geometry->split_factor * total_size) { if (unsigned_add_overflows(total_size, - geometry_pack_weight(ours))) + pack_geometry_weight(ours))) die(_("pack %s too large to roll up"), ours->pack_name); split++; - total_size += geometry_pack_weight(ours); + total_size += pack_geometry_weight(ours); } else break; } @@ -269,7 +269,7 @@ static void split_pack_geometry(struct pack_geometry *geometry) geometry->split = split; } -static struct packed_git *get_preferred_pack(struct pack_geometry *geometry) +static struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry) { uint32_t i; @@ -304,9 +304,9 @@ static struct packed_git *get_preferred_pack(struct pack_geometry *geometry) return NULL; } -static void geometry_remove_redundant_packs(struct pack_geometry *geometry, - struct string_list *names, - struct existing_packs *existing) +static void pack_geometry_remove_redundant(struct pack_geometry *geometry, + struct string_list *names, + struct existing_packs *existing) { const struct git_hash_algo *algop = existing->repo->hash_algo; struct strbuf buf = STRBUF_INIT; @@ -332,7 +332,7 @@ static void geometry_remove_redundant_packs(struct pack_geometry *geometry, strbuf_release(&buf); } -static void free_pack_geometry(struct pack_geometry *geometry) +static void pack_geometry_release(struct pack_geometry *geometry) { if (!geometry) return; @@ -599,7 +599,7 @@ static int write_midx_included_packs(struct string_list *include, { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; - struct packed_git *preferred = get_preferred_pack(geometry); + struct packed_git *preferred = pack_geometry_preferred_pack(geometry); FILE *in; int ret; @@ -1063,8 +1063,8 @@ int cmd_repack(int argc, if (geometry.split_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); - init_pack_geometry(&geometry, &existing, &po_args); - split_pack_geometry(&geometry); + pack_geometry_init(&geometry, &existing, &po_args); + pack_geometry_split(&geometry); } prepare_pack_objects(&cmd, &po_args, packtmp); @@ -1324,8 +1324,8 @@ int cmd_repack(int argc, existing_packs_remove_redundant(&existing, packdir); if (geometry.split_factor) - geometry_remove_redundant_packs(&geometry, &names, - &existing); + pack_geometry_remove_redundant(&geometry, &names, + &existing); if (show_progress) opts |= PRUNE_PACKED_VERBOSE; prune_packed_objects(opts); @@ -1352,7 +1352,7 @@ cleanup: string_list_clear(&keep_pack_list, 0); string_list_clear(&names, 1); existing_packs_release(&existing); - free_pack_geometry(&geometry); + pack_geometry_release(&geometry); for (size_t i = 0; i < midx_pack_names_nr; i++) free(midx_pack_names[i]); free(midx_pack_names); From ce852b95800c81c17c08f7a45425cac52d208279 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:08:58 -0400 Subject: [PATCH 28/50] builtin/repack.c: pass 'pack_kept_objects' to `pack_geometry_init()` Prepare to move pack_geometry-related APIs to their own compilation unit by passing in the static "pack_kept_objects" variable directly as a parameter to the 'pack_geometry_init()' function. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 2ce1ae3364..60dce45556 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -136,7 +136,8 @@ static int pack_geometry_cmp(const void *va, const void *vb) static void pack_geometry_init(struct pack_geometry *geometry, struct existing_packs *existing, - const struct pack_objects_args *args) + const struct pack_objects_args *args, + int pack_kept_objects) { struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; @@ -1063,7 +1064,8 @@ int cmd_repack(int argc, if (geometry.split_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); - pack_geometry_init(&geometry, &existing, &po_args); + pack_geometry_init(&geometry, &existing, &po_args, + pack_kept_objects); pack_geometry_split(&geometry); } From 4738f57237927d34c7e13a1ff29c1000eda2ffca Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:02 -0400 Subject: [PATCH 29/50] builtin/repack.c: pass 'packdir' to `pack_geometry_remove_redundant()` For similar reasons as the preceding commit, pass the "packdir" variable directly to `pack_geometry_remove_redundant()` as a parameter to the function. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 60dce45556..9e2523d948 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -307,7 +307,8 @@ static struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geo static void pack_geometry_remove_redundant(struct pack_geometry *geometry, struct string_list *names, - struct existing_packs *existing) + struct existing_packs *existing, + const char *packdir) { const struct git_hash_algo *algop = existing->repo->hash_algo; struct strbuf buf = STRBUF_INIT; @@ -1327,7 +1328,7 @@ int cmd_repack(int argc, if (geometry.split_factor) pack_geometry_remove_redundant(&geometry, &names, - &existing); + &existing, packdir); if (show_progress) opts |= PRUNE_PACKED_VERBOSE; prune_packed_objects(opts); From eab133767233eaeb9bd2c78108f20059e4ae7675 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:06 -0400 Subject: [PATCH 30/50] repack: remove pack_geometry API from the builtin Now that the pack_geometry API is fully factored and isolated from the rest of the builtin, declare it within repack.h and move its implementation to "repack-geometry.c" as a separate component. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 235 ---------------------------------------------- meson.build | 1 + repack-geometry.c | 233 +++++++++++++++++++++++++++++++++++++++++++++ repack.h | 20 ++++ 5 files changed, 255 insertions(+), 235 deletions(-) create mode 100644 repack-geometry.c diff --git a/Makefile b/Makefile index 2a01bd92dc..3ee8d27dba 100644 --- a/Makefile +++ b/Makefile @@ -1137,6 +1137,7 @@ LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o LIB_OBJS += repack.o +LIB_OBJS += repack-geometry.o LIB_OBJS += repack-promisor.o LIB_OBJS += replace-object.o LIB_OBJS += repo-settings.o diff --git a/builtin/repack.c b/builtin/repack.c index 9e2523d948..b49e2dab9a 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -107,241 +107,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -struct pack_geometry { - struct packed_git **pack; - uint32_t pack_nr, pack_alloc; - uint32_t split; - - int split_factor; -}; - -static uint32_t pack_geometry_weight(struct packed_git *p) -{ - if (open_pack_index(p)) - die(_("cannot open index for %s"), p->pack_name); - return p->num_objects; -} - -static int pack_geometry_cmp(const void *va, const void *vb) -{ - uint32_t aw = pack_geometry_weight(*(struct packed_git **)va), - bw = pack_geometry_weight(*(struct packed_git **)vb); - - if (aw < bw) - return -1; - if (aw > bw) - return 1; - return 0; -} - -static void pack_geometry_init(struct pack_geometry *geometry, - struct existing_packs *existing, - const struct pack_objects_args *args, - int pack_kept_objects) -{ - struct packfile_store *packs = existing->repo->objects->packfiles; - struct packed_git *p; - struct strbuf buf = STRBUF_INIT; - - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - if (args->local && !p->pack_local) - /* - * When asked to only repack local packfiles we skip - * over any packfiles that are borrowed from alternate - * object directories. - */ - continue; - - if (!pack_kept_objects) { - /* - * Any pack that has its pack_keep bit set will - * appear in existing->kept_packs below, but - * this saves us from doing a more expensive - * check. - */ - if (p->pack_keep) - continue; - - /* - * The pack may be kept via the --keep-pack - * option; check 'existing->kept_packs' to - * determine whether to ignore it. - */ - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - - if (string_list_has_string(&existing->kept_packs, buf.buf)) - continue; - } - if (p->is_cruft) - continue; - - ALLOC_GROW(geometry->pack, - geometry->pack_nr + 1, - geometry->pack_alloc); - - geometry->pack[geometry->pack_nr] = p; - geometry->pack_nr++; - } - - QSORT(geometry->pack, geometry->pack_nr, pack_geometry_cmp); - strbuf_release(&buf); -} - -static void pack_geometry_split(struct pack_geometry *geometry) -{ - uint32_t i; - uint32_t split; - off_t total_size = 0; - - if (!geometry->pack_nr) { - geometry->split = geometry->pack_nr; - return; - } - - /* - * First, count the number of packs (in descending order of size) which - * already form a geometric progression. - */ - for (i = geometry->pack_nr - 1; i > 0; i--) { - struct packed_git *ours = geometry->pack[i]; - struct packed_git *prev = geometry->pack[i - 1]; - - if (unsigned_mult_overflows(geometry->split_factor, - pack_geometry_weight(prev))) - die(_("pack %s too large to consider in geometric " - "progression"), - prev->pack_name); - - if (pack_geometry_weight(ours) < - geometry->split_factor * pack_geometry_weight(prev)) - break; - } - - split = i; - - if (split) { - /* - * Move the split one to the right, since the top element in the - * last-compared pair can't be in the progression. Only do this - * when we split in the middle of the array (otherwise if we got - * to the end, then the split is in the right place). - */ - split++; - } - - /* - * Then, anything to the left of 'split' must be in a new pack. But, - * creating that new pack may cause packs in the heavy half to no longer - * form a geometric progression. - * - * Compute an expected size of the new pack, and then determine how many - * packs in the heavy half need to be joined into it (if any) to restore - * the geometric progression. - */ - for (i = 0; i < split; i++) { - struct packed_git *p = geometry->pack[i]; - - if (unsigned_add_overflows(total_size, pack_geometry_weight(p))) - die(_("pack %s too large to roll up"), p->pack_name); - total_size += pack_geometry_weight(p); - } - for (i = split; i < geometry->pack_nr; i++) { - struct packed_git *ours = geometry->pack[i]; - - if (unsigned_mult_overflows(geometry->split_factor, - total_size)) - die(_("pack %s too large to roll up"), ours->pack_name); - - if (pack_geometry_weight(ours) < - geometry->split_factor * total_size) { - if (unsigned_add_overflows(total_size, - pack_geometry_weight(ours))) - die(_("pack %s too large to roll up"), - ours->pack_name); - - split++; - total_size += pack_geometry_weight(ours); - } else - break; - } - - geometry->split = split; -} - -static struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry) -{ - uint32_t i; - - if (!geometry) { - /* - * No geometry means either an all-into-one repack (in which - * case there is only one pack left and it is the largest) or an - * incremental one. - * - * If repacking incrementally, then we could check the size of - * all packs to determine which should be preferred, but leave - * this for later. - */ - return NULL; - } - if (geometry->split == geometry->pack_nr) - return NULL; - - /* - * The preferred pack is the largest pack above the split line. In - * other words, it is the largest pack that does not get rolled up in - * the geometric repack. - */ - for (i = geometry->pack_nr; i > geometry->split; i--) - /* - * A pack that is not local would never be included in a - * multi-pack index. We thus skip over any non-local packs. - */ - if (geometry->pack[i - 1]->pack_local) - return geometry->pack[i - 1]; - - return NULL; -} - -static void pack_geometry_remove_redundant(struct pack_geometry *geometry, - struct string_list *names, - struct existing_packs *existing, - const char *packdir) -{ - const struct git_hash_algo *algop = existing->repo->hash_algo; - struct strbuf buf = STRBUF_INIT; - uint32_t i; - - for (i = 0; i < geometry->split; i++) { - struct packed_git *p = geometry->pack[i]; - if (string_list_has_string(names, hash_to_hex_algop(p->hash, - algop))) - continue; - - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - - if ((p->pack_keep) || - (string_list_has_string(&existing->kept_packs, buf.buf))) - continue; - - repack_remove_redundant_pack(existing->repo, packdir, buf.buf); - } - - strbuf_release(&buf); -} - -static void pack_geometry_release(struct pack_geometry *geometry) -{ - if (!geometry) - return; - - free(geometry->pack); -} - static int midx_has_unknown_packs(char **midx_pack_names, size_t midx_pack_names_nr, struct string_list *include, diff --git a/meson.build b/meson.build index 1fbb8c52a6..47b05089ee 100644 --- a/meson.build +++ b/meson.build @@ -463,6 +463,7 @@ libgit_sources = [ 'reftable/writer.c', 'remote.c', 'repack.c', + 'repack-geometry.c', 'repack-promisor.c', 'replace-object.c', 'repo-settings.c', diff --git a/repack-geometry.c b/repack-geometry.c new file mode 100644 index 0000000000..a879f2fe49 --- /dev/null +++ b/repack-geometry.c @@ -0,0 +1,233 @@ +#define DISABLE_SIGN_COMPARE_WARNINGS + +#include "git-compat-util.h" +#include "repack.h" +#include "hex.h" +#include "packfile.h" + +static uint32_t pack_geometry_weight(struct packed_git *p) +{ + if (open_pack_index(p)) + die(_("cannot open index for %s"), p->pack_name); + return p->num_objects; +} + +static int pack_geometry_cmp(const void *va, const void *vb) +{ + uint32_t aw = pack_geometry_weight(*(struct packed_git **)va), + bw = pack_geometry_weight(*(struct packed_git **)vb); + + if (aw < bw) + return -1; + if (aw > bw) + return 1; + return 0; +} + +void pack_geometry_init(struct pack_geometry *geometry, + struct existing_packs *existing, + const struct pack_objects_args *args, + int pack_kept_objects) +{ + struct packfile_store *packs = existing->repo->objects->packfiles; + struct packed_git *p; + struct strbuf buf = STRBUF_INIT; + + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + if (args->local && !p->pack_local) + /* + * When asked to only repack local packfiles we skip + * over any packfiles that are borrowed from alternate + * object directories. + */ + continue; + + if (!pack_kept_objects) { + /* + * Any pack that has its pack_keep bit set will + * appear in existing->kept_packs below, but + * this saves us from doing a more expensive + * check. + */ + if (p->pack_keep) + continue; + + /* + * The pack may be kept via the --keep-pack + * option; check 'existing->kept_packs' to + * determine whether to ignore it. + */ + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if (string_list_has_string(&existing->kept_packs, buf.buf)) + continue; + } + if (p->is_cruft) + continue; + + ALLOC_GROW(geometry->pack, + geometry->pack_nr + 1, + geometry->pack_alloc); + + geometry->pack[geometry->pack_nr] = p; + geometry->pack_nr++; + } + + QSORT(geometry->pack, geometry->pack_nr, pack_geometry_cmp); + strbuf_release(&buf); +} + +void pack_geometry_split(struct pack_geometry *geometry) +{ + uint32_t i; + uint32_t split; + off_t total_size = 0; + + if (!geometry->pack_nr) { + geometry->split = geometry->pack_nr; + return; + } + + /* + * First, count the number of packs (in descending order of size) which + * already form a geometric progression. + */ + for (i = geometry->pack_nr - 1; i > 0; i--) { + struct packed_git *ours = geometry->pack[i]; + struct packed_git *prev = geometry->pack[i - 1]; + + if (unsigned_mult_overflows(geometry->split_factor, + pack_geometry_weight(prev))) + die(_("pack %s too large to consider in geometric " + "progression"), + prev->pack_name); + + if (pack_geometry_weight(ours) < + geometry->split_factor * pack_geometry_weight(prev)) + break; + } + + split = i; + + if (split) { + /* + * Move the split one to the right, since the top element in the + * last-compared pair can't be in the progression. Only do this + * when we split in the middle of the array (otherwise if we got + * to the end, then the split is in the right place). + */ + split++; + } + + /* + * Then, anything to the left of 'split' must be in a new pack. But, + * creating that new pack may cause packs in the heavy half to no longer + * form a geometric progression. + * + * Compute an expected size of the new pack, and then determine how many + * packs in the heavy half need to be joined into it (if any) to restore + * the geometric progression. + */ + for (i = 0; i < split; i++) { + struct packed_git *p = geometry->pack[i]; + + if (unsigned_add_overflows(total_size, pack_geometry_weight(p))) + die(_("pack %s too large to roll up"), p->pack_name); + total_size += pack_geometry_weight(p); + } + for (i = split; i < geometry->pack_nr; i++) { + struct packed_git *ours = geometry->pack[i]; + + if (unsigned_mult_overflows(geometry->split_factor, + total_size)) + die(_("pack %s too large to roll up"), ours->pack_name); + + if (pack_geometry_weight(ours) < + geometry->split_factor * total_size) { + if (unsigned_add_overflows(total_size, + pack_geometry_weight(ours))) + die(_("pack %s too large to roll up"), + ours->pack_name); + + split++; + total_size += pack_geometry_weight(ours); + } else + break; + } + + geometry->split = split; +} + +struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry) +{ + uint32_t i; + + if (!geometry) { + /* + * No geometry means either an all-into-one repack (in which + * case there is only one pack left and it is the largest) or an + * incremental one. + * + * If repacking incrementally, then we could check the size of + * all packs to determine which should be preferred, but leave + * this for later. + */ + return NULL; + } + if (geometry->split == geometry->pack_nr) + return NULL; + + /* + * The preferred pack is the largest pack above the split line. In + * other words, it is the largest pack that does not get rolled up in + * the geometric repack. + */ + for (i = geometry->pack_nr; i > geometry->split; i--) + /* + * A pack that is not local would never be included in a + * multi-pack index. We thus skip over any non-local packs. + */ + if (geometry->pack[i - 1]->pack_local) + return geometry->pack[i - 1]; + + return NULL; +} + +void pack_geometry_remove_redundant(struct pack_geometry *geometry, + struct string_list *names, + struct existing_packs *existing, + const char *packdir) +{ + const struct git_hash_algo *algop = existing->repo->hash_algo; + struct strbuf buf = STRBUF_INIT; + uint32_t i; + + for (i = 0; i < geometry->split; i++) { + struct packed_git *p = geometry->pack[i]; + if (string_list_has_string(names, hash_to_hex_algop(p->hash, + algop))) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if ((p->pack_keep) || + (string_list_has_string(&existing->kept_packs, buf.buf))) + continue; + + repack_remove_redundant_pack(existing->repo, packdir, buf.buf); + } + + strbuf_release(&buf); +} + +void pack_geometry_release(struct pack_geometry *geometry) +{ + if (!geometry) + return; + + free(geometry->pack); +} diff --git a/repack.h b/repack.h index 19dc4fd738..cea7969ae4 100644 --- a/repack.h +++ b/repack.h @@ -78,4 +78,24 @@ void repack_promisor_objects(struct repository *repo, const struct pack_objects_args *args, struct string_list *names, const char *packtmp); +struct pack_geometry { + struct packed_git **pack; + uint32_t pack_nr, pack_alloc; + uint32_t split; + + int split_factor; +}; + +void pack_geometry_init(struct pack_geometry *geometry, + struct existing_packs *existing, + const struct pack_objects_args *args, + int pack_kept_objects); +void pack_geometry_split(struct pack_geometry *geometry); +struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry); +void pack_geometry_remove_redundant(struct pack_geometry *geometry, + struct string_list *names, + struct existing_packs *existing, + const char *packdir); +void pack_geometry_release(struct pack_geometry *geometry); + #endif /* REPACK_H */ From d5adcba5ee1e78d85ff60520862c2c4a3dfb3d9c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:10 -0400 Subject: [PATCH 31/50] builtin/repack.c: remove ref snapshotting from builtin When writing a MIDX, 'git repack' takes a snapshot of the repository's references and writes the result out to a file, which it then passes to 'git multi-pack-index write' via the '--refs-snapshot'. This is done in order to make bitmap selections with respect to what we are packing, thus avoiding a race where an incoming reference update causes us to try and write a bitmap for a commit not present in the MIDX. Extract this functionality out into a new repack-midx.c compilation unit, and expose the necessary functions via the repack.h API. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 68 ------------------------------------------ meson.build | 1 + repack-midx.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 4 +++ 5 files changed, 83 insertions(+), 68 deletions(-) create mode 100644 repack-midx.c diff --git a/Makefile b/Makefile index 3ee8d27dba..b214277163 100644 --- a/Makefile +++ b/Makefile @@ -1138,6 +1138,7 @@ LIB_OBJS += refspec.o LIB_OBJS += remote.o LIB_OBJS += repack.o LIB_OBJS += repack-geometry.o +LIB_OBJS += repack-midx.o LIB_OBJS += repack-promisor.o LIB_OBJS += replace-object.o LIB_OBJS += repo-settings.o diff --git a/builtin/repack.c b/builtin/repack.c index b49e2dab9a..5a1f1cb562 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -178,74 +178,6 @@ static int midx_has_unknown_packs(char **midx_pack_names, return 0; } -struct midx_snapshot_ref_data { - struct repository *repo; - struct tempfile *f; - struct oidset seen; - int preferred; -}; - -static int midx_snapshot_ref_one(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, void *_data) -{ - struct midx_snapshot_ref_data *data = _data; - struct object_id peeled; - - if (!peel_iterated_oid(data->repo, oid, &peeled)) - oid = &peeled; - - if (oidset_insert(&data->seen, oid)) - return 0; /* already seen */ - - if (odb_read_object_info(data->repo->objects, oid, NULL) != OBJ_COMMIT) - return 0; - - fprintf(data->f->fp, "%s%s\n", data->preferred ? "+" : "", - oid_to_hex(oid)); - - return 0; -} - -static void midx_snapshot_refs(struct repository *repo, struct tempfile *f) -{ - struct midx_snapshot_ref_data data; - const struct string_list *preferred = bitmap_preferred_tips(repo); - - data.repo = repo; - data.f = f; - data.preferred = 0; - oidset_init(&data.seen, 0); - - if (!fdopen_tempfile(f, "w")) - die(_("could not open tempfile %s for writing"), - get_tempfile_path(f)); - - if (preferred) { - struct string_list_item *item; - - data.preferred = 1; - for_each_string_list_item(item, preferred) - refs_for_each_ref_in(get_main_ref_store(repo), - item->string, - midx_snapshot_ref_one, &data); - data.preferred = 0; - } - - refs_for_each_ref(get_main_ref_store(repo), - midx_snapshot_ref_one, &data); - - if (close_tempfile_gently(f)) { - int save_errno = errno; - delete_tempfile(&f); - errno = save_errno; - die_errno(_("could not close refs snapshot tempfile")); - } - - oidset_clear(&data.seen); -} - static void midx_included_packs(struct string_list *include, struct existing_packs *existing, char **midx_pack_names, diff --git a/meson.build b/meson.build index 47b05089ee..0423ed30c4 100644 --- a/meson.build +++ b/meson.build @@ -464,6 +464,7 @@ libgit_sources = [ 'remote.c', 'repack.c', 'repack-geometry.c', + 'repack-midx.c', 'repack-promisor.c', 'replace-object.c', 'repo-settings.c', diff --git a/repack-midx.c b/repack-midx.c new file mode 100644 index 0000000000..354df729a5 --- /dev/null +++ b/repack-midx.c @@ -0,0 +1,77 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "hash.h" +#include "hex.h" +#include "odb.h" +#include "oidset.h" +#include "pack-bitmap.h" +#include "refs.h" +#include "tempfile.h" + +struct midx_snapshot_ref_data { + struct repository *repo; + struct tempfile *f; + struct oidset seen; + int preferred; +}; + +static int midx_snapshot_ref_one(const char *refname UNUSED, + const char *referent UNUSED, + const struct object_id *oid, + int flag UNUSED, void *_data) +{ + struct midx_snapshot_ref_data *data = _data; + struct object_id peeled; + + if (!peel_iterated_oid(data->repo, oid, &peeled)) + oid = &peeled; + + if (oidset_insert(&data->seen, oid)) + return 0; /* already seen */ + + if (odb_read_object_info(data->repo->objects, oid, NULL) != OBJ_COMMIT) + return 0; + + fprintf(data->f->fp, "%s%s\n", data->preferred ? "+" : "", + oid_to_hex(oid)); + + return 0; +} + +void midx_snapshot_refs(struct repository *repo, struct tempfile *f) +{ + struct midx_snapshot_ref_data data; + const struct string_list *preferred = bitmap_preferred_tips(repo); + + data.repo = repo; + data.f = f; + data.preferred = 0; + oidset_init(&data.seen, 0); + + if (!fdopen_tempfile(f, "w")) + die(_("could not open tempfile %s for writing"), + get_tempfile_path(f)); + + if (preferred) { + struct string_list_item *item; + + data.preferred = 1; + for_each_string_list_item(item, preferred) + refs_for_each_ref_in(get_main_ref_store(repo), + item->string, + midx_snapshot_ref_one, &data); + data.preferred = 0; + } + + refs_for_each_ref(get_main_ref_store(repo), + midx_snapshot_ref_one, &data); + + if (close_tempfile_gently(f)) { + int save_errno = errno; + delete_tempfile(&f); + errno = save_errno; + die_errno(_("could not close refs snapshot tempfile")); + } + + oidset_clear(&data.seen); +} diff --git a/repack.h b/repack.h index cea7969ae4..803e129224 100644 --- a/repack.h +++ b/repack.h @@ -98,4 +98,8 @@ void pack_geometry_remove_redundant(struct pack_geometry *geometry, const char *packdir); void pack_geometry_release(struct pack_geometry *geometry); +struct tempfile; + +void midx_snapshot_refs(struct repository *repo, struct tempfile *f); + #endif /* REPACK_H */ From 3fc50b4f4f667f2c47d0581d879a3953a51cc592 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:14 -0400 Subject: [PATCH 32/50] builtin/repack.c: extract opts struct for 'write_midx_included_packs()' The function 'write_midx_included_packs()', which is responsible for writing a new MIDX with a given set of included packs, currently takes a list of six arguments. In order to extract this function out of the builtin, we have to pass in a few additional parameters, like 'midx_must_contain_cruft' and 'packdir', which are currently declared as static variables within the builtin/repack.c compilation unit. Instead of adding additional parameters to `write_midx_included_packs()` extract out an "opts" struct that names these parameters, and pass a pointer to that, making it less cumbersome to add additional parameters. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 52 +++++++++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 5a1f1cb562..e4b8aa2c6f 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -107,6 +107,17 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } +struct repack_write_midx_opts { + struct string_list *include; + struct pack_geometry *geometry; + struct string_list *names; + const char *refs_snapshot; + const char *packdir; + int show_progress; + int write_bitmaps; + int midx_must_contain_cruft; +}; + static int midx_has_unknown_packs(char **midx_pack_names, size_t midx_pack_names_nr, struct string_list *include, @@ -290,19 +301,15 @@ static void midx_included_packs(struct string_list *include, strbuf_release(&buf); } -static int write_midx_included_packs(struct string_list *include, - struct pack_geometry *geometry, - struct string_list *names, - const char *refs_snapshot, - int show_progress, int write_bitmaps) +static int write_midx_included_packs(struct repack_write_midx_opts *opts) { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; - struct packed_git *preferred = pack_geometry_preferred_pack(geometry); + struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); FILE *in; int ret; - if (!include->nr) + if (!opts->include->nr) return 0; cmd.in = -1; @@ -311,18 +318,18 @@ static int write_midx_included_packs(struct string_list *include, strvec_push(&cmd.args, "multi-pack-index"); strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL); - if (show_progress) + if (opts->show_progress) strvec_push(&cmd.args, "--progress"); else strvec_push(&cmd.args, "--no-progress"); - if (write_bitmaps) + if (opts->write_bitmaps) strvec_push(&cmd.args, "--bitmap"); if (preferred) strvec_pushf(&cmd.args, "--preferred-pack=%s", pack_basename(preferred)); - else if (names->nr) { + else if (opts->names->nr) { /* The largest pack was repacked, meaning that either * one or two packs exist depending on whether the * repository has a cruft pack or not. @@ -335,7 +342,7 @@ static int write_midx_included_packs(struct string_list *include, * `--max-pack-size` was given, but any one of them * will suffice, so pick the first one.) */ - for_each_string_list_item(item, names) { + for_each_string_list_item(item, opts->names) { struct generated_pack *pack = item->util; if (generated_pack_has_ext(pack, ".mtimes")) continue; @@ -355,15 +362,16 @@ static int write_midx_included_packs(struct string_list *include, ; } - if (refs_snapshot) - strvec_pushf(&cmd.args, "--refs-snapshot=%s", refs_snapshot); + if (opts->refs_snapshot) + strvec_pushf(&cmd.args, "--refs-snapshot=%s", + opts->refs_snapshot); ret = start_command(&cmd); if (ret) return ret; in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, include) + for_each_string_list_item(item, opts->include) fprintf(in, "%s\n", item->string); fclose(in); @@ -1001,15 +1009,23 @@ int cmd_repack(int argc, if (write_midx) { struct string_list include = STRING_LIST_INIT_DUP; + struct repack_write_midx_opts opts = { + .include = &include, + .geometry = &geometry, + .names = &names, + .refs_snapshot = refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL, + .packdir = packdir, + .show_progress = show_progress, + .write_bitmaps = write_bitmaps > 0, + .midx_must_contain_cruft = midx_must_contain_cruft + }; midx_included_packs(&include, &existing, midx_pack_names, midx_pack_names_nr, &names, &geometry); - ret = write_midx_included_packs(&include, &geometry, &names, - refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL, - show_progress, write_bitmaps > 0); + ret = write_midx_included_packs(&opts); if (!ret && write_bitmaps) - remove_redundant_bitmaps(&include, packdir); + remove_redundant_bitmaps(&include, opts.packdir); string_list_clear(&include, 0); From c64f873571b85878d0451d74c19d96109fbb3e0c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:18 -0400 Subject: [PATCH 33/50] builtin/repack.c: use a string_list for 'midx_pack_names' When writing a new MIDX, repack must determine whether or not there are any packs in the MIDX it is replacing (if one exists) that are not somehow represented in the new MIDX (e.g., either by preserving the pack verbatim, or rolling it up as part of a geometric repack, etc.). In order to do this, it keeps track of a list of pack names from the MIDX present in the repository at the start of the repack operation. Since we manipulate and close the object store, we cannot rely on the repository's in-core representation of the MIDX, since this is subject to change and/or go away. When this behavior was introduced in 5ee86c273b (repack: exclude cruft pack(s) from the MIDX where possible, 2025-06-23), we maintained an array of character pointers instead of using a convenience API, such as string-list.h. Store the list of MIDX pack names in a string_list, thereby reducing the number of parameters we have to pass to `midx_has_unknown_packs()`. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index e4b8aa2c6f..8ae56b05e9 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -118,18 +118,17 @@ struct repack_write_midx_opts { int midx_must_contain_cruft; }; -static int midx_has_unknown_packs(char **midx_pack_names, - size_t midx_pack_names_nr, +static int midx_has_unknown_packs(struct string_list *midx_pack_names, struct string_list *include, struct pack_geometry *geometry, struct existing_packs *existing) { - size_t i; + struct string_list_item *item; string_list_sort(include); - for (i = 0; i < midx_pack_names_nr; i++) { - const char *pack_name = midx_pack_names[i]; + for_each_string_list_item(item, midx_pack_names) { + const char *pack_name = item->string; /* * Determine whether or not each MIDX'd pack from the existing @@ -191,8 +190,7 @@ static int midx_has_unknown_packs(char **midx_pack_names, static void midx_included_packs(struct string_list *include, struct existing_packs *existing, - char **midx_pack_names, - size_t midx_pack_names_nr, + struct string_list *midx_pack_names, struct string_list *names, struct pack_geometry *geometry) { @@ -247,8 +245,8 @@ static void midx_included_packs(struct string_list *include, } if (midx_must_contain_cruft || - midx_has_unknown_packs(midx_pack_names, midx_pack_names_nr, - include, geometry, existing)) { + midx_has_unknown_packs(midx_pack_names, include, geometry, + existing)) { /* * If there are one or more unknown pack(s) present (see * midx_has_unknown_packs() for what makes a pack @@ -606,13 +604,12 @@ int cmd_repack(int argc, struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; struct string_list names = STRING_LIST_INIT_DUP; + struct string_list midx_pack_names = STRING_LIST_INIT_DUP; struct existing_packs existing = EXISTING_PACKS_INIT; struct pack_geometry geometry = { 0 }; struct tempfile *refs_snapshot = NULL; int i, ret; int show_progress; - char **midx_pack_names = NULL; - size_t midx_pack_names_nr = 0; /* variables to be filled by option parsing */ struct repack_config_ctx config_ctx; @@ -985,13 +982,12 @@ int cmd_repack(int argc, struct multi_pack_index *m = get_multi_pack_index(repo->objects->sources); - ALLOC_ARRAY(midx_pack_names, - m->num_packs + m->num_packs_in_base); - - for (; m; m = m->base_midx) - for (uint32_t i = 0; i < m->num_packs; i++) - midx_pack_names[midx_pack_names_nr++] = - xstrdup(m->pack_names[i]); + for (; m; m = m->base_midx) { + for (uint32_t i = 0; i < m->num_packs; i++) { + string_list_append(&midx_pack_names, + m->pack_names[i]); + } + } } close_object_store(repo->objects); @@ -1019,8 +1015,8 @@ int cmd_repack(int argc, .write_bitmaps = write_bitmaps > 0, .midx_must_contain_cruft = midx_must_contain_cruft }; - midx_included_packs(&include, &existing, midx_pack_names, - midx_pack_names_nr, &names, &geometry); + midx_included_packs(&include, &existing, &midx_pack_names, + &names, &geometry); ret = write_midx_included_packs(&opts); @@ -1067,11 +1063,9 @@ int cmd_repack(int argc, cleanup: string_list_clear(&keep_pack_list, 0); string_list_clear(&names, 1); + string_list_clear(&midx_pack_names, 0); existing_packs_release(&existing); pack_geometry_release(&geometry); - for (size_t i = 0; i < midx_pack_names_nr; i++) - free(midx_pack_names[i]); - free(midx_pack_names); pack_objects_args_release(&po_args); pack_objects_args_release(&cruft_po_args); From 3e4f895ce8853f859d3b15633a8f8a64f1f52ba8 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:22 -0400 Subject: [PATCH 34/50] repack: keep track of MIDX pack names using existing_packs Instead of storing the list of MIDX pack names separately, let's inline it into the existing_packs struct, further reducing the number of parameters we have to pass around. This amounts to adding a new string_list to the existing_packs struct, and populating it via `existing_packs_collect()`. This is fairly straightforward to do, since we are already looping over all packs, all we need to do is: if (p->multi_pack_index) string_list_append(&existing->midx_packs, pack_basename(p)); Note, however, that this check *must* come before other conditions where we discard and do not keep track of a pack, including the condition "if (!p->pack_local)" immediately below. This is because the existing routine which collects MIDX pack names does so blindly, and does not discard, for example, non-local packs. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 26 ++++---------------------- repack.c | 5 +++++ repack.h | 1 + 3 files changed, 10 insertions(+), 22 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 8ae56b05e9..251dd08b0a 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -118,8 +118,7 @@ struct repack_write_midx_opts { int midx_must_contain_cruft; }; -static int midx_has_unknown_packs(struct string_list *midx_pack_names, - struct string_list *include, +static int midx_has_unknown_packs(struct string_list *include, struct pack_geometry *geometry, struct existing_packs *existing) { @@ -127,7 +126,7 @@ static int midx_has_unknown_packs(struct string_list *midx_pack_names, string_list_sort(include); - for_each_string_list_item(item, midx_pack_names) { + for_each_string_list_item(item, &existing->midx_packs) { const char *pack_name = item->string; /* @@ -190,7 +189,6 @@ static int midx_has_unknown_packs(struct string_list *midx_pack_names, static void midx_included_packs(struct string_list *include, struct existing_packs *existing, - struct string_list *midx_pack_names, struct string_list *names, struct pack_geometry *geometry) { @@ -245,8 +243,7 @@ static void midx_included_packs(struct string_list *include, } if (midx_must_contain_cruft || - midx_has_unknown_packs(midx_pack_names, include, geometry, - existing)) { + midx_has_unknown_packs(include, geometry, existing)) { /* * If there are one or more unknown pack(s) present (see * midx_has_unknown_packs() for what makes a pack @@ -604,7 +601,6 @@ int cmd_repack(int argc, struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; struct string_list names = STRING_LIST_INIT_DUP; - struct string_list midx_pack_names = STRING_LIST_INIT_DUP; struct existing_packs existing = EXISTING_PACKS_INIT; struct pack_geometry geometry = { 0 }; struct tempfile *refs_snapshot = NULL; @@ -978,18 +974,6 @@ int cmd_repack(int argc, string_list_sort(&names); - if (get_multi_pack_index(repo->objects->sources)) { - struct multi_pack_index *m = - get_multi_pack_index(repo->objects->sources); - - for (; m; m = m->base_midx) { - for (uint32_t i = 0; i < m->num_packs; i++) { - string_list_append(&midx_pack_names, - m->pack_names[i]); - } - } - } - close_object_store(repo->objects); /* @@ -1015,8 +999,7 @@ int cmd_repack(int argc, .write_bitmaps = write_bitmaps > 0, .midx_must_contain_cruft = midx_must_contain_cruft }; - midx_included_packs(&include, &existing, &midx_pack_names, - &names, &geometry); + midx_included_packs(&include, &existing, &names, &geometry); ret = write_midx_included_packs(&opts); @@ -1063,7 +1046,6 @@ int cmd_repack(int argc, cleanup: string_list_clear(&keep_pack_list, 0); string_list_clear(&names, 1); - string_list_clear(&midx_pack_names, 0); existing_packs_release(&existing); pack_geometry_release(&geometry); pack_objects_args_release(&po_args); diff --git a/repack.c b/repack.c index d8afdd352d..1d485e0112 100644 --- a/repack.c +++ b/repack.c @@ -80,6 +80,9 @@ void existing_packs_collect(struct existing_packs *existing, size_t i; const char *base; + if (p->multi_pack_index) + string_list_append(&existing->midx_packs, + pack_basename(p)); if (!p->pack_local) continue; @@ -104,6 +107,7 @@ void existing_packs_collect(struct existing_packs *existing, string_list_sort(&existing->kept_packs); string_list_sort(&existing->non_kept_packs); string_list_sort(&existing->cruft_packs); + string_list_sort(&existing->midx_packs); strbuf_release(&buf); } @@ -220,6 +224,7 @@ void existing_packs_release(struct existing_packs *existing) string_list_clear(&existing->kept_packs, 0); string_list_clear(&existing->non_kept_packs, 0); string_list_clear(&existing->cruft_packs, 0); + string_list_clear(&existing->midx_packs, 0); } static struct { diff --git a/repack.h b/repack.h index 803e129224..6aa5b4e0f0 100644 --- a/repack.h +++ b/repack.h @@ -40,6 +40,7 @@ struct existing_packs { struct string_list kept_packs; struct string_list non_kept_packs; struct string_list cruft_packs; + struct string_list midx_packs; }; #define EXISTING_PACKS_INIT { \ From d13c9522c1683c21a4aa485c8a6aa40e4b9c06af Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:26 -0400 Subject: [PATCH 35/50] builtin/repack.c: reorder `remove_redundant_bitmaps()` The next commit will inline the call to `remove_redundant_bitmaps()` into `write_midx_included_packs()`. Reorder these two functions to avoid a forward declaration to `remove_redundant_bitmaps()`. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 58 ++++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 251dd08b0a..957a9b5f9e 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -296,6 +296,35 @@ static void midx_included_packs(struct string_list *include, strbuf_release(&buf); } +static void remove_redundant_bitmaps(struct string_list *include, + const char *packdir) +{ + struct strbuf path = STRBUF_INIT; + struct string_list_item *item; + size_t packdir_len; + + strbuf_addstr(&path, packdir); + strbuf_addch(&path, '/'); + packdir_len = path.len; + + /* + * Remove any pack bitmaps corresponding to packs which are now + * included in the MIDX. + */ + for_each_string_list_item(item, include) { + strbuf_addstr(&path, item->string); + strbuf_strip_suffix(&path, ".idx"); + strbuf_addstr(&path, ".bitmap"); + + if (unlink(path.buf) && errno != ENOENT) + warning_errno(_("could not remove stale bitmap: %s"), + path.buf); + + strbuf_setlen(&path, packdir_len); + } + strbuf_release(&path); +} + static int write_midx_included_packs(struct repack_write_midx_opts *opts) { struct child_process cmd = CHILD_PROCESS_INIT; @@ -373,35 +402,6 @@ static int write_midx_included_packs(struct repack_write_midx_opts *opts) return finish_command(&cmd); } -static void remove_redundant_bitmaps(struct string_list *include, - const char *packdir) -{ - struct strbuf path = STRBUF_INIT; - struct string_list_item *item; - size_t packdir_len; - - strbuf_addstr(&path, packdir); - strbuf_addch(&path, '/'); - packdir_len = path.len; - - /* - * Remove any pack bitmaps corresponding to packs which are now - * included in the MIDX. - */ - for_each_string_list_item(item, include) { - strbuf_addstr(&path, item->string); - strbuf_strip_suffix(&path, ".idx"); - strbuf_addstr(&path, ".bitmap"); - - if (unlink(path.buf) && errno != ENOENT) - warning_errno(_("could not remove stale bitmap: %s"), - path.buf); - - strbuf_setlen(&path, packdir_len); - } - strbuf_release(&path); -} - static int finish_pack_objects_cmd(const struct git_hash_algo *algop, struct child_process *cmd, struct string_list *names, From 168c70c6899debf1eace0bd7e94ef8ad20c8521f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:30 -0400 Subject: [PATCH 36/50] builtin/repack.c: inline `remove_redundant_bitmaps()` After writing a new MIDX, the repack command removes any bitmaps belonging to packs which were written into the MIDX. This is currently done in a separate function outside of `write_midx_included_packs()`, which forces the caller to keep track of the set of packs written into the MIDX. Prepare to no longer require the caller to keep track of such information by inlining the clean-up into `write_midx_included_packs()`. Future commits will make the caller oblivious to the set of packs included in the MIDX altogether. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 957a9b5f9e..b55c8934e8 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -331,10 +331,10 @@ static int write_midx_included_packs(struct repack_write_midx_opts *opts) struct string_list_item *item; struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); FILE *in; - int ret; + int ret = 0; if (!opts->include->nr) - return 0; + goto done; cmd.in = -1; cmd.git_cmd = 1; @@ -392,14 +392,18 @@ static int write_midx_included_packs(struct repack_write_midx_opts *opts) ret = start_command(&cmd); if (ret) - return ret; + goto done; in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, opts->include) fprintf(in, "%s\n", item->string); fclose(in); - return finish_command(&cmd); + ret = finish_command(&cmd); +done: + if (!ret && opts->write_bitmaps) + remove_redundant_bitmaps(opts->include, opts->packdir); + return ret; } static int finish_pack_objects_cmd(const struct git_hash_algo *algop, @@ -1003,9 +1007,6 @@ int cmd_repack(int argc, ret = write_midx_included_packs(&opts); - if (!ret && write_bitmaps) - remove_redundant_bitmaps(&include, opts.packdir); - string_list_clear(&include, 0); if (ret) From e500d06af50bc3c879fda8139fcb23177d4318dc Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:35 -0400 Subject: [PATCH 37/50] builtin/repack.c: pass `repack_write_midx_opts` to `midx_included_packs` Instead of passing individual parameters (in this case, "existing", "names", and "geometry") to `midx_included_packs()`, pass a pointer to a `repack_write_midx_opts` structure instead. Besides reducing the number of parameters necessary to call the `midx_included_packs` function, this refactoring sets us up nicely to inline the call to `midx_included_packs()` into `write_midx_included_packs()`, thus making the caller (in this case, `cmd_repack()`) oblivious to the set of packs being written into the MIDX. In order to do this, `repack_write_midx_opts` has to keep track of the set of existing packs, so add an additional field to point to that set. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index b55c8934e8..bd3034a4f0 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -108,6 +108,7 @@ static int repack_config(const char *var, const char *value, } struct repack_write_midx_opts { + struct existing_packs *existing; struct string_list *include; struct pack_geometry *geometry; struct string_list *names; @@ -188,10 +189,11 @@ static int midx_has_unknown_packs(struct string_list *include, } static void midx_included_packs(struct string_list *include, - struct existing_packs *existing, - struct string_list *names, - struct pack_geometry *geometry) + struct repack_write_midx_opts *opts) { + struct existing_packs *existing = opts->existing; + struct pack_geometry *geometry = opts->geometry; + struct string_list *names = opts->names; struct string_list_item *item; struct strbuf buf = STRBUF_INIT; @@ -242,7 +244,7 @@ static void midx_included_packs(struct string_list *include, } } - if (midx_must_contain_cruft || + if (opts->midx_must_contain_cruft || midx_has_unknown_packs(include, geometry, existing)) { /* * If there are one or more unknown pack(s) present (see @@ -994,6 +996,7 @@ int cmd_repack(int argc, if (write_midx) { struct string_list include = STRING_LIST_INIT_DUP; struct repack_write_midx_opts opts = { + .existing = &existing, .include = &include, .geometry = &geometry, .names = &names, @@ -1003,7 +1006,7 @@ int cmd_repack(int argc, .write_bitmaps = write_bitmaps > 0, .midx_must_contain_cruft = midx_must_contain_cruft }; - midx_included_packs(&include, &existing, &names, &geometry); + midx_included_packs(&include, &opts); ret = write_midx_included_packs(&opts); From fff699f45d590619939d16d36c44e4a5a099744b Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:40 -0400 Subject: [PATCH 38/50] builtin/repack.c: inline packs within `write_midx_included_packs()` To write a MIDX at the end of a repack operation, 'git repack' presently computes the set of packs to write into the MIDX, before invoking `write_midx_included_packs()` with a `string_list` containing those packs. The logic for computing which packs are supposed to appear in the resulting MIDX is within `midx_included_packs()`, where it is aware of details like which cruft pack(s) were written/combined, if/how we did a geometric repack, etc. Computing this list ourselves before providing it to the sole function to make use of that list `write_midx_included_packs()` is somewhat awkward. In the future, repack will learn how to write incremental MIDXs, which will use a very different pack selection routine. Instead of doing something like: struct string_list included_packs = STRING_LIST_INIT_DUP; if (incremental) { midx_incremental_included_packs(&included_packs, ...): write_midx_incremental_included_packs(&included_packs, ...); } else { midx_included_packs(&included_packs, ...): write_midx_included_packs(&included_packs, ...); } in the future, let's have each function which writes a MIDX be responsible for itself computing the list of included packs. Inline the declaration and initialization of `included_packs` into the `write_midx_included_packs()` function itself, and repeat that pattern in the future when we introduce new ways to write MIDXs. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index bd3034a4f0..e2acba6312 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -109,7 +109,6 @@ static int repack_config(const char *var, const char *value, struct repack_write_midx_opts { struct existing_packs *existing; - struct string_list *include; struct pack_geometry *geometry; struct string_list *names; const char *refs_snapshot; @@ -330,12 +329,14 @@ static void remove_redundant_bitmaps(struct string_list *include, static int write_midx_included_packs(struct repack_write_midx_opts *opts) { struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list include = STRING_LIST_INIT_DUP; struct string_list_item *item; struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); FILE *in; int ret = 0; - if (!opts->include->nr) + midx_included_packs(&include, opts); + if (!include.nr) goto done; cmd.in = -1; @@ -397,14 +398,17 @@ static int write_midx_included_packs(struct repack_write_midx_opts *opts) goto done; in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, opts->include) + for_each_string_list_item(item, &include) fprintf(in, "%s\n", item->string); fclose(in); ret = finish_command(&cmd); done: if (!ret && opts->write_bitmaps) - remove_redundant_bitmaps(opts->include, opts->packdir); + remove_redundant_bitmaps(&include, opts->packdir); + + string_list_clear(&include, 0); + return ret; } @@ -994,10 +998,8 @@ int cmd_repack(int argc, existing_packs_mark_for_deletion(&existing, &names); if (write_midx) { - struct string_list include = STRING_LIST_INIT_DUP; struct repack_write_midx_opts opts = { .existing = &existing, - .include = &include, .geometry = &geometry, .names = &names, .refs_snapshot = refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL, @@ -1006,12 +1008,9 @@ int cmd_repack(int argc, .write_bitmaps = write_bitmaps > 0, .midx_must_contain_cruft = midx_must_contain_cruft }; - midx_included_packs(&include, &opts); ret = write_midx_included_packs(&opts); - string_list_clear(&include, 0); - if (ret) goto cleanup; } From 2f8fb7d1da04f09b3b66ebffe9405db8587f8eae Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:46 -0400 Subject: [PATCH 39/50] repack: 'write_midx_included_packs' API from the builtin Now that we have sufficiently cleaned up the write_midx_included_packs() function, we can move it (along with the struct repack_write_midx_opts) out of the builtin, and into the repack.h header. Since this function (and the static ones that it depends on) are MIDX-specific details of the repacking process, move them to the repack-midx.c compilation unit instead of the general repack.c one. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 305 ----------------------------------------------- repack-midx.c | 295 +++++++++++++++++++++++++++++++++++++++++++++ repack.h | 12 ++ 3 files changed, 307 insertions(+), 305 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index e2acba6312..5fed79e826 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -107,311 +107,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -struct repack_write_midx_opts { - struct existing_packs *existing; - struct pack_geometry *geometry; - struct string_list *names; - const char *refs_snapshot; - const char *packdir; - int show_progress; - int write_bitmaps; - int midx_must_contain_cruft; -}; - -static int midx_has_unknown_packs(struct string_list *include, - struct pack_geometry *geometry, - struct existing_packs *existing) -{ - struct string_list_item *item; - - string_list_sort(include); - - for_each_string_list_item(item, &existing->midx_packs) { - const char *pack_name = item->string; - - /* - * Determine whether or not each MIDX'd pack from the existing - * MIDX (if any) is represented in the new MIDX. For each pack - * in the MIDX, it must either be: - * - * - In the "include" list of packs to be included in the new - * MIDX. Note this function is called before the include - * list is populated with any cruft pack(s). - * - * - Below the geometric split line (if using pack geometry), - * indicating that the pack won't be included in the new - * MIDX, but its contents were rolled up as part of the - * geometric repack. - * - * - In the existing non-kept packs list (if not using pack - * geometry), and marked as non-deleted. - */ - if (string_list_has_string(include, pack_name)) { - continue; - } else if (geometry) { - struct strbuf buf = STRBUF_INIT; - uint32_t j; - - for (j = 0; j < geometry->split; j++) { - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(geometry->pack[j])); - strbuf_strip_suffix(&buf, ".pack"); - strbuf_addstr(&buf, ".idx"); - - if (!strcmp(pack_name, buf.buf)) { - strbuf_release(&buf); - break; - } - } - - strbuf_release(&buf); - - if (j < geometry->split) - continue; - } else { - struct string_list_item *item; - - item = string_list_lookup(&existing->non_kept_packs, - pack_name); - if (item && !existing_pack_is_marked_for_deletion(item)) - continue; - } - - /* - * If we got to this point, the MIDX includes some pack that we - * don't know about. - */ - return 1; - } - - return 0; -} - -static void midx_included_packs(struct string_list *include, - struct repack_write_midx_opts *opts) -{ - struct existing_packs *existing = opts->existing; - struct pack_geometry *geometry = opts->geometry; - struct string_list *names = opts->names; - struct string_list_item *item; - struct strbuf buf = STRBUF_INIT; - - for_each_string_list_item(item, &existing->kept_packs) { - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); - } - - for_each_string_list_item(item, names) { - strbuf_reset(&buf); - strbuf_addf(&buf, "pack-%s.idx", item->string); - string_list_insert(include, buf.buf); - } - - if (geometry->split_factor) { - uint32_t i; - - for (i = geometry->split; i < geometry->pack_nr; i++) { - struct packed_git *p = geometry->pack[i]; - - /* - * The multi-pack index never refers to packfiles part - * of an alternate object database, so we skip these. - * While git-multi-pack-index(1) would silently ignore - * them anyway, this allows us to skip executing the - * command completely when we have only non-local - * packfiles. - */ - if (!p->pack_local) - continue; - - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - strbuf_addstr(&buf, ".idx"); - - string_list_insert(include, buf.buf); - } - } else { - for_each_string_list_item(item, &existing->non_kept_packs) { - if (existing_pack_is_marked_for_deletion(item)) - continue; - - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); - } - } - - if (opts->midx_must_contain_cruft || - midx_has_unknown_packs(include, geometry, existing)) { - /* - * If there are one or more unknown pack(s) present (see - * midx_has_unknown_packs() for what makes a pack - * "unknown") in the MIDX before the repack, keep them - * as they may be required to form a reachability - * closure if the MIDX is bitmapped. - * - * For example, a cruft pack can be required to form a - * reachability closure if the MIDX is bitmapped and one - * or more of the bitmap's selected commits reaches a - * once-cruft object that was later made reachable. - */ - for_each_string_list_item(item, &existing->cruft_packs) { - /* - * When doing a --geometric repack, there is no - * need to check for deleted packs, since we're - * by definition not doing an ALL_INTO_ONE - * repack (hence no packs will be deleted). - * Otherwise we must check for and exclude any - * packs which are enqueued for deletion. - * - * So we could omit the conditional below in the - * --geometric case, but doing so is unnecessary - * since no packs are marked as pending - * deletion (since we only call - * `existing_packs_mark_for_deletion()` when - * doing an all-into-one repack). - */ - if (existing_pack_is_marked_for_deletion(item)) - continue; - - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); - } - } else { - /* - * Modern versions of Git (with the appropriate - * configuration setting) will write new copies of - * once-cruft objects when doing a --geometric repack. - * - * If the MIDX has no cruft pack, new packs written - * during a --geometric repack will not rely on the - * cruft pack to form a reachability closure, so we can - * avoid including them in the MIDX in that case. - */ - ; - } - - strbuf_release(&buf); -} - -static void remove_redundant_bitmaps(struct string_list *include, - const char *packdir) -{ - struct strbuf path = STRBUF_INIT; - struct string_list_item *item; - size_t packdir_len; - - strbuf_addstr(&path, packdir); - strbuf_addch(&path, '/'); - packdir_len = path.len; - - /* - * Remove any pack bitmaps corresponding to packs which are now - * included in the MIDX. - */ - for_each_string_list_item(item, include) { - strbuf_addstr(&path, item->string); - strbuf_strip_suffix(&path, ".idx"); - strbuf_addstr(&path, ".bitmap"); - - if (unlink(path.buf) && errno != ENOENT) - warning_errno(_("could not remove stale bitmap: %s"), - path.buf); - - strbuf_setlen(&path, packdir_len); - } - strbuf_release(&path); -} - -static int write_midx_included_packs(struct repack_write_midx_opts *opts) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - struct string_list include = STRING_LIST_INIT_DUP; - struct string_list_item *item; - struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); - FILE *in; - int ret = 0; - - midx_included_packs(&include, opts); - if (!include.nr) - goto done; - - cmd.in = -1; - cmd.git_cmd = 1; - - strvec_push(&cmd.args, "multi-pack-index"); - strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL); - - if (opts->show_progress) - strvec_push(&cmd.args, "--progress"); - else - strvec_push(&cmd.args, "--no-progress"); - - if (opts->write_bitmaps) - strvec_push(&cmd.args, "--bitmap"); - - if (preferred) - strvec_pushf(&cmd.args, "--preferred-pack=%s", - pack_basename(preferred)); - else if (opts->names->nr) { - /* The largest pack was repacked, meaning that either - * one or two packs exist depending on whether the - * repository has a cruft pack or not. - * - * Select the non-cruft one as preferred to encourage - * pack-reuse among packs containing reachable objects - * over unreachable ones. - * - * (Note we could write multiple packs here if - * `--max-pack-size` was given, but any one of them - * will suffice, so pick the first one.) - */ - for_each_string_list_item(item, opts->names) { - struct generated_pack *pack = item->util; - if (generated_pack_has_ext(pack, ".mtimes")) - continue; - - strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack", - item->string); - break; - } - } else { - /* - * No packs were kept, and no packs were written. The - * only thing remaining are .keep packs (unless - * --pack-kept-objects was given). - * - * Set the `--preferred-pack` arbitrarily here. - */ - ; - } - - if (opts->refs_snapshot) - strvec_pushf(&cmd.args, "--refs-snapshot=%s", - opts->refs_snapshot); - - ret = start_command(&cmd); - if (ret) - goto done; - - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, &include) - fprintf(in, "%s\n", item->string); - fclose(in); - - ret = finish_command(&cmd); -done: - if (!ret && opts->write_bitmaps) - remove_redundant_bitmaps(&include, opts->packdir); - - string_list_clear(&include, 0); - - return ret; -} - static int finish_pack_objects_cmd(const struct git_hash_algo *algop, struct child_process *cmd, struct string_list *names, diff --git a/repack-midx.c b/repack-midx.c index 354df729a5..6f6202c5bc 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -6,6 +6,7 @@ #include "oidset.h" #include "pack-bitmap.h" #include "refs.h" +#include "run-command.h" #include "tempfile.h" struct midx_snapshot_ref_data { @@ -75,3 +76,297 @@ void midx_snapshot_refs(struct repository *repo, struct tempfile *f) oidset_clear(&data.seen); } + +static int midx_has_unknown_packs(struct string_list *include, + struct pack_geometry *geometry, + struct existing_packs *existing) +{ + struct string_list_item *item; + + string_list_sort(include); + + for_each_string_list_item(item, &existing->midx_packs) { + const char *pack_name = item->string; + + /* + * Determine whether or not each MIDX'd pack from the existing + * MIDX (if any) is represented in the new MIDX. For each pack + * in the MIDX, it must either be: + * + * - In the "include" list of packs to be included in the new + * MIDX. Note this function is called before the include + * list is populated with any cruft pack(s). + * + * - Below the geometric split line (if using pack geometry), + * indicating that the pack won't be included in the new + * MIDX, but its contents were rolled up as part of the + * geometric repack. + * + * - In the existing non-kept packs list (if not using pack + * geometry), and marked as non-deleted. + */ + if (string_list_has_string(include, pack_name)) { + continue; + } else if (geometry) { + struct strbuf buf = STRBUF_INIT; + uint32_t j; + + for (j = 0; j < geometry->split; j++) { + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(geometry->pack[j])); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + if (!strcmp(pack_name, buf.buf)) { + strbuf_release(&buf); + break; + } + } + + strbuf_release(&buf); + + if (j < geometry->split) + continue; + } else { + struct string_list_item *item; + + item = string_list_lookup(&existing->non_kept_packs, + pack_name); + if (item && !existing_pack_is_marked_for_deletion(item)) + continue; + } + + /* + * If we got to this point, the MIDX includes some pack that we + * don't know about. + */ + return 1; + } + + return 0; +} + +static void midx_included_packs(struct string_list *include, + struct repack_write_midx_opts *opts) +{ + struct existing_packs *existing = opts->existing; + struct pack_geometry *geometry = opts->geometry; + struct string_list *names = opts->names; + struct string_list_item *item; + struct strbuf buf = STRBUF_INIT; + + for_each_string_list_item(item, &existing->kept_packs) { + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + + for_each_string_list_item(item, names) { + strbuf_reset(&buf); + strbuf_addf(&buf, "pack-%s.idx", item->string); + string_list_insert(include, buf.buf); + } + + if (geometry->split_factor) { + uint32_t i; + + for (i = geometry->split; i < geometry->pack_nr; i++) { + struct packed_git *p = geometry->pack[i]; + + /* + * The multi-pack index never refers to packfiles part + * of an alternate object database, so we skip these. + * While git-multi-pack-index(1) would silently ignore + * them anyway, this allows us to skip executing the + * command completely when we have only non-local + * packfiles. + */ + if (!p->pack_local) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + string_list_insert(include, buf.buf); + } + } else { + for_each_string_list_item(item, &existing->non_kept_packs) { + if (existing_pack_is_marked_for_deletion(item)) + continue; + + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + } + + if (opts->midx_must_contain_cruft || + midx_has_unknown_packs(include, geometry, existing)) { + /* + * If there are one or more unknown pack(s) present (see + * midx_has_unknown_packs() for what makes a pack + * "unknown") in the MIDX before the repack, keep them + * as they may be required to form a reachability + * closure if the MIDX is bitmapped. + * + * For example, a cruft pack can be required to form a + * reachability closure if the MIDX is bitmapped and one + * or more of the bitmap's selected commits reaches a + * once-cruft object that was later made reachable. + */ + for_each_string_list_item(item, &existing->cruft_packs) { + /* + * When doing a --geometric repack, there is no + * need to check for deleted packs, since we're + * by definition not doing an ALL_INTO_ONE + * repack (hence no packs will be deleted). + * Otherwise we must check for and exclude any + * packs which are enqueued for deletion. + * + * So we could omit the conditional below in the + * --geometric case, but doing so is unnecessary + * since no packs are marked as pending + * deletion (since we only call + * `existing_packs_mark_for_deletion()` when + * doing an all-into-one repack). + */ + if (existing_pack_is_marked_for_deletion(item)) + continue; + + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + } else { + /* + * Modern versions of Git (with the appropriate + * configuration setting) will write new copies of + * once-cruft objects when doing a --geometric repack. + * + * If the MIDX has no cruft pack, new packs written + * during a --geometric repack will not rely on the + * cruft pack to form a reachability closure, so we can + * avoid including them in the MIDX in that case. + */ + ; + } + + strbuf_release(&buf); +} + +static void remove_redundant_bitmaps(struct string_list *include, + const char *packdir) +{ + struct strbuf path = STRBUF_INIT; + struct string_list_item *item; + size_t packdir_len; + + strbuf_addstr(&path, packdir); + strbuf_addch(&path, '/'); + packdir_len = path.len; + + /* + * Remove any pack bitmaps corresponding to packs which are now + * included in the MIDX. + */ + for_each_string_list_item(item, include) { + strbuf_addstr(&path, item->string); + strbuf_strip_suffix(&path, ".idx"); + strbuf_addstr(&path, ".bitmap"); + + if (unlink(path.buf) && errno != ENOENT) + warning_errno(_("could not remove stale bitmap: %s"), + path.buf); + + strbuf_setlen(&path, packdir_len); + } + strbuf_release(&path); +} + +int write_midx_included_packs(struct repack_write_midx_opts *opts) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list include = STRING_LIST_INIT_DUP; + struct string_list_item *item; + struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); + FILE *in; + int ret = 0; + + midx_included_packs(&include, opts); + if (!include.nr) + goto done; + + cmd.in = -1; + cmd.git_cmd = 1; + + strvec_push(&cmd.args, "multi-pack-index"); + strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL); + + if (opts->show_progress) + strvec_push(&cmd.args, "--progress"); + else + strvec_push(&cmd.args, "--no-progress"); + + if (opts->write_bitmaps) + strvec_push(&cmd.args, "--bitmap"); + + if (preferred) + strvec_pushf(&cmd.args, "--preferred-pack=%s", + pack_basename(preferred)); + else if (opts->names->nr) { + /* The largest pack was repacked, meaning that either + * one or two packs exist depending on whether the + * repository has a cruft pack or not. + * + * Select the non-cruft one as preferred to encourage + * pack-reuse among packs containing reachable objects + * over unreachable ones. + * + * (Note we could write multiple packs here if + * `--max-pack-size` was given, but any one of them + * will suffice, so pick the first one.) + */ + for_each_string_list_item(item, opts->names) { + struct generated_pack *pack = item->util; + if (generated_pack_has_ext(pack, ".mtimes")) + continue; + + strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack", + item->string); + break; + } + } else { + /* + * No packs were kept, and no packs were written. The + * only thing remaining are .keep packs (unless + * --pack-kept-objects was given). + * + * Set the `--preferred-pack` arbitrarily here. + */ + ; + } + + if (opts->refs_snapshot) + strvec_pushf(&cmd.args, "--refs-snapshot=%s", + opts->refs_snapshot); + + ret = start_command(&cmd); + if (ret) + goto done; + + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, &include) + fprintf(in, "%s\n", item->string); + fclose(in); + + ret = finish_command(&cmd); +done: + if (!ret && opts->write_bitmaps) + remove_redundant_bitmaps(&include, opts->packdir); + + string_list_clear(&include, 0); + + return ret; +} diff --git a/repack.h b/repack.h index 6aa5b4e0f0..25a31ac0a0 100644 --- a/repack.h +++ b/repack.h @@ -101,6 +101,18 @@ void pack_geometry_release(struct pack_geometry *geometry); struct tempfile; +struct repack_write_midx_opts { + struct existing_packs *existing; + struct pack_geometry *geometry; + struct string_list *names; + const char *refs_snapshot; + const char *packdir; + int show_progress; + int write_bitmaps; + int midx_must_contain_cruft; +}; + void midx_snapshot_refs(struct repository *repo, struct tempfile *f); +int write_midx_included_packs(struct repack_write_midx_opts *opts); #endif /* REPACK_H */ From 9251edd257129ad3b77c63260a77b5bbd27e674a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:51 -0400 Subject: [PATCH 40/50] builtin/repack.c: introduce `struct write_pack_opts` There are various functions within the 'repack' builtin which are responsible for writing different kinds of packs. They include: - `static int write_filtered_pack(...)` - `static int write_cruft_pack(...)` as well as the function `finish_pack_objects_cmd()`, which is responsible for finalizing a new pack write, and recording the checksum of its contents in the 'names' list. Both of these `write_` functions have a few things in common. They both take a pointer to the 'pack_objects_args' struct, as well as a pair of character pointers for `destination` and `pack_prefix`. Instead of repeating those arguments for each function, let's extract an options struct called "write_pack_opts" which has these three parameters as member fields. While we're at it, add fields for "packdir," and "packtmp", both of which are static variables within the builtin, and need to be read from within these two functions. This will shorten the list of parameters that callers have to provide to `write_filtered_pack()`, avoid ambiguity when passing multiple variables of the same type, and provide a unified interface for the two functions mentioned earlier. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 28 +++++++++++++++------------- repack.h | 8 ++++++++ 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 5fed79e826..6df7c88085 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -138,9 +138,7 @@ static int finish_pack_objects_cmd(const struct git_hash_algo *algop, return finish_command(cmd); } -static int write_filtered_pack(const struct pack_objects_args *args, - const char *destination, - const char *pack_prefix, +static int write_filtered_pack(struct write_pack_opts *opts, struct existing_packs *existing, struct string_list *names) { @@ -150,9 +148,9 @@ static int write_filtered_pack(const struct pack_objects_args *args, int ret; const char *caret; const char *scratch; - int local = skip_prefix(destination, packdir, &scratch); + int local = skip_prefix(opts->destination, opts->packdir, &scratch); - prepare_pack_objects(&cmd, args, destination); + prepare_pack_objects(&cmd, opts->po_args, opts->destination); strvec_push(&cmd.args, "--stdin-packs"); @@ -175,7 +173,7 @@ static int write_filtered_pack(const struct pack_objects_args *args, */ in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, names) - fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); + fprintf(in, "^%s-%s.pack\n", opts->pack_prefix, item->string); for_each_string_list_item(item, &existing->non_kept_packs) fprintf(in, "%s.pack\n", item->string); for_each_string_list_item(item, &existing->cruft_packs) @@ -665,14 +663,18 @@ int cmd_repack(int argc, } if (po_args.filter_options.choice) { - if (!filter_to) - filter_to = packtmp; + struct write_pack_opts opts = { + .po_args = &po_args, + .destination = filter_to, + .pack_prefix = find_pack_prefix(packdir, packtmp), + .packdir = packdir, + .packtmp = packtmp, + }; - ret = write_filtered_pack(&po_args, - filter_to, - find_pack_prefix(packdir, packtmp), - &existing, - &names); + if (!opts.destination) + opts.destination = packtmp; + + ret = write_filtered_pack(&opts, &existing, &names); if (ret) goto cleanup; } diff --git a/repack.h b/repack.h index 25a31ac0a0..6ef503f623 100644 --- a/repack.h +++ b/repack.h @@ -32,6 +32,14 @@ void pack_objects_args_release(struct pack_objects_args *args); void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, const char *base_name); +struct write_pack_opts { + struct pack_objects_args *po_args; + const char *destination; + const char *pack_prefix; + const char *packdir; + const char *packtmp; +}; + struct repository; struct packed_git; From f9568ff63ff2770a44cea7a996f9d19ca1de414c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:09:57 -0400 Subject: [PATCH 41/50] builtin/repack.c: use `write_pack_opts` within `write_cruft_pack()` Similar to the changes made in the previous commit to `write_filtered_pack()`, teach `write_cruft_pack()` to take a `write_pack_opts` struct and use that where possible. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 6df7c88085..501359c580 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -221,9 +221,7 @@ static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, strbuf_release(&buf); } -static int write_cruft_pack(const struct pack_objects_args *args, - const char *destination, - const char *pack_prefix, +static int write_cruft_pack(struct write_pack_opts *opts, const char *cruft_expiration, unsigned long combine_cruft_below_size, struct string_list *names, @@ -234,9 +232,9 @@ static int write_cruft_pack(const struct pack_objects_args *args, FILE *in; int ret; const char *scratch; - int local = skip_prefix(destination, packdir, &scratch); + int local = skip_prefix(opts->destination, opts->packdir, &scratch); - prepare_pack_objects(&cmd, args, destination); + prepare_pack_objects(&cmd, opts->po_args, opts->destination); strvec_push(&cmd.args, "--cruft"); if (cruft_expiration) @@ -267,7 +265,7 @@ static int write_cruft_pack(const struct pack_objects_args *args, */ in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, names) - fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); + fprintf(in, "%s-%s.pack\n", opts->pack_prefix, item->string); if (combine_cruft_below_size && !cruft_expiration) { combine_small_cruft_packs(in, combine_cruft_below_size, existing); @@ -599,6 +597,13 @@ int cmd_repack(int argc, if (pack_everything & PACK_CRUFT) { const char *pack_prefix = find_pack_prefix(packdir, packtmp); + struct write_pack_opts opts = { + .po_args = &cruft_po_args, + .destination = packtmp, + .pack_prefix = pack_prefix, + .packtmp = packtmp, + .packdir = packdir, + }; if (!cruft_po_args.window) cruft_po_args.window = xstrdup_or_null(po_args.window); @@ -615,8 +620,7 @@ int cmd_repack(int argc, cruft_po_args.quiet = po_args.quiet; cruft_po_args.delta_base_offset = po_args.delta_base_offset; - ret = write_cruft_pack(&cruft_po_args, packtmp, pack_prefix, - cruft_expiration, + ret = write_cruft_pack(&opts, cruft_expiration, combine_cruft_below_size, &names, &existing); if (ret) @@ -651,11 +655,8 @@ int cmd_repack(int argc, * pack, but rather removing all cruft packs from the * main repository regardless of size. */ - ret = write_cruft_pack(&cruft_po_args, expire_to, - pack_prefix, - NULL, - 0ul, - &names, + opts.destination = expire_to; + ret = write_cruft_pack(&opts, NULL, 0ul, &names, &existing); if (ret) goto cleanup; From 3b7102b6ed9127730ec4e01bf9ee4c383b421081 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:10:02 -0400 Subject: [PATCH 42/50] repack: move `find_pack_prefix()` out of the builtin Both callers within the repack builtin which call functions that take a 'write_pack_opts' structure have the following pattern: struct write_pack_opts opts = { .packdir = packdir, .packtmp = packtmp, .pack_prefix = find_pack_prefix(packdir, packtmp), /* ... */ }; int ret = write_some_kind_of_pack(&opts, /* ... */); , but both "packdir" and "packtmp" are fields within the write_pack_opts struct itself! Instead of also computing the pack_prefix ahead of time, let's have the callees compute it themselves by moving `find_pack_prefix()` out of the repack builtin, and have it take a write_pack_opts pointer instead of the "packdir" and "packtmp" fields directly. This avoids the callers having to do some prep work that is common between the two of them, but also avoids the potential pitfall of accidentally writing: .pack_prefix = find_pack_prefix(packtmp, packdir), (which is well-typed) when the caller meant to instead write: .pack_prefix = find_pack_prefix(packdir, packtmp), Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 20 ++++---------------- repack.c | 11 +++++++++++ repack.h | 3 ++- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 501359c580..4d86920618 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -149,6 +149,7 @@ static int write_filtered_pack(struct write_pack_opts *opts, const char *caret; const char *scratch; int local = skip_prefix(opts->destination, opts->packdir, &scratch); + const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); @@ -173,7 +174,7 @@ static int write_filtered_pack(struct write_pack_opts *opts, */ in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, names) - fprintf(in, "^%s-%s.pack\n", opts->pack_prefix, item->string); + fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); for_each_string_list_item(item, &existing->non_kept_packs) fprintf(in, "%s.pack\n", item->string); for_each_string_list_item(item, &existing->cruft_packs) @@ -233,6 +234,7 @@ static int write_cruft_pack(struct write_pack_opts *opts, int ret; const char *scratch; int local = skip_prefix(opts->destination, opts->packdir, &scratch); + const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); @@ -265,7 +267,7 @@ static int write_cruft_pack(struct write_pack_opts *opts, */ in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, names) - fprintf(in, "%s-%s.pack\n", opts->pack_prefix, item->string); + fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); if (combine_cruft_below_size && !cruft_expiration) { combine_small_cruft_packs(in, combine_cruft_below_size, existing); @@ -283,17 +285,6 @@ static int write_cruft_pack(struct write_pack_opts *opts, local); } -static const char *find_pack_prefix(const char *packdir, const char *packtmp) -{ - const char *pack_prefix; - if (!skip_prefix(packtmp, packdir, &pack_prefix)) - die(_("pack prefix %s does not begin with objdir %s"), - packtmp, packdir); - if (*pack_prefix == '/') - pack_prefix++; - return pack_prefix; -} - int cmd_repack(int argc, const char **argv, const char *prefix, @@ -596,11 +587,9 @@ int cmd_repack(int argc, } if (pack_everything & PACK_CRUFT) { - const char *pack_prefix = find_pack_prefix(packdir, packtmp); struct write_pack_opts opts = { .po_args = &cruft_po_args, .destination = packtmp, - .pack_prefix = pack_prefix, .packtmp = packtmp, .packdir = packdir, }; @@ -667,7 +656,6 @@ int cmd_repack(int argc, struct write_pack_opts opts = { .po_args = &po_args, .destination = filter_to, - .pack_prefix = find_pack_prefix(packdir, packtmp), .packdir = packdir, .packtmp = packtmp, }; diff --git a/repack.c b/repack.c index 1d485e0112..c4326a532d 100644 --- a/repack.c +++ b/repack.c @@ -66,6 +66,17 @@ void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, strbuf_release(&buf); } +const char *write_pack_opts_pack_prefix(struct write_pack_opts *opts) +{ + const char *pack_prefix; + if (!skip_prefix(opts->packtmp, opts->packdir, &pack_prefix)) + die(_("pack prefix %s does not begin with objdir %s"), + opts->packtmp, opts->packdir); + if (*pack_prefix == '/') + pack_prefix++; + return pack_prefix; +} + #define DELETE_PACK 1 #define RETAIN_PACK 2 diff --git a/repack.h b/repack.h index 6ef503f623..46d2312fa9 100644 --- a/repack.h +++ b/repack.h @@ -35,11 +35,12 @@ void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, struct write_pack_opts { struct pack_objects_args *po_args; const char *destination; - const char *pack_prefix; const char *packdir; const char *packtmp; }; +const char *write_pack_opts_pack_prefix(struct write_pack_opts *opts); + struct repository; struct packed_git; From cbd91c8d15ca552604c9489371dd77c652b009ef Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:10:08 -0400 Subject: [PATCH 43/50] repack: extract `write_pack_opts_is_local()` Similar to the previous commit, the functions `write_cruft_pack()` and `write_filtered_pack()` both compute a "local" variable via the exact same mechanism: const char *scratch; int local = skip_prefix(opts->destination, opts->packdir, &scratch); Not only does this cause us to repeat the same pair of lines, it also introduces an unnecessary "scratch" variable that is common between both functions. Instead of repeating ourselves, let's extract that functionality into a new function in the repack.h API called "write_pack_opts_is_local()". That function takes a pointer to a "struct write_pack_opts" (which has as fields both "destination" and "packdir"), and can encapsulate the dangling "scratch" field. Extract that function and make it visible within the repack.h API, and use it within both `write_cruft_pack()` and `write_filtered_pack()`. The remaining duplication (that is, that both `write_cruft_pack()` and `write_filtered_pack()` still both call `write_pack_opts_is_local()`) will be addressed in the following commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 6 ++---- repack.c | 6 ++++++ repack.h | 1 + 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 4d86920618..be8e6689fc 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -147,8 +147,7 @@ static int write_filtered_pack(struct write_pack_opts *opts, FILE *in; int ret; const char *caret; - const char *scratch; - int local = skip_prefix(opts->destination, opts->packdir, &scratch); + int local = write_pack_opts_is_local(opts); const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); @@ -232,8 +231,7 @@ static int write_cruft_pack(struct write_pack_opts *opts, struct string_list_item *item; FILE *in; int ret; - const char *scratch; - int local = skip_prefix(opts->destination, opts->packdir, &scratch); + int local = write_pack_opts_is_local(opts); const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); diff --git a/repack.c b/repack.c index c4326a532d..7af297ae48 100644 --- a/repack.c +++ b/repack.c @@ -77,6 +77,12 @@ const char *write_pack_opts_pack_prefix(struct write_pack_opts *opts) return pack_prefix; } +int write_pack_opts_is_local(struct write_pack_opts *opts) +{ + const char *scratch; + return skip_prefix(opts->destination, opts->packdir, &scratch); +} + #define DELETE_PACK 1 #define RETAIN_PACK 2 diff --git a/repack.h b/repack.h index 46d2312fa9..16f2de2ea9 100644 --- a/repack.h +++ b/repack.h @@ -40,6 +40,7 @@ struct write_pack_opts { }; const char *write_pack_opts_pack_prefix(struct write_pack_opts *opts); +int write_pack_opts_is_local(struct write_pack_opts *opts); struct repository; struct packed_git; From 6c5b90171fae963581ecfd9e30ed2e04bd0b02c3 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:10:14 -0400 Subject: [PATCH 44/50] builtin/repack.c: pass `write_pack_opts` to `finish_pack_objects_cmd()` To prepare to move the `finish_pack_objects_cmd()` function out of the builtin and into the repack.h API, there are a couple of things we need to do first: - First, let's take advantage of `write_pack_opts_is_local()` function introduced in the previous commit instead of passing "local" explicitly. - Let's also avoid referring to the static 'packtmp' field within builtin/repack.c by instead accessing it through the write_pack_opts argument. There are three callers which need to adjust themselves in order to account for this change. The callers which reside in write_cruft_pack() and write_filtered_pack() both already have an "opts" in scope, so they can pass it through transparently. The other call (at the bottom of `cmd_repack()`) needs to initialize its own write_pack_opts to pass the necessary fields over to the direct call to `finish_pack_objects_cmd()`. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index be8e6689fc..8db95305c8 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -108,11 +108,12 @@ static int repack_config(const char *var, const char *value, } static int finish_pack_objects_cmd(const struct git_hash_algo *algop, + struct write_pack_opts *opts, struct child_process *cmd, - struct string_list *names, - int local) + struct string_list *names) { FILE *out; + int local = write_pack_opts_is_local(opts); struct strbuf line = STRBUF_INIT; out = xfdopen(cmd->out, "r"); @@ -128,7 +129,8 @@ static int finish_pack_objects_cmd(const struct git_hash_algo *algop, */ if (local) { item = string_list_append(names, line.buf); - item->util = generated_pack_populate(line.buf, packtmp); + item->util = generated_pack_populate(line.buf, + opts->packtmp); } } fclose(out); @@ -147,7 +149,6 @@ static int write_filtered_pack(struct write_pack_opts *opts, FILE *in; int ret; const char *caret; - int local = write_pack_opts_is_local(opts); const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); @@ -183,8 +184,8 @@ static int write_filtered_pack(struct write_pack_opts *opts, fprintf(in, "%s%s.pack\n", caret, item->string); fclose(in); - return finish_pack_objects_cmd(existing->repo->hash_algo, &cmd, names, - local); + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); } static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, @@ -231,7 +232,6 @@ static int write_cruft_pack(struct write_pack_opts *opts, struct string_list_item *item; FILE *in; int ret; - int local = write_pack_opts_is_local(opts); const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); @@ -279,8 +279,8 @@ static int write_cruft_pack(struct write_pack_opts *opts, fprintf(in, "%s.pack\n", item->string); fclose(in); - return finish_pack_objects_cmd(existing->repo->hash_algo, &cmd, names, - local); + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); } int cmd_repack(int argc, @@ -294,6 +294,7 @@ int cmd_repack(int argc, struct existing_packs existing = EXISTING_PACKS_INIT; struct pack_geometry geometry = { 0 }; struct tempfile *refs_snapshot = NULL; + struct write_pack_opts opts = { 0 }; int i, ret; int show_progress; @@ -560,7 +561,10 @@ int cmd_repack(int argc, fclose(in); } - ret = finish_pack_objects_cmd(repo->hash_algo, &cmd, &names, 1); + opts.packdir = packdir; + opts.destination = packdir; + opts.packtmp = packtmp; + ret = finish_pack_objects_cmd(repo->hash_algo, &opts, &cmd, &names); if (ret) goto cleanup; From f6cc2aed8a9b9124719ba4bbf4f779eda298d2d8 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:10:19 -0400 Subject: [PATCH 45/50] repack: move `finish_pack_objects_cmd()` out of the builtin In a similar spirit as the previous commit(s), now that the function `finish_pack_objects_cmd()` has no explicit dependencies within the repack builtin, let's extract it. This prepares us to extract the remaining two functions within the repack builtin that explicitly write packfiles, which are `write_cruft_pack()` and `write_filtered_pack()`, which will be done in the future commits. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 33 --------------------------------- repack.c | 33 +++++++++++++++++++++++++++++++++ repack.h | 5 +++++ 3 files changed, 38 insertions(+), 33 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 8db95305c8..836a006607 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -107,39 +107,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -static int finish_pack_objects_cmd(const struct git_hash_algo *algop, - struct write_pack_opts *opts, - struct child_process *cmd, - struct string_list *names) -{ - FILE *out; - int local = write_pack_opts_is_local(opts); - struct strbuf line = STRBUF_INIT; - - out = xfdopen(cmd->out, "r"); - while (strbuf_getline_lf(&line, out) != EOF) { - struct string_list_item *item; - - if (line.len != algop->hexsz) - die(_("repack: Expecting full hex object ID lines only " - "from pack-objects.")); - /* - * Avoid putting packs written outside of the repository in the - * list of names. - */ - if (local) { - item = string_list_append(names, line.buf); - item->util = generated_pack_populate(line.buf, - opts->packtmp); - } - } - fclose(out); - - strbuf_release(&line); - - return finish_command(cmd); -} - static int write_filtered_pack(struct write_pack_opts *opts, struct existing_packs *existing, struct string_list *names) diff --git a/repack.c b/repack.c index 7af297ae48..8a0e4789fa 100644 --- a/repack.c +++ b/repack.c @@ -83,6 +83,39 @@ int write_pack_opts_is_local(struct write_pack_opts *opts) return skip_prefix(opts->destination, opts->packdir, &scratch); } +int finish_pack_objects_cmd(const struct git_hash_algo *algop, + struct write_pack_opts *opts, + struct child_process *cmd, + struct string_list *names) +{ + FILE *out; + int local = write_pack_opts_is_local(opts); + struct strbuf line = STRBUF_INIT; + + out = xfdopen(cmd->out, "r"); + while (strbuf_getline_lf(&line, out) != EOF) { + struct string_list_item *item; + + if (line.len != algop->hexsz) + die(_("repack: Expecting full hex object ID lines only " + "from pack-objects.")); + /* + * Avoid putting packs written outside of the repository in the + * list of names. + */ + if (local) { + item = string_list_append(names, line.buf); + item->util = generated_pack_populate(line.buf, + opts->packtmp); + } + } + fclose(out); + + strbuf_release(&line); + + return finish_command(cmd); +} + #define DELETE_PACK 1 #define RETAIN_PACK 2 diff --git a/repack.h b/repack.h index 16f2de2ea9..9351293233 100644 --- a/repack.h +++ b/repack.h @@ -42,6 +42,11 @@ struct write_pack_opts { const char *write_pack_opts_pack_prefix(struct write_pack_opts *opts); int write_pack_opts_is_local(struct write_pack_opts *opts); +int finish_pack_objects_cmd(const struct git_hash_algo *algop, + struct write_pack_opts *opts, + struct child_process *cmd, + struct string_list *names); + struct repository; struct packed_git; From 7db10535cc5ed1eb18a07f9ce3639bbc7706610e Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:10:25 -0400 Subject: [PATCH 46/50] repack: move `pack_kept_objects` to `struct pack_objects_args` The "pack_kept_objects" variable is defined as static to the repack builtin, but is inherently related to the pack-objects arguments that the builtin uses when generating new packs. Move that field into the "struct pack_objects_args", and shuffle around where we append the corresponding command-line option when preparing a pack-objects process. Specifically: - `write_cruft_pack()` always wants to pass "--honor-pack-keep", so explicitly set the `pack_kept_objects` field to "0" when initializing the `write_pack_opts` struct before calling `write_cruft_pack()`. - `write_filtered_pack()` no longer needs to handle writing the command-line option "--honor-pack-keep" when preparing a pack-objects process, since its call to `prepare_pack_objects()` will have already taken care of that. `write_filtered_pack()` also reads the `pack_kept_objects` field to determine whether to write the existing kept packs with a leading "^" character, so update that to read through the `po_args` pointer instead. - `cmd_repack()` also no longer has to write the "--honor-pack-keep" flag explicitly, since this is also handled via its call to `prepare_pack_objects()`. Since there is a default value for "pack_kept_objects" that relies on whether or not we are writing a bitmap (and not writing a MIDX), extract a default initializer for `struct pack_objects_args` that keeps this conditional default behavior. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 20 +++++++------------- repack-geometry.c | 5 ++--- repack.c | 2 ++ repack.h | 9 ++++++--- 4 files changed, 17 insertions(+), 19 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 836a006607..9d89217b77 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -33,7 +33,6 @@ #define RETAIN_PACK 2 static int pack_everything; -static int pack_kept_objects = -1; static int write_bitmaps = -1; static int use_delta_islands; static int run_update_server_info = 1; @@ -68,7 +67,7 @@ static int repack_config(const char *var, const char *value, return 0; } if (!strcmp(var, "repack.packkeptobjects")) { - pack_kept_objects = git_config_bool(var, value); + po_args->pack_kept_objects = git_config_bool(var, value); return 0; } if (!strcmp(var, "repack.writebitmaps") || @@ -122,8 +121,6 @@ static int write_filtered_pack(struct write_pack_opts *opts, strvec_push(&cmd.args, "--stdin-packs"); - if (!pack_kept_objects) - strvec_push(&cmd.args, "--honor-pack-keep"); for_each_string_list_item(item, &existing->kept_packs) strvec_pushf(&cmd.args, "--keep-pack=%s", item->string); @@ -146,7 +143,7 @@ static int write_filtered_pack(struct write_pack_opts *opts, fprintf(in, "%s.pack\n", item->string); for_each_string_list_item(item, &existing->cruft_packs) fprintf(in, "%s.pack\n", item->string); - caret = pack_kept_objects ? "" : "^"; + caret = opts->po_args->pack_kept_objects ? "" : "^"; for_each_string_list_item(item, &existing->kept_packs) fprintf(in, "%s%s.pack\n", caret, item->string); fclose(in); @@ -208,7 +205,6 @@ static int write_cruft_pack(struct write_pack_opts *opts, strvec_pushf(&cmd.args, "--cruft-expiration=%s", cruft_expiration); - strvec_push(&cmd.args, "--honor-pack-keep"); strvec_push(&cmd.args, "--non-empty"); cmd.in = -1; @@ -333,7 +329,7 @@ int cmd_repack(int argc, OPT_UNSIGNED(0, "max-pack-size", &po_args.max_pack_size, N_("maximum size of each packfile")), OPT_PARSE_LIST_OBJECTS_FILTER(&po_args.filter_options), - OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects, + OPT_BOOL(0, "pack-kept-objects", &po_args.pack_kept_objects, N_("repack objects in packs marked with .keep")), OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"), N_("do not repack this pack")), @@ -379,8 +375,8 @@ int cmd_repack(int argc, (!(pack_everything & ALL_INTO_ONE) || !is_bare_repository())) write_bitmaps = 0; } - if (pack_kept_objects < 0) - pack_kept_objects = write_bitmaps > 0 && !write_midx; + if (po_args.pack_kept_objects < 0) + po_args.pack_kept_objects = write_bitmaps > 0 && !write_midx; if (write_bitmaps && !(pack_everything & ALL_INTO_ONE) && !write_midx) die(_(incremental_bitmap_conflict_error)); @@ -421,8 +417,7 @@ int cmd_repack(int argc, if (geometry.split_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); - pack_geometry_init(&geometry, &existing, &po_args, - pack_kept_objects); + pack_geometry_init(&geometry, &existing, &po_args); pack_geometry_split(&geometry); } @@ -431,8 +426,6 @@ int cmd_repack(int argc, show_progress = !po_args.quiet && isatty(2); strvec_push(&cmd.args, "--keep-true-parents"); - if (!pack_kept_objects) - strvec_push(&cmd.args, "--honor-pack-keep"); for (i = 0; i < keep_pack_list.nr; i++) strvec_pushf(&cmd.args, "--keep-pack=%s", keep_pack_list.items[i].string); @@ -577,6 +570,7 @@ int cmd_repack(int argc, cruft_po_args.local = po_args.local; cruft_po_args.quiet = po_args.quiet; cruft_po_args.delta_base_offset = po_args.delta_base_offset; + cruft_po_args.pack_kept_objects = 0; ret = write_cruft_pack(&opts, cruft_expiration, combine_cruft_below_size, &names, diff --git a/repack-geometry.c b/repack-geometry.c index a879f2fe49..fd4a89a115 100644 --- a/repack-geometry.c +++ b/repack-geometry.c @@ -26,8 +26,7 @@ static int pack_geometry_cmp(const void *va, const void *vb) void pack_geometry_init(struct pack_geometry *geometry, struct existing_packs *existing, - const struct pack_objects_args *args, - int pack_kept_objects) + const struct pack_objects_args *args) { struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; @@ -42,7 +41,7 @@ void pack_geometry_init(struct pack_geometry *geometry, */ continue; - if (!pack_kept_objects) { + if (!args->pack_kept_objects) { /* * Any pack that has its pack_keep bit set will * appear in existing->kept_packs below, but diff --git a/repack.c b/repack.c index 8a0e4789fa..1982a48165 100644 --- a/repack.c +++ b/repack.c @@ -38,6 +38,8 @@ void prepare_pack_objects(struct child_process *cmd, strvec_push(&cmd->args, "--quiet"); if (args->delta_base_offset) strvec_push(&cmd->args, "--delta-base-offset"); + if (!args->pack_kept_objects) + strvec_push(&cmd->args, "--honor-pack-keep"); strvec_push(&cmd->args, out); cmd->git_cmd = 1; cmd->out = -1; diff --git a/repack.h b/repack.h index 9351293233..4a1c4eb606 100644 --- a/repack.h +++ b/repack.h @@ -17,10 +17,14 @@ struct pack_objects_args { int name_hash_version; int path_walk; int delta_base_offset; + int pack_kept_objects; struct list_objects_filter_options filter_options; }; -#define PACK_OBJECTS_ARGS_INIT { .delta_base_offset = 1 } +#define PACK_OBJECTS_ARGS_INIT { \ + .delta_base_offset = 1, \ + .pack_kept_objects = -1, \ +} struct child_process; @@ -104,8 +108,7 @@ struct pack_geometry { void pack_geometry_init(struct pack_geometry *geometry, struct existing_packs *existing, - const struct pack_objects_args *args, - int pack_kept_objects); + const struct pack_objects_args *args); void pack_geometry_split(struct pack_geometry *geometry); struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry); void pack_geometry_remove_redundant(struct pack_geometry *geometry, From d13115475862d9f699ffd75a661227e198bdf805 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:10:30 -0400 Subject: [PATCH 47/50] repack: move `write_filtered_pack()` out of the builtin In a similar fashion as in previous commits, move the function `write_filtered_pack()` out of the builtin and into its own compilation unit. This function is now part of the repack.h API, but implemented in its own "repack-filtered.c" unit as it is a separate component from other kinds of repacking operations. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 46 ------------------------------------------ meson.build | 1 + repack-filtered.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 4 ++++ 5 files changed, 57 insertions(+), 46 deletions(-) create mode 100644 repack-filtered.c diff --git a/Makefile b/Makefile index b214277163..ba4f3bbfa2 100644 --- a/Makefile +++ b/Makefile @@ -1137,6 +1137,7 @@ LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o LIB_OBJS += repack.o +LIB_OBJS += repack-filtered.o LIB_OBJS += repack-geometry.o LIB_OBJS += repack-midx.o LIB_OBJS += repack-promisor.o diff --git a/builtin/repack.c b/builtin/repack.c index 9d89217b77..a9fc09a24d 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -106,52 +106,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -static int write_filtered_pack(struct write_pack_opts *opts, - struct existing_packs *existing, - struct string_list *names) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - struct string_list_item *item; - FILE *in; - int ret; - const char *caret; - const char *pack_prefix = write_pack_opts_pack_prefix(opts); - - prepare_pack_objects(&cmd, opts->po_args, opts->destination); - - strvec_push(&cmd.args, "--stdin-packs"); - - for_each_string_list_item(item, &existing->kept_packs) - strvec_pushf(&cmd.args, "--keep-pack=%s", item->string); - - cmd.in = -1; - - ret = start_command(&cmd); - if (ret) - return ret; - - /* - * Here 'names' contains only the pack(s) that were just - * written, which is exactly the packs we want to keep. Also - * 'existing_kept_packs' already contains the packs in - * 'keep_pack_list'. - */ - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, names) - fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); - for_each_string_list_item(item, &existing->non_kept_packs) - fprintf(in, "%s.pack\n", item->string); - for_each_string_list_item(item, &existing->cruft_packs) - fprintf(in, "%s.pack\n", item->string); - caret = opts->po_args->pack_kept_objects ? "" : "^"; - for_each_string_list_item(item, &existing->kept_packs) - fprintf(in, "%s%s.pack\n", caret, item->string); - fclose(in); - - return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, - names); -} - static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, struct existing_packs *existing) { diff --git a/meson.build b/meson.build index 0423ed30c4..7124b158ae 100644 --- a/meson.build +++ b/meson.build @@ -463,6 +463,7 @@ libgit_sources = [ 'reftable/writer.c', 'remote.c', 'repack.c', + 'repack-filtered.c', 'repack-geometry.c', 'repack-midx.c', 'repack-promisor.c', diff --git a/repack-filtered.c b/repack-filtered.c new file mode 100644 index 0000000000..96c7b02bb6 --- /dev/null +++ b/repack-filtered.c @@ -0,0 +1,51 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "repository.h" +#include "run-command.h" +#include "string-list.h" + +int write_filtered_pack(struct write_pack_opts *opts, + struct existing_packs *existing, + struct string_list *names) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list_item *item; + FILE *in; + int ret; + const char *caret; + const char *pack_prefix = write_pack_opts_pack_prefix(opts); + + prepare_pack_objects(&cmd, opts->po_args, opts->destination); + + strvec_push(&cmd.args, "--stdin-packs"); + + for_each_string_list_item(item, &existing->kept_packs) + strvec_pushf(&cmd.args, "--keep-pack=%s", item->string); + + cmd.in = -1; + + ret = start_command(&cmd); + if (ret) + return ret; + + /* + * Here 'names' contains only the pack(s) that were just + * written, which is exactly the packs we want to keep. Also + * 'existing_kept_packs' already contains the packs in + * 'keep_pack_list'. + */ + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, names) + fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); + for_each_string_list_item(item, &existing->non_kept_packs) + fprintf(in, "%s.pack\n", item->string); + for_each_string_list_item(item, &existing->cruft_packs) + fprintf(in, "%s.pack\n", item->string); + caret = opts->po_args->pack_kept_objects ? "" : "^"; + for_each_string_list_item(item, &existing->kept_packs) + fprintf(in, "%s%s.pack\n", caret, item->string); + fclose(in); + + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); +} diff --git a/repack.h b/repack.h index 4a1c4eb606..a7ddbe784b 100644 --- a/repack.h +++ b/repack.h @@ -133,4 +133,8 @@ struct repack_write_midx_opts { void midx_snapshot_refs(struct repository *repo, struct tempfile *f); int write_midx_included_packs(struct repack_write_midx_opts *opts); +int write_filtered_pack(struct write_pack_opts *opts, + struct existing_packs *existing, + struct string_list *names); + #endif /* REPACK_H */ From 6f8838e96ba9ccdacbbe4cb009f2e9beef43ca88 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:10:34 -0400 Subject: [PATCH 48/50] repack: move `write_cruft_pack()` out of the builtin In an identical fashion as the previous commit, move the function `write_cruft_pack()` into its own compilation unit, and make the function visible through the repack.h API. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 94 --------------------------------------------- meson.build | 1 + repack-cruft.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 6 +++ 5 files changed, 107 insertions(+), 94 deletions(-) create mode 100644 repack-cruft.c diff --git a/Makefile b/Makefile index ba4f3bbfa2..e3c4bf1b4a 100644 --- a/Makefile +++ b/Makefile @@ -1137,6 +1137,7 @@ LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o LIB_OBJS += repack.o +LIB_OBJS += repack-cruft.o LIB_OBJS += repack-filtered.o LIB_OBJS += repack-geometry.o LIB_OBJS += repack-midx.o diff --git a/builtin/repack.c b/builtin/repack.c index a9fc09a24d..9171ca66a7 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -106,100 +106,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, - struct existing_packs *existing) -{ - struct packfile_store *packs = existing->repo->objects->packfiles; - struct packed_git *p; - struct strbuf buf = STRBUF_INIT; - size_t i; - - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - if (!(p->is_cruft && p->pack_local)) - continue; - - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - - if (!string_list_has_string(&existing->cruft_packs, buf.buf)) - continue; - - if (p->pack_size < combine_cruft_below_size) { - fprintf(in, "-%s\n", pack_basename(p)); - } else { - existing_packs_retain_cruft(existing, p); - fprintf(in, "%s\n", pack_basename(p)); - } - } - - for (i = 0; i < existing->non_kept_packs.nr; i++) - fprintf(in, "-%s.pack\n", - existing->non_kept_packs.items[i].string); - - strbuf_release(&buf); -} - -static int write_cruft_pack(struct write_pack_opts *opts, - const char *cruft_expiration, - unsigned long combine_cruft_below_size, - struct string_list *names, - struct existing_packs *existing) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - struct string_list_item *item; - FILE *in; - int ret; - const char *pack_prefix = write_pack_opts_pack_prefix(opts); - - prepare_pack_objects(&cmd, opts->po_args, opts->destination); - - strvec_push(&cmd.args, "--cruft"); - if (cruft_expiration) - strvec_pushf(&cmd.args, "--cruft-expiration=%s", - cruft_expiration); - - strvec_push(&cmd.args, "--non-empty"); - - cmd.in = -1; - - ret = start_command(&cmd); - if (ret) - return ret; - - /* - * names has a confusing double use: it both provides the list - * of just-written new packs, and accepts the name of the cruft - * pack we are writing. - * - * By the time it is read here, it contains only the pack(s) - * that were just written, which is exactly the set of packs we - * want to consider kept. - * - * If `--expire-to` is given, the double-use served by `names` - * ensures that the pack written to `--expire-to` excludes any - * objects contained in the cruft pack. - */ - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, names) - fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); - if (combine_cruft_below_size && !cruft_expiration) { - combine_small_cruft_packs(in, combine_cruft_below_size, - existing); - } else { - for_each_string_list_item(item, &existing->non_kept_packs) - fprintf(in, "-%s.pack\n", item->string); - for_each_string_list_item(item, &existing->cruft_packs) - fprintf(in, "-%s.pack\n", item->string); - } - for_each_string_list_item(item, &existing->kept_packs) - fprintf(in, "%s.pack\n", item->string); - fclose(in); - - return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, - names); -} - int cmd_repack(int argc, const char **argv, const char *prefix, diff --git a/meson.build b/meson.build index 7124b158ae..39152b37ba 100644 --- a/meson.build +++ b/meson.build @@ -463,6 +463,7 @@ libgit_sources = [ 'reftable/writer.c', 'remote.c', 'repack.c', + 'repack-cruft.c', 'repack-filtered.c', 'repack-geometry.c', 'repack-midx.c', diff --git a/repack-cruft.c b/repack-cruft.c new file mode 100644 index 0000000000..accb98bcdb --- /dev/null +++ b/repack-cruft.c @@ -0,0 +1,99 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "packfile.h" +#include "repository.h" +#include "run-command.h" + +static void combine_small_cruft_packs(FILE *in, off_t combine_cruft_below_size, + struct existing_packs *existing) +{ + struct packfile_store *packs = existing->repo->objects->packfiles; + struct packed_git *p; + struct strbuf buf = STRBUF_INIT; + size_t i; + + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + if (!(p->is_cruft && p->pack_local)) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if (!string_list_has_string(&existing->cruft_packs, buf.buf)) + continue; + + if (p->pack_size < combine_cruft_below_size) { + fprintf(in, "-%s\n", pack_basename(p)); + } else { + existing_packs_retain_cruft(existing, p); + fprintf(in, "%s\n", pack_basename(p)); + } + } + + for (i = 0; i < existing->non_kept_packs.nr; i++) + fprintf(in, "-%s.pack\n", + existing->non_kept_packs.items[i].string); + + strbuf_release(&buf); +} + +int write_cruft_pack(struct write_pack_opts *opts, + const char *cruft_expiration, + unsigned long combine_cruft_below_size, + struct string_list *names, + struct existing_packs *existing) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list_item *item; + FILE *in; + int ret; + const char *pack_prefix = write_pack_opts_pack_prefix(opts); + + prepare_pack_objects(&cmd, opts->po_args, opts->destination); + + strvec_push(&cmd.args, "--cruft"); + if (cruft_expiration) + strvec_pushf(&cmd.args, "--cruft-expiration=%s", + cruft_expiration); + + strvec_push(&cmd.args, "--non-empty"); + + cmd.in = -1; + + ret = start_command(&cmd); + if (ret) + return ret; + + /* + * names has a confusing double use: it both provides the list + * of just-written new packs, and accepts the name of the cruft + * pack we are writing. + * + * By the time it is read here, it contains only the pack(s) + * that were just written, which is exactly the set of packs we + * want to consider kept. + * + * If `--expire-to` is given, the double-use served by `names` + * ensures that the pack written to `--expire-to` excludes any + * objects contained in the cruft pack. + */ + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, names) + fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); + if (combine_cruft_below_size && !cruft_expiration) { + combine_small_cruft_packs(in, combine_cruft_below_size, + existing); + } else { + for_each_string_list_item(item, &existing->non_kept_packs) + fprintf(in, "-%s.pack\n", item->string); + for_each_string_list_item(item, &existing->cruft_packs) + fprintf(in, "-%s.pack\n", item->string); + } + for_each_string_list_item(item, &existing->kept_packs) + fprintf(in, "%s.pack\n", item->string); + fclose(in); + + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); +} diff --git a/repack.h b/repack.h index a7ddbe784b..15886c3488 100644 --- a/repack.h +++ b/repack.h @@ -137,4 +137,10 @@ int write_filtered_pack(struct write_pack_opts *opts, struct existing_packs *existing, struct string_list *names); +int write_cruft_pack(struct write_pack_opts *opts, + const char *cruft_expiration, + unsigned long combine_cruft_below_size, + struct string_list *names, + struct existing_packs *existing); + #endif /* REPACK_H */ From e056dec3d6d09bba280b85d47ac1cd589c013e9f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 28 Sep 2025 18:10:38 -0400 Subject: [PATCH 49/50] builtin/repack.c: clean up unused `#include`s Over the past several dozen commits, we have moved a large amount of functionality out of the repack builtin and into other files like repack.c, repack-cruft.c, repack-filtered.c, repack-midx.c, and repack-promisor.c. These files specify the minimal set of `#include`s that they need to compile successfully, but we did not change the set of `#include`s in the repack builtin itself. Now that the code movement is complete, let's clean up that set of `#include`s and trim down the builtin to include the minimal amount of external headers necessary to compile. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 9171ca66a7..ad60c4290d 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -4,26 +4,17 @@ #include "builtin.h" #include "config.h" #include "environment.h" -#include "gettext.h" -#include "hex.h" #include "parse-options.h" #include "path.h" #include "run-command.h" #include "server-info.h" -#include "strbuf.h" #include "string-list.h" -#include "strvec.h" #include "midx.h" #include "packfile.h" #include "prune-packed.h" -#include "odb.h" #include "promisor-remote.h" #include "repack.h" #include "shallow.h" -#include "pack.h" -#include "pack-bitmap.h" -#include "refs.h" -#include "list-objects-filter-options.h" #define ALL_INTO_ONE 1 #define LOOSEN_UNREACHABLE 2 From c886af90f8d25bd73c841a9925b8f03e1b232910 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 29 Sep 2025 15:40:59 -0700 Subject: [PATCH 50/50] SQUASH??? play well with other topics by preemptively including "repository.h" --- repack-geometry.c | 1 + 1 file changed, 1 insertion(+) diff --git a/repack-geometry.c b/repack-geometry.c index fd4a89a115..56d651c9ce 100644 --- a/repack-geometry.c +++ b/repack-geometry.c @@ -1,6 +1,7 @@ #define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" +#include #include "repack.h" #include "hex.h" #include "packfile.h"