From bc2f6115693874b2e8feed80ff5539743d257920 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 11 May 2026 20:46:48 -0400 Subject: [PATCH 1/9] t/helper: add 'test-tool bitmap write' subcommand In f16eb1c091 (pseudo-merge: fix disk reads from find_pseudo_merge(), 2026-03-31), we noted that `apply_pseudo_merges_for_commit()` is never triggered by the existing test suite, and that this bears further investigation. This patch is the first one to begin that investigation. The following patches will expose and fix a variety of bugs in the implementation of pseudo-merge bitmaps. In order to do so, however, many of these tests require very precise selection of which commits receive bitmaps and which do not. To date, there isn't a standard approach to easily facilitate this. Address this by introducing a `test-tool bitmap write` subcommand that writes a bitmap for a given packfile, reading the set of commits which should receive individual bitmaps from stdin like so: test-tool bitmap write " is the filename for a specific packfile (e.g., "pack-abc123.pack"), and "/path/to/commits.list" is a list of commit OIDs which will receive bitmaps. The helper respects `bitmapPseudoMerge.*` configuration for creating pseudo-merge bitmaps alongside the regular commit bitmaps. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/helper/test-bitmap.c | 113 +++++++++++++++++++++++++++++++++++++++- t/t5310-pack-bitmaps.sh | 24 +++++++++ 2 files changed, 136 insertions(+), 1 deletion(-) diff --git a/t/helper/test-bitmap.c b/t/helper/test-bitmap.c index 16a01669e4..381e9b58b2 100644 --- a/t/helper/test-bitmap.c +++ b/t/helper/test-bitmap.c @@ -2,7 +2,10 @@ #include "test-tool.h" #include "git-compat-util.h" +#include "hex.h" +#include "odb.h" #include "pack-bitmap.h" +#include "pseudo-merge.h" #include "setup.h" static int bitmap_list_commits(void) @@ -35,6 +38,111 @@ static int bitmap_dump_pseudo_merge_objects(uint32_t n) return test_bitmap_pseudo_merge_objects(the_repository, n); } +static int add_packed_object(const struct object_id *oid, + struct packed_git *pack, + uint32_t pos, + void *_data) +{ + struct packing_data *packed = _data; + struct object_entry *entry; + struct object_info oi = OBJECT_INFO_INIT; + enum object_type type; + + oi.typep = &type; + + entry = packlist_alloc(packed, oid); + entry->idx.offset = nth_packed_object_offset(pack, pos); + if (packed_object_info(pack, entry->idx.offset, &oi) < 0) + die("could not get type of object %s", + oid_to_hex(oid)); + oe_set_type(entry, type); + oe_set_in_pack(packed, entry, pack); + + return 0; +} + +static int idx_oid_cmp(const void *va, const void *vb) +{ + const struct pack_idx_entry *a = *(const struct pack_idx_entry **)va; + const struct pack_idx_entry *b = *(const struct pack_idx_entry **)vb; + + return oidcmp(&a->oid, &b->oid); +} + +static int bitmap_write(const char *basename) +{ + struct packed_git *p = NULL; + struct packing_data packed = { 0 }; + struct bitmap_writer writer; + struct pack_idx_entry **index; + struct strbuf buf = STRBUF_INIT; + uint32_t i; + + prepare_repo_settings(the_repository); + repo_for_each_pack(the_repository, p) { + if (!strcmp(pack_basename(p), basename)) + break; + } + + if (!p) + die("could not find pack '%s'", basename); + + if (open_pack_index(p)) + die("cannot open pack index for '%s'", p->pack_name); + + prepare_packing_data(the_repository, &packed); + + for_each_object_in_pack(p, add_packed_object, &packed, + ODB_FOR_EACH_OBJECT_PACK_ORDER); + + /* + * Build the index array now that data.packed.objects[] is + * fully allocated (packlist_alloc() may have reallocated it + * during the loop above). + */ + ALLOC_ARRAY(index, p->num_objects); + for (i = 0; i < p->num_objects; i++) + index[i] = &packed.objects[i].idx; + + bitmap_writer_init(&writer, the_repository, &packed, NULL); + bitmap_writer_build_type_index(&writer, index); + + while (strbuf_getline_lf(&buf, stdin) != EOF) { + struct object_id oid; + struct commit *c; + + if (get_oid_hex(buf.buf, &oid)) + die("invalid OID: %s", buf.buf); + + c = lookup_commit(the_repository, &oid); + if (!c || repo_parse_commit(the_repository, c)) + die("could not parse commit %s", buf.buf); + + bitmap_writer_push_commit(&writer, c, 0); + } + + select_pseudo_merges(&writer); + if (bitmap_writer_build(&writer) < 0) + die("failed to build bitmaps"); + + bitmap_writer_set_checksum(&writer, p->hash); + + QSORT(index, p->num_objects, idx_oid_cmp); + + strbuf_reset(&buf); + strbuf_addstr(&buf, p->pack_name); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".bitmap"); + bitmap_writer_finish(&writer, index, buf.buf, 0); + + bitmap_writer_free(&writer); + strbuf_release(&buf); + free(index); + clear_packing_data(&packed); + + return 0; +} + int cmd__bitmap(int argc, const char **argv) { setup_git_directory(); @@ -51,13 +159,16 @@ int cmd__bitmap(int argc, const char **argv) return bitmap_dump_pseudo_merge_commits(atoi(argv[2])); if (argc == 3 && !strcmp(argv[1], "dump-pseudo-merge-objects")) return bitmap_dump_pseudo_merge_objects(atoi(argv[2])); + if (argc == 3 && !strcmp(argv[1], "write")) + return bitmap_write(argv[2]); usage("\ttest-tool bitmap list-commits\n" "\ttest-tool bitmap list-commits-with-offset\n" "\ttest-tool bitmap dump-hashes\n" "\ttest-tool bitmap dump-pseudo-merges\n" "\ttest-tool bitmap dump-pseudo-merge-commits \n" - "\ttest-tool bitmap dump-pseudo-merge-objects "); + "\ttest-tool bitmap dump-pseudo-merge-objects \n" + "\ttest-tool bitmap write < "); return -1; } diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh index f693cb5669..efeb71593b 100755 --- a/t/t5310-pack-bitmaps.sh +++ b/t/t5310-pack-bitmaps.sh @@ -648,4 +648,28 @@ test_expect_success 'truncated bitmap fails gracefully (lookup table)' ' test_grep corrupted.bitmap.index stderr ' +test_expect_success 'test-tool bitmap write determines bitmap selection' ' + test_when_finished "rm -fr bitmap-write-helper" && + git init bitmap-write-helper && + ( + cd bitmap-write-helper && + + test_commit_bulk 64 && + git repack -ad && + + pack="$(ls .git/objects/pack/pack-*.pack)" && + + git rev-parse HEAD >in && + test-tool bitmap write "$(basename $pack)" bitmaps.raw && + sort bitmaps.raw >bitmaps && + test_cmp in bitmaps && + + git rev-list --count --objects --use-bitmap-index HEAD >actual && + git rev-list --count --objects HEAD >expect && + test_cmp expect actual + ) +' + test_done From 49369d8290c3a5c95d835df85fdf53eba7562496 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 11 May 2026 20:46:51 -0400 Subject: [PATCH 2/9] t5333: demonstrate various pseudo-merge bugs Using the test helper introduced via the previous commit, add various failing tests demonstrating bugs in the pseudo-merge implementation. These are all marked as failing with one exception. The "sampleRate=0" test describes a latent bug, which is only reachable through a code path that is itself masked by a separate bug. A future commit will fix that bug, and, in turn, cause the aforementioned test to fail. Accordingly, that commit will mark the test as failing, and it will be re-marked as passing in a separate commit which fixes the once-latent bug. For the rest: the following commits will explain and fix the underlying bugs in detail. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/t5333-pseudo-merge-bitmaps.sh | 198 ++++++++++++++++++++++++++++++++ 1 file changed, 198 insertions(+) diff --git a/t/t5333-pseudo-merge-bitmaps.sh b/t/t5333-pseudo-merge-bitmaps.sh index 1f7a5d82ee..0e9638c31c 100755 --- a/t/t5333-pseudo-merge-bitmaps.sh +++ b/t/t5333-pseudo-merge-bitmaps.sh @@ -462,4 +462,202 @@ test_expect_success 'use pseudo-merge in boundary traversal' ' ) ' +test_expect_failure 'apply pseudo-merges during fill-in traversal' ' + test_when_finished "rm -fr pseudo-merge-fill-in-traversal" && + git init pseudo-merge-fill-in-traversal && + ( + cd pseudo-merge-fill-in-traversal && + + git config bitmapPseudoMerge.test.pattern refs/tags/ && + git config bitmapPseudoMerge.test.maxMerges 1 && + git config bitmapPseudoMerge.test.stableThreshold never && + + test_commit_bulk 64 && + tag_everything && + git repack -ad && + + pack=$(ls .git/objects/pack/pack-*.pack) && + git rev-parse HEAD~63 >in && + test-tool bitmap write "$(basename $pack)" merges && + test_line_count = 1 merges && + + test_commit stale && + + git rev-list --count --objects HEAD >expect && + + : >trace2.txt && + GIT_TRACE2_EVENT=$PWD/trace2.txt \ + git rev-list --count --objects --use-bitmap-index HEAD >actual && + test_pseudo_merges_satisfied 1 in && + while read oid + do + echo "create refs/group-$side/$oid $oid" || return 1 + done in && + test-tool bitmap write "$(basename $pack)" merges && + test_line_count = 2 merges && + + test_commit stale && + + git rev-list --count --objects HEAD >expect && + + : >trace2.txt && + GIT_TRACE2_EVENT=$PWD/trace2.txt \ + git rev-list --count --objects --use-bitmap-index HEAD >actual && + test_pseudo_merges_satisfied 2 in && + test-tool bitmap write "$(basename $pack)" merges && + test_line_count = 2 merges && + + test_commit stale && + + git rev-list --count --objects HEAD >expect && + + : >trace2.txt && + GIT_TRACE2_EVENT=$PWD/trace2.txt \ + git rev-list --count --objects --use-bitmap-index HEAD >actual && + test_pseudo_merges_satisfied 2 in && + GIT_TEST_DATE_NOW=$test_tick \ + test-tool bitmap write "$(basename $pack)" merges && + test_line_count = 1 merges + ) +' + +test_expect_success 'sampleRate=0 does not cause division by zero' ' + test_when_finished "rm -fr pseudo-merge-sample-rate-zero" && + git init pseudo-merge-sample-rate-zero && + ( + cd pseudo-merge-sample-rate-zero && + + test_commit_bulk 64 && + tag_everything && + git repack -ad && + + pack="$(ls .git/objects/pack/pack-*.pack)" && + + git config bitmapPseudoMerge.test.pattern "refs/tags/" && + git config bitmapPseudoMerge.test.maxMerges 1 && + git config bitmapPseudoMerge.test.sampleRate 0 && + git config bitmapPseudoMerge.test.threshold now && + git config bitmapPseudoMerge.test.stableThreshold never && + + git rev-parse HEAD~63 >in && + test-tool bitmap write "$(basename $pack)" Date: Mon, 11 May 2026 20:46:54 -0400 Subject: [PATCH 3/9] pack-bitmap-write: sort pseudo-merge commit lookup table in pack order The pseudo-merge commit lookup table stores each commit's position in the pack- or pseudo-pack order, and is used to perform a binary search in order to determine which pseudo-merge(s) a given commit belongs to. However, the table was previously sorted in lexical order (via `oid_array_sort()`), causing the binary search to fail. While this causes pseudo-merge bitmaps to be de-facto broken for fill-in traversal, there are a couple of important points to keep in mind: * Pseudo-merge application during the initial phases of a bitmap-based traversal are applied via `cascade_pseudo_merges_1()`. This function enumerates the known pseudo-merges and determines if its parents are a subset of the traversal roots. This is a different path than the fill-in traversal, where we are looking for any pseudo-merges which may be satisfied after visiting some commit along an object walk, which involves the aforementioned (broken) binary search. As a consequence, any pseudo-merges we apply at this stage are done so correctly. * While this bug makes applying pseudo-merges during fill-in traversal effectively broken, it does not produce wrong results. Instead of applying the *wrong* pseudo-merge, we will simply fail to find satisfied pseudo-merges, leaving the traversal to use the existing fill-in routines. Fix this by sorting the table by bit position before writing, matching the order that the reader's binary search expects. This does produce a change the on-disk format insofar as the actual code now complies with the documented format (for more details, refer to: Documentation/technical/bitmap-format.adoc). Given that this never worked in the first place, such a change should be OK to perform. If an out-of-tree implementation of pseudo-merges happened to generate bitmaps that comply with the documented format, they will continue to be read and interpreted as normal. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap-write.c | 21 ++++++++++++++++++++- t/t5333-pseudo-merge-bitmaps.sh | 2 +- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 8338d7217e..86ed6a5d78 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -819,6 +819,20 @@ static void write_selected_commits_v1(struct bitmap_writer *writer, } } +static int pseudo_merge_commit_pos_cmp(const void *_va, const void *_vb, + void *_data) +{ + struct bitmap_writer *writer = _data; + uint32_t pos_a = find_object_pos(writer, _va, NULL); + uint32_t pos_b = find_object_pos(writer, _vb, NULL); + + if (pos_a < pos_b) + return -1; + if (pos_a > pos_b) + return 1; + return 0; +} + static void write_pseudo_merges(struct bitmap_writer *writer, struct hashfile *f) { @@ -876,7 +890,12 @@ static void write_pseudo_merges(struct bitmap_writer *writer, oid_array_append(&commits, &kh_key(writer->pseudo_merge_commits, i)); } - oid_array_sort(&commits); + /* + * Sort the commits by their bit position so that the lookup + * table can be binary searched by the reader (see + * find_pseudo_merge()). + */ + QSORT_S(commits.oid, commits.nr, pseudo_merge_commit_pos_cmp, writer); /* write lookup table (non-extended) */ for (i = 0; i < commits.nr; i++) { diff --git a/t/t5333-pseudo-merge-bitmaps.sh b/t/t5333-pseudo-merge-bitmaps.sh index 0e9638c31c..3d7a766812 100755 --- a/t/t5333-pseudo-merge-bitmaps.sh +++ b/t/t5333-pseudo-merge-bitmaps.sh @@ -462,7 +462,7 @@ test_expect_success 'use pseudo-merge in boundary traversal' ' ) ' -test_expect_failure 'apply pseudo-merges during fill-in traversal' ' +test_expect_success 'apply pseudo-merges during fill-in traversal' ' test_when_finished "rm -fr pseudo-merge-fill-in-traversal" && git init pseudo-merge-fill-in-traversal && ( From b1e3fcdb9b087b4d69836dd5a228648008ff419a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 11 May 2026 20:46:57 -0400 Subject: [PATCH 4/9] pack-bitmap: fix inverted binary search in `pseudo_merge_at()` The binary search in `pseudo_merge_at()` has its "lo" and "hi" updates swapped: when the midpoint's offset is less than the target, it sets `hi = mi` (searching left) instead of `lo = mi + 1` (searching right), and vice versa. This means that lookups for pseudo-merges whose offset is not near the midpoint of the pseudo-merge table are likely to fail. In practice, with a single pseudo-merge group this is masked because the lone entry is always at the midpoint. With multiple groups, the inverted comparisons cause lookups to search in the wrong direction, potentially missing entries. Swap the "lo" and "hi" assignments to search in the correct direction, making it possible to apply pseudo-merges during fill-in when more than one pseudo-merge exists in a group. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pseudo-merge.c | 4 ++-- t/t5333-pseudo-merge-bitmaps.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pseudo-merge.c b/pseudo-merge.c index ff18b6c364..fb71c76179 100644 --- a/pseudo-merge.c +++ b/pseudo-merge.c @@ -559,9 +559,9 @@ static struct pseudo_merge *pseudo_merge_at(const struct pseudo_merge_map *pm, if (got == want) return use_pseudo_merge(pm, &pm->v[mi]); else if (got < want) - hi = mi; - else lo = mi + 1; + else + hi = mi; } warning(_("could not find pseudo-merge for commit %s at offset %"PRIuMAX), diff --git a/t/t5333-pseudo-merge-bitmaps.sh b/t/t5333-pseudo-merge-bitmaps.sh index 3d7a766812..5411fbf1e0 100755 --- a/t/t5333-pseudo-merge-bitmaps.sh +++ b/t/t5333-pseudo-merge-bitmaps.sh @@ -496,7 +496,7 @@ test_expect_success 'apply pseudo-merges during fill-in traversal' ' ) ' -test_expect_failure 'apply pseudo-merges from multiple groups during fill-in' ' +test_expect_success 'apply pseudo-merges from multiple groups during fill-in' ' test_when_finished "rm -fr pseudo-merge-fill-in-multi" && git init pseudo-merge-fill-in-multi && ( From 8b5f199f302869cc60cf9390ce46003b6b3fab48 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 11 May 2026 20:47:00 -0400 Subject: [PATCH 5/9] pack-bitmap: fix pseudo-merge lookup for shared commits When a commit appears in more than one pseudo-merge group, its entry in the commit lookup table has the high bit set in its offset field, indicating that the offset points to an "extended" table containing the set of pseudo-merges for that commit. There are three bugs in this path: * The `next_ext` offset in `write_pseudo_merges()` undercounts the per-entry size of the lookup table (8 vs. 12 bytes). * `nth_pseudo_merge_ext()` calls `read_pseudo_merge_commit_at()` on a pseudo-merge bitmap offset, misinterpreting it as a 12-byte commit table entry. * The error check after `pseudo_merge_ext_at()` in `apply_pseudo_merges_for_commit()` tests `< -1` instead of `< 0`, silently swallowing errors from `error()`. The first bug is on the write side: each commit lookup entry contains a 4- and 8-byte unsigned value for a total of 12 bytes, but the calculation assumes that the entry only contains 8 bytes of data. This makes `next_ext` too small, so the extended-table offsets that get written point into the middle of the non-extended lookup table rather than past it. The reader then interprets non-extended lookup data as extended entries, producing garbage. The second bug is on the read side and is independently fatal: even with a correctly positioned extended table, `nth_pseudo_merge_ext()` feeds the offset it reads (which points at pseudo-merge bitmap data) to `read_pseudo_merge_commit_at()`. That function tries to parse 12 bytes as a `pseudo_merge_commit` struct, clobbering `merge->pseudo_merge_ofs` with whatever happens to be at that location. The caller only needs `pseudo_merge_ofs`, so the fix is to store the offset directly rather than re-parsing a commit table entry. The `commit_pos` field is left untouched, retaining the value that `find_pseudo_merge()` set earlier. The third bug is latent. With the first two fixes applied, the extended table is correctly written and read, so `pseudo_merge_ext_at()` does not fail during normal operation. The `< -1` vs `< 0` distinction only matters when the bitmap file is corrupt or truncated, in which case the error would be silently ignored and the code would proceed with uninitialized data. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap-write.c | 2 +- pseudo-merge.c | 4 ++-- t/t5333-pseudo-merge-bitmaps.sh | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 86ed6a5d78..1c8070f99c 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -877,7 +877,7 @@ static void write_pseudo_merges(struct bitmap_writer *writer, next_ext = st_add(hashfile_total(f), st_mult(kh_size(writer->pseudo_merge_commits), - sizeof(uint64_t))); + sizeof(uint32_t) + sizeof(uint64_t))); table_start = hashfile_total(f); diff --git a/pseudo-merge.c b/pseudo-merge.c index fb71c76179..34e1da00b4 100644 --- a/pseudo-merge.c +++ b/pseudo-merge.c @@ -600,7 +600,7 @@ static int nth_pseudo_merge_ext(const struct pseudo_merge_map *pm, return error(_("out-of-bounds read: (%"PRIuMAX" >= %"PRIuMAX")"), (uintmax_t)ofs, (uintmax_t)pm->map_size); - read_pseudo_merge_commit_at(merge, pm->map + ofs); + merge->pseudo_merge_ofs = ofs; return 0; } @@ -671,7 +671,7 @@ int apply_pseudo_merges_for_commit(const struct pseudo_merge_map *pm, off_t ofs = merge_commit.pseudo_merge_ofs & ~((uint64_t)1<<63); uint32_t i; - if (pseudo_merge_ext_at(pm, &ext, ofs) < -1) { + if (pseudo_merge_ext_at(pm, &ext, ofs) < 0) { warning(_("could not read extended pseudo-merge table " "for commit %s"), oid_to_hex(&commit->object.oid)); diff --git a/t/t5333-pseudo-merge-bitmaps.sh b/t/t5333-pseudo-merge-bitmaps.sh index 5411fbf1e0..90459da5e6 100755 --- a/t/t5333-pseudo-merge-bitmaps.sh +++ b/t/t5333-pseudo-merge-bitmaps.sh @@ -549,7 +549,7 @@ test_expect_success 'apply pseudo-merges from multiple groups during fill-in' ' ) ' -test_expect_failure 'apply pseudo-merges with overlapping groups during fill-in' ' +test_expect_success 'apply pseudo-merges with overlapping groups during fill-in' ' test_when_finished "rm -fr pseudo-merge-fill-in-overlap" && git init pseudo-merge-fill-in-overlap && ( From 78e85e05f3341c70d1b9a147eef8c61478cc15f9 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 11 May 2026 20:47:03 -0400 Subject: [PATCH 6/9] pack-bitmap: parse commits in `find_pseudo_merge_group_for_ref()` `find_pseudo_merge_group_for_ref()` uses the commit's date to classify it as either "stable" (older than the stable threshold) or "unstable" (otherwise). However, to find the relevant commit from a given OID, the function `find_pseudo_merge_group_for_ref()` uses `lookup_commit()` which does not parse commits. Because an unparsed commit has its "date" set to zero, every candidate is placed in the "stable" bucket regardless of its actual committer timestamp. This means the `bitmapPseudoMerge.*.threshold` and `stableThreshold` configuration options have no effect: the stable/unstable split is always determined by comparing against zero rather than the real commit date. The net result is that pseudo-merge groups are partitioned by `stableSize` instead of the intended decay-based sizing, and the `sampleRate` knob (which only applies to the unstable path) is never exercised. Fix this by calling `repo_parse_commit()` after `lookup_commit()`, bailing out of the callback if parsing fails. The corresponding test configures two pseudo-merge groups that both match all tags. The "stable" group uses `threshold=1.month.ago`, and the "all" group uses `threshold=now`. The test use our custom "GIT_TEST_DATE_NOW" environment variable by setting it to the value of "$test_tick" to align Git's notion of "now" (and therefore "1.month.ago") with the `test_tick` timestamps, so the commits appear to be younger than one month: only the "all" group matches them, producing exactly one pseudo-merge. Without the fix every commit has `date == 0`, which satisfies `date <= threshold` for both groups (since 0 is older than one month ago), and the "stable" group erroneously matches as well. Now that commits are correctly classified as "unstable", the bug described in the test exercising the "sampleRate=0" test is reachable, and the test is marked as failing. It will be fixed in a following commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pseudo-merge.c | 2 ++ t/t5333-pseudo-merge-bitmaps.sh | 22 ++++++++++++---------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/pseudo-merge.c b/pseudo-merge.c index 34e1da00b4..d79e5fb649 100644 --- a/pseudo-merge.c +++ b/pseudo-merge.c @@ -236,6 +236,8 @@ static int find_pseudo_merge_group_for_ref(const struct reference *ref, void *_d c = lookup_commit(the_repository, maybe_peeled); if (!c) return 0; + if (repo_parse_commit(the_repository, c)) + return 0; if (!packlist_find(writer->to_pack, maybe_peeled)) return 0; diff --git a/t/t5333-pseudo-merge-bitmaps.sh b/t/t5333-pseudo-merge-bitmaps.sh index 90459da5e6..0032a16606 100755 --- a/t/t5333-pseudo-merge-bitmaps.sh +++ b/t/t5333-pseudo-merge-bitmaps.sh @@ -592,32 +592,34 @@ test_expect_success 'apply pseudo-merges with overlapping groups during fill-in' ) ' -test_expect_failure 'pseudo-merge commits are correctly classified by date' ' +test_expect_success 'pseudo-merge commits are correctly classified by date' ' test_when_finished "rm -fr pseudo-merge-date-classification" && git init pseudo-merge-date-classification && ( cd pseudo-merge-date-classification && test_commit_bulk 64 && + tag_everything && git repack -ad && pack="$(ls .git/objects/pack/pack-*.pack)" && # Configure two pseudo-merge groups: one that only - # matches "stable" refs (older than one month), and one - # that matches all refs. With 64 freshly-created tags - # (all younger than one month) the stable group should - # have zero pseudo-merges and the catch-all group should - # have one. + # matches "stable" refs (older than one month), and + # one that matches all refs. With 64 tags whose + # commits are all younger than one month, the + # "stable" group should have zero pseudo-merges and + # the "all" group should have one. # # Use GIT_TEST_DATE_NOW to align "now" (and therefore # "1.month.ago") with the test_tick timestamps so that # the commits are within the last month. # - # This exercises the date-based classification in - # find_pseudo_merge_group_for_ref(), which requires - # that commits are parsed before inspecting their date. + # Without parsing the commit, its date field would + # be zero, causing it to satisfy date <= threshold + # for the "stable" group as well, and both groups + # would produce pseudo-merges. git config bitmapPseudoMerge.stable.pattern "refs/tags/" && git config bitmapPseudoMerge.stable.maxMerges 64 && git config bitmapPseudoMerge.stable.stableThreshold never && @@ -637,7 +639,7 @@ test_expect_failure 'pseudo-merge commits are correctly classified by date' ' ) ' -test_expect_success 'sampleRate=0 does not cause division by zero' ' +test_expect_failure 'sampleRate=0 does not cause division by zero' ' test_when_finished "rm -fr pseudo-merge-sample-rate-zero" && git init pseudo-merge-sample-rate-zero && ( From 03c7a30ceeaee8d70ff2e2cbd9b4bce896841bfa Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 11 May 2026 20:47:06 -0400 Subject: [PATCH 7/9] pack-bitmap: reject pseudo-merge "sampleRate" of 0 The "bitmapPseudoMerge.*.sampleRate" configuration controls what fraction of unstable commits are included in each pseudo-merge group. The config validation accepts values in the range `[0, 1]`, but a value of exactly 0 causes a division by zero in `select_pseudo_merges_1()`: if (j % (uint32_t)(1.0 / group->sample_rate)) When `sample_rate` is 0, `1.0 / 0.0` produces `+inf`, and casting infinity to `uint32_t` is undefined behavior in C. On most platforms this yields 0, making the subsequent modulo operation (`j % 0`) a fatal arithmetic trap. This path was not previously reachable because an earlier bug caused all pseudo-merge candidates to be classified as "stable" (where the sampling rate is not used), regardless of their actual commit date. Now that the date classification is fixed, the unstable path is exercised and the division by zero can fire. Fix this by changing the validation to require a strict lower bound and thus reject 0. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/config/bitmap-pseudo-merge.adoc | 4 ++-- pseudo-merge.c | 4 ++-- t/t5333-pseudo-merge-bitmaps.sh | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/config/bitmap-pseudo-merge.adoc b/Documentation/config/bitmap-pseudo-merge.adoc index 1f264eca99..6bf52c80ba 100644 --- a/Documentation/config/bitmap-pseudo-merge.adoc +++ b/Documentation/config/bitmap-pseudo-merge.adoc @@ -47,8 +47,8 @@ will be updated more often than a reference pointing at an old commit. bitmapPseudoMerge..sampleRate:: Determines the proportion of non-bitmapped commits (among reference tips) which are selected for inclusion in an - unstable pseudo-merge bitmap. Must be between `0` and `1` - (inclusive). The default is `1`. + unstable pseudo-merge bitmap. Must be greater than `0` and + less than or equal to `1`. The default is `1`. bitmapPseudoMerge..threshold:: Determines the minimum age of non-bitmapped commits (among diff --git a/pseudo-merge.c b/pseudo-merge.c index d79e5fb649..75bed04360 100644 --- a/pseudo-merge.c +++ b/pseudo-merge.c @@ -169,8 +169,8 @@ static int pseudo_merge_config(const char *var, const char *value, } } else if (!strcmp(key, "samplerate")) { group->sample_rate = git_config_double(var, value, ctx->kvi); - if (!(0 <= group->sample_rate && group->sample_rate <= 1)) { - warning(_("%s must be between 0 and 1, using default"), var); + if (!(0 < group->sample_rate && group->sample_rate <= 1)) { + warning(_("%s must be between 0 (exclusive) and 1, using default"), var); group->sample_rate = DEFAULT_PSEUDO_MERGE_SAMPLE_RATE; } } else if (!strcmp(key, "threshold")) { diff --git a/t/t5333-pseudo-merge-bitmaps.sh b/t/t5333-pseudo-merge-bitmaps.sh index 0032a16606..5bfbbd4214 100755 --- a/t/t5333-pseudo-merge-bitmaps.sh +++ b/t/t5333-pseudo-merge-bitmaps.sh @@ -639,7 +639,7 @@ test_expect_success 'pseudo-merge commits are correctly classified by date' ' ) ' -test_expect_failure 'sampleRate=0 does not cause division by zero' ' +test_expect_success 'sampleRate=0 does not cause division by zero' ' test_when_finished "rm -fr pseudo-merge-sample-rate-zero" && git init pseudo-merge-sample-rate-zero && ( From 84780db63657057bee11d898ad6c211730d4212f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 11 May 2026 20:47:09 -0400 Subject: [PATCH 8/9] Documentation: fix broken `sampleRate` in gitpacking(7) The documentation explaining some sample configurations for bitmap pseudo-merges incorrectly uses a sample rate outside of the allowed (0,1] range. This dates back to faf558b23ef (pseudo-merge: implement support for selecting pseudo-merge commits, 2024-05-23), and was likely written when the allowable range for this configuration was the integral values between (0,100]. Fix this to conform to the actual allowable range for this configuration. Noticed-by: Elijah Newren Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/gitpacking.adoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/gitpacking.adoc b/Documentation/gitpacking.adoc index a56596e2d1..e6de6ec824 100644 --- a/Documentation/gitpacking.adoc +++ b/Documentation/gitpacking.adoc @@ -150,7 +150,7 @@ with a configuration like so: pattern = "refs/" threshold = now stableThreshold = never - sampleRate = 100 + sampleRate = 1 maxMerges = 64 ---- @@ -177,7 +177,7 @@ like: pattern = "refs/virtual/([0-9]+)/(heads|tags)/" threshold = now stableThreshold = never - sampleRate = 100 + sampleRate = 1 maxMerges = 64 ---- From 5e6e8dc7860374d79bad3e2a3ade0c2d391bbad6 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 11 May 2026 20:47:12 -0400 Subject: [PATCH 9/9] pack-bitmap: prevent pattern leak on pseudo-merge re-assignment When "bitmapPseudoMerge.*.pattern" appears more than once for the same group, `pseudo_merge_config()` frees the old `regex_t *` pointer but does not call `regfree()` on it first. This leaks whatever internal state `regcomp()` allocated. The final cleanup path in `pseudo_merge_group_release()` does call `regfree()` before `free()`, so only the intermediate replacement is affected. Fix this by guarding the replacement with a NULL check and calling `regfree()` before `free()` when the pointer is non-NULL. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pseudo-merge.c | 5 ++++- t/t5333-pseudo-merge-bitmaps.sh | 29 +++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/pseudo-merge.c b/pseudo-merge.c index 75bed04360..22b8600d68 100644 --- a/pseudo-merge.c +++ b/pseudo-merge.c @@ -150,7 +150,10 @@ static int pseudo_merge_config(const char *var, const char *value, if (!strcmp(key, "pattern")) { struct strbuf re = STRBUF_INIT; - free(group->pattern); + if (group->pattern) { + regfree(group->pattern); + free(group->pattern); + } if (*value != '^') strbuf_addch(&re, '^'); strbuf_addstr(&re, value); diff --git a/t/t5333-pseudo-merge-bitmaps.sh b/t/t5333-pseudo-merge-bitmaps.sh index 5bfbbd4214..305d677108 100755 --- a/t/t5333-pseudo-merge-bitmaps.sh +++ b/t/t5333-pseudo-merge-bitmaps.sh @@ -662,4 +662,33 @@ test_expect_success 'sampleRate=0 does not cause division by zero' ' ) ' +test_expect_success 'duplicate pseudo-merge pattern does not leak' ' + test_when_finished "rm -fr pseudo-merge-dup-pattern" && + git init pseudo-merge-dup-pattern && + ( + cd pseudo-merge-dup-pattern && + + test_commit_bulk 64 && + tag_everything && + git repack -ad && + + pack=$(ls .git/objects/pack/pack-*.pack) && + + # Set the same group'\''s pattern twice. The second + # assignment should cleanly release the compiled regex + # from the first without leaking. + git config bitmapPseudoMerge.test.pattern "refs/tags/" && + git config --add bitmapPseudoMerge.test.pattern "refs/tags/" && + git config bitmapPseudoMerge.test.maxMerges 1 && + git config bitmapPseudoMerge.test.threshold now && + git config bitmapPseudoMerge.test.stableThreshold never && + + git rev-parse HEAD~63 >in && + test-tool bitmap write "$(basename $pack)" merges && + test_line_count = 1 merges + ) +' + test_done