Merge branch 'tb/pseudo-merge-bugfixes'

Fixes many bugs in pseudo-merge code.

* tb/pseudo-merge-bugfixes:
  pack-bitmap: prevent pattern leak on pseudo-merge re-assignment
  Documentation: fix broken `sampleRate` in gitpacking(7)
  pack-bitmap: reject pseudo-merge "sampleRate" of 0
  pack-bitmap: parse commits in `find_pseudo_merge_group_for_ref()`
  pack-bitmap: fix pseudo-merge lookup for shared commits
  pack-bitmap: fix inverted binary search in `pseudo_merge_at()`
  pack-bitmap-write: sort pseudo-merge commit lookup table in pack order
  t5333: demonstrate various pseudo-merge bugs
  t/helper: add 'test-tool bitmap write' subcommand
main
Junio C Hamano 2026-05-27 14:15:43 +09:00
commit bccafbc09c
7 changed files with 402 additions and 14 deletions

View File

@ -47,8 +47,8 @@ will be updated more often than a reference pointing at an old commit.
bitmapPseudoMerge.<name>.sampleRate::
Determines the proportion of non-bitmapped commits (among
reference tips) which are selected for inclusion in an
unstable pseudo-merge bitmap. Must be between `0` and `1`
(inclusive). The default is `1`.
unstable pseudo-merge bitmap. Must be greater than `0` and
less than or equal to `1`. The default is `1`.

bitmapPseudoMerge.<name>.threshold::
Determines the minimum age of non-bitmapped commits (among

View File

@ -150,7 +150,7 @@ with a configuration like so:
pattern = "refs/"
threshold = now
stableThreshold = never
sampleRate = 100
sampleRate = 1
maxMerges = 64
----

@ -177,7 +177,7 @@ like:
pattern = "refs/virtual/([0-9]+)/(heads|tags)/"
threshold = now
stableThreshold = never
sampleRate = 100
sampleRate = 1
maxMerges = 64
----


View File

@ -819,6 +819,20 @@ static void write_selected_commits_v1(struct bitmap_writer *writer,
}
}

static int pseudo_merge_commit_pos_cmp(const void *_va, const void *_vb,
void *_data)
{
struct bitmap_writer *writer = _data;
uint32_t pos_a = find_object_pos(writer, _va, NULL);
uint32_t pos_b = find_object_pos(writer, _vb, NULL);

if (pos_a < pos_b)
return -1;
if (pos_a > pos_b)
return 1;
return 0;
}

static void write_pseudo_merges(struct bitmap_writer *writer,
struct hashfile *f)
{
@ -863,7 +877,7 @@ static void write_pseudo_merges(struct bitmap_writer *writer,

next_ext = st_add(hashfile_total(f),
st_mult(kh_size(writer->pseudo_merge_commits),
sizeof(uint64_t)));
sizeof(uint32_t) + sizeof(uint64_t)));

table_start = hashfile_total(f);

@ -876,7 +890,12 @@ static void write_pseudo_merges(struct bitmap_writer *writer,
oid_array_append(&commits, &kh_key(writer->pseudo_merge_commits, i));
}

oid_array_sort(&commits);
/*
* Sort the commits by their bit position so that the lookup
* table can be binary searched by the reader (see
* find_pseudo_merge()).
*/
QSORT_S(commits.oid, commits.nr, pseudo_merge_commit_pos_cmp, writer);

/* write lookup table (non-extended) */
for (i = 0; i < commits.nr; i++) {

View File

@ -150,7 +150,10 @@ static int pseudo_merge_config(const char *var, const char *value,
if (!strcmp(key, "pattern")) {
struct strbuf re = STRBUF_INIT;

free(group->pattern);
if (group->pattern) {
regfree(group->pattern);
free(group->pattern);
}
if (*value != '^')
strbuf_addch(&re, '^');
strbuf_addstr(&re, value);
@ -169,8 +172,8 @@ static int pseudo_merge_config(const char *var, const char *value,
}
} else if (!strcmp(key, "samplerate")) {
group->sample_rate = git_config_double(var, value, ctx->kvi);
if (!(0 <= group->sample_rate && group->sample_rate <= 1)) {
warning(_("%s must be between 0 and 1, using default"), var);
if (!(0 < group->sample_rate && group->sample_rate <= 1)) {
warning(_("%s must be between 0 (exclusive) and 1, using default"), var);
group->sample_rate = DEFAULT_PSEUDO_MERGE_SAMPLE_RATE;
}
} else if (!strcmp(key, "threshold")) {
@ -236,6 +239,8 @@ static int find_pseudo_merge_group_for_ref(const struct reference *ref, void *_d
c = lookup_commit(the_repository, maybe_peeled);
if (!c)
return 0;
if (repo_parse_commit(the_repository, c))
return 0;
if (!packlist_find(writer->to_pack, maybe_peeled))
return 0;

@ -559,9 +564,9 @@ static struct pseudo_merge *pseudo_merge_at(const struct pseudo_merge_map *pm,
if (got == want)
return use_pseudo_merge(pm, &pm->v[mi]);
else if (got < want)
hi = mi;
else
lo = mi + 1;
else
hi = mi;
}

warning(_("could not find pseudo-merge for commit %s at offset %"PRIuMAX),
@ -600,7 +605,7 @@ static int nth_pseudo_merge_ext(const struct pseudo_merge_map *pm,
return error(_("out-of-bounds read: (%"PRIuMAX" >= %"PRIuMAX")"),
(uintmax_t)ofs, (uintmax_t)pm->map_size);

read_pseudo_merge_commit_at(merge, pm->map + ofs);
merge->pseudo_merge_ofs = ofs;

return 0;
}
@ -671,7 +676,7 @@ int apply_pseudo_merges_for_commit(const struct pseudo_merge_map *pm,
off_t ofs = merge_commit.pseudo_merge_ofs & ~((uint64_t)1<<63);
uint32_t i;

if (pseudo_merge_ext_at(pm, &ext, ofs) < -1) {
if (pseudo_merge_ext_at(pm, &ext, ofs) < 0) {
warning(_("could not read extended pseudo-merge table "
"for commit %s"),
oid_to_hex(&commit->object.oid));

View File

@ -2,7 +2,10 @@

#include "test-tool.h"
#include "git-compat-util.h"
#include "hex.h"
#include "odb.h"
#include "pack-bitmap.h"
#include "pseudo-merge.h"
#include "setup.h"

static int bitmap_list_commits(void)
@ -35,6 +38,111 @@ static int bitmap_dump_pseudo_merge_objects(uint32_t n)
return test_bitmap_pseudo_merge_objects(the_repository, n);
}

static int add_packed_object(const struct object_id *oid,
struct packed_git *pack,
uint32_t pos,
void *_data)
{
struct packing_data *packed = _data;
struct object_entry *entry;
struct object_info oi = OBJECT_INFO_INIT;
enum object_type type;

oi.typep = &type;

entry = packlist_alloc(packed, oid);
entry->idx.offset = nth_packed_object_offset(pack, pos);
if (packed_object_info(pack, entry->idx.offset, &oi) < 0)
die("could not get type of object %s",
oid_to_hex(oid));
oe_set_type(entry, type);
oe_set_in_pack(packed, entry, pack);

return 0;
}

static int idx_oid_cmp(const void *va, const void *vb)
{
const struct pack_idx_entry *a = *(const struct pack_idx_entry **)va;
const struct pack_idx_entry *b = *(const struct pack_idx_entry **)vb;

return oidcmp(&a->oid, &b->oid);
}

static int bitmap_write(const char *basename)
{
struct packed_git *p = NULL;
struct packing_data packed = { 0 };
struct bitmap_writer writer;
struct pack_idx_entry **index;
struct strbuf buf = STRBUF_INIT;
uint32_t i;

prepare_repo_settings(the_repository);
repo_for_each_pack(the_repository, p) {
if (!strcmp(pack_basename(p), basename))
break;
}

if (!p)
die("could not find pack '%s'", basename);

if (open_pack_index(p))
die("cannot open pack index for '%s'", p->pack_name);

prepare_packing_data(the_repository, &packed);

for_each_object_in_pack(p, add_packed_object, &packed,
ODB_FOR_EACH_OBJECT_PACK_ORDER);

/*
* Build the index array now that data.packed.objects[] is
* fully allocated (packlist_alloc() may have reallocated it
* during the loop above).
*/
ALLOC_ARRAY(index, p->num_objects);
for (i = 0; i < p->num_objects; i++)
index[i] = &packed.objects[i].idx;

bitmap_writer_init(&writer, the_repository, &packed, NULL);
bitmap_writer_build_type_index(&writer, index);

while (strbuf_getline_lf(&buf, stdin) != EOF) {
struct object_id oid;
struct commit *c;

if (get_oid_hex(buf.buf, &oid))
die("invalid OID: %s", buf.buf);

c = lookup_commit(the_repository, &oid);
if (!c || repo_parse_commit(the_repository, c))
die("could not parse commit %s", buf.buf);

bitmap_writer_push_commit(&writer, c, 0);
}

select_pseudo_merges(&writer);
if (bitmap_writer_build(&writer) < 0)
die("failed to build bitmaps");

bitmap_writer_set_checksum(&writer, p->hash);

QSORT(index, p->num_objects, idx_oid_cmp);

strbuf_reset(&buf);
strbuf_addstr(&buf, p->pack_name);
strbuf_strip_suffix(&buf, ".pack");
strbuf_addstr(&buf, ".bitmap");
bitmap_writer_finish(&writer, index, buf.buf, 0);

bitmap_writer_free(&writer);
strbuf_release(&buf);
free(index);
clear_packing_data(&packed);

return 0;
}

int cmd__bitmap(int argc, const char **argv)
{
setup_git_directory();
@ -51,13 +159,16 @@ int cmd__bitmap(int argc, const char **argv)
return bitmap_dump_pseudo_merge_commits(atoi(argv[2]));
if (argc == 3 && !strcmp(argv[1], "dump-pseudo-merge-objects"))
return bitmap_dump_pseudo_merge_objects(atoi(argv[2]));
if (argc == 3 && !strcmp(argv[1], "write"))
return bitmap_write(argv[2]);

usage("\ttest-tool bitmap list-commits\n"
"\ttest-tool bitmap list-commits-with-offset\n"
"\ttest-tool bitmap dump-hashes\n"
"\ttest-tool bitmap dump-pseudo-merges\n"
"\ttest-tool bitmap dump-pseudo-merge-commits <n>\n"
"\ttest-tool bitmap dump-pseudo-merge-objects <n>");
"\ttest-tool bitmap dump-pseudo-merge-objects <n>\n"
"\ttest-tool bitmap write <pack-basename> < <commit-list>");

return -1;
}

View File

@ -648,4 +648,28 @@ test_expect_success 'truncated bitmap fails gracefully (lookup table)' '
test_grep corrupted.bitmap.index stderr
'

test_expect_success 'test-tool bitmap write determines bitmap selection' '
test_when_finished "rm -fr bitmap-write-helper" &&
git init bitmap-write-helper &&
(
cd bitmap-write-helper &&

test_commit_bulk 64 &&
git repack -ad &&

pack="$(ls .git/objects/pack/pack-*.pack)" &&

git rev-parse HEAD >in &&
test-tool bitmap write "$(basename $pack)" <in &&

test-tool bitmap list-commits >bitmaps.raw &&
sort bitmaps.raw >bitmaps &&
test_cmp in bitmaps &&

git rev-list --count --objects --use-bitmap-index HEAD >actual &&
git rev-list --count --objects HEAD >expect &&
test_cmp expect actual
)
'

test_done

View File

@ -462,4 +462,233 @@ test_expect_success 'use pseudo-merge in boundary traversal' '
)
'

test_expect_success 'apply pseudo-merges during fill-in traversal' '
test_when_finished "rm -fr pseudo-merge-fill-in-traversal" &&
git init pseudo-merge-fill-in-traversal &&
(
cd pseudo-merge-fill-in-traversal &&

git config bitmapPseudoMerge.test.pattern refs/tags/ &&
git config bitmapPseudoMerge.test.maxMerges 1 &&
git config bitmapPseudoMerge.test.stableThreshold never &&

test_commit_bulk 64 &&
tag_everything &&
git repack -ad &&

pack=$(ls .git/objects/pack/pack-*.pack) &&
git rev-parse HEAD~63 >in &&
test-tool bitmap write "$(basename $pack)" <in &&

test_pseudo_merges >merges &&
test_line_count = 1 merges &&

test_commit stale &&

git rev-list --count --objects HEAD >expect &&

: >trace2.txt &&
GIT_TRACE2_EVENT=$PWD/trace2.txt \
git rev-list --count --objects --use-bitmap-index HEAD >actual &&
test_pseudo_merges_satisfied 1 <trace2.txt &&

test_cmp expect actual
)
'

test_expect_success 'apply pseudo-merges from multiple groups during fill-in' '
test_when_finished "rm -fr pseudo-merge-fill-in-multi" &&
git init pseudo-merge-fill-in-multi &&
(
cd pseudo-merge-fill-in-multi &&

test_commit base &&
base=$(git rev-parse HEAD) &&

for side in left right
do
git checkout -B $side base &&

test_commit_bulk --id=$side 64 &&
git rev-list --no-object-names HEAD --not $base >in &&
while read oid
do
echo "create refs/group-$side/$oid $oid" || return 1
done <in | git update-ref --stdin || return 1
done &&

git checkout left &&
git merge right &&
git repack -ad &&

git config bitmapPseudoMerge.left.pattern "refs/group-left/" &&
git config bitmapPseudoMerge.left.maxMerges 1 &&
git config bitmapPseudoMerge.left.stableThreshold never &&

git config bitmapPseudoMerge.right.pattern "refs/group-right/" &&
git config bitmapPseudoMerge.right.maxMerges 1 &&
git config bitmapPseudoMerge.right.stableThreshold never &&

pack="$(ls .git/objects/pack/pack-*.pack)" &&
git rev-parse "$base" >in &&
test-tool bitmap write "$(basename $pack)" <in &&

test_pseudo_merges >merges &&
test_line_count = 2 merges &&

test_commit stale &&

git rev-list --count --objects HEAD >expect &&

: >trace2.txt &&
GIT_TRACE2_EVENT=$PWD/trace2.txt \
git rev-list --count --objects --use-bitmap-index HEAD >actual &&
test_pseudo_merges_satisfied 2 <trace2.txt &&

test_cmp expect actual
)
'

test_expect_success 'apply pseudo-merges with overlapping groups during fill-in' '
test_when_finished "rm -fr pseudo-merge-fill-in-overlap" &&
git init pseudo-merge-fill-in-overlap &&
(
cd pseudo-merge-fill-in-overlap &&

test_commit_bulk 64 &&
tag_everything &&
git repack -ad &&

pack="$(ls .git/objects/pack/pack-*.pack)" &&

# Use two pseudo-merge group patterns that both match
# refs/tags/, so every tagged commit belongs to both
# groups. This exercises the extended lookup table
# path in apply_pseudo_merges_for_commit().
git config bitmapPseudoMerge.all.pattern "refs/tags/" &&
git config bitmapPseudoMerge.all.maxMerges 1 &&
git config bitmapPseudoMerge.all.stableThreshold never &&

git config bitmapPseudoMerge.tags.pattern "refs/tags/" &&
git config bitmapPseudoMerge.tags.maxMerges 1 &&
git config bitmapPseudoMerge.tags.stableThreshold never &&

git rev-parse HEAD~63 >in &&
test-tool bitmap write "$(basename $pack)" <in &&

test_pseudo_merges >merges &&
test_line_count = 2 merges &&

test_commit stale &&

git rev-list --count --objects HEAD >expect &&

: >trace2.txt &&
GIT_TRACE2_EVENT=$PWD/trace2.txt \
git rev-list --count --objects --use-bitmap-index HEAD >actual &&
test_pseudo_merges_satisfied 2 <trace2.txt &&

test_cmp expect actual
)
'

test_expect_success 'pseudo-merge commits are correctly classified by date' '
test_when_finished "rm -fr pseudo-merge-date-classification" &&
git init pseudo-merge-date-classification &&
(
cd pseudo-merge-date-classification &&

test_commit_bulk 64 &&

tag_everything &&
git repack -ad &&

pack="$(ls .git/objects/pack/pack-*.pack)" &&

# Configure two pseudo-merge groups: one that only
# matches "stable" refs (older than one month), and
# one that matches all refs. With 64 tags whose
# commits are all younger than one month, the
# "stable" group should have zero pseudo-merges and
# the "all" group should have one.
#
# Use GIT_TEST_DATE_NOW to align "now" (and therefore
# "1.month.ago") with the test_tick timestamps so that
# the commits are within the last month.
#
# Without parsing the commit, its date field would
# be zero, causing it to satisfy date <= threshold
# for the "stable" group as well, and both groups
# would produce pseudo-merges.
git config bitmapPseudoMerge.stable.pattern "refs/tags/" &&
git config bitmapPseudoMerge.stable.maxMerges 64 &&
git config bitmapPseudoMerge.stable.stableThreshold never &&
git config bitmapPseudoMerge.stable.threshold 1.month.ago &&

git config bitmapPseudoMerge.all.pattern "refs/tags/" &&
git config bitmapPseudoMerge.all.maxMerges 1 &&
git config bitmapPseudoMerge.all.stableThreshold never &&
git config bitmapPseudoMerge.all.threshold now &&

git rev-parse HEAD~63 >in &&
GIT_TEST_DATE_NOW=$test_tick \
test-tool bitmap write "$(basename $pack)" <in &&

test_pseudo_merges >merges &&
test_line_count = 1 merges
)
'

test_expect_success 'sampleRate=0 does not cause division by zero' '
test_when_finished "rm -fr pseudo-merge-sample-rate-zero" &&
git init pseudo-merge-sample-rate-zero &&
(
cd pseudo-merge-sample-rate-zero &&

test_commit_bulk 64 &&
tag_everything &&
git repack -ad &&

pack="$(ls .git/objects/pack/pack-*.pack)" &&

git config bitmapPseudoMerge.test.pattern "refs/tags/" &&
git config bitmapPseudoMerge.test.maxMerges 1 &&
git config bitmapPseudoMerge.test.sampleRate 0 &&
git config bitmapPseudoMerge.test.threshold now &&
git config bitmapPseudoMerge.test.stableThreshold never &&

git rev-parse HEAD~63 >in &&
test-tool bitmap write "$(basename $pack)" <in
)
'

test_expect_success 'duplicate pseudo-merge pattern does not leak' '
test_when_finished "rm -fr pseudo-merge-dup-pattern" &&
git init pseudo-merge-dup-pattern &&
(
cd pseudo-merge-dup-pattern &&

test_commit_bulk 64 &&
tag_everything &&
git repack -ad &&

pack=$(ls .git/objects/pack/pack-*.pack) &&

# Set the same group'\''s pattern twice. The second
# assignment should cleanly release the compiled regex
# from the first without leaking.
git config bitmapPseudoMerge.test.pattern "refs/tags/" &&
git config --add bitmapPseudoMerge.test.pattern "refs/tags/" &&
git config bitmapPseudoMerge.test.maxMerges 1 &&
git config bitmapPseudoMerge.test.threshold now &&
git config bitmapPseudoMerge.test.stableThreshold never &&

git rev-parse HEAD~63 >in &&
test-tool bitmap write "$(basename $pack)" <in &&

test_pseudo_merges >merges &&
test_line_count = 1 merges
)
'

test_done