Merge branch 'tb/bitmap-build-performance'

Reachability bitmap generation has been significantly optimized. By
reordering tree traversal, caching object positions, and refining how
pseudo-merge bitmaps are constructed, the performance of "git repack
--write-midx-bitmaps" is improved, especially for large repositories
and when using pseudo-merges.

* tb/bitmap-build-performance:
  pack-bitmap: build pseudo-merge bitmaps after regular bitmaps
  pack-bitmap: remember pseudo-merge parents
  pack-bitmap: sort bitmaps before XORing
  pack-bitmap: cache object positions during fill
  pack-bitmap: consolidate `find_object_pos()` success path
  pack-bitmap: reuse stored selected bitmaps
  pack-bitmap: check subtree bits before recursing
  pack-bitmap: pass object position to `fill_bitmap_tree()`
main
Junio C Hamano 2026-06-09 10:04:49 +09:00
commit 1c0af131cc
2 changed files with 380 additions and 64 deletions

View File

@ -32,6 +32,7 @@ struct bitmapped_commit {
struct commit *commit;
struct ewah_bitmap *bitmap;
struct ewah_bitmap *write_as;
struct ewah_bitmap *pseudo_merge_parents;
int flags;
int xor_offset;
uint32_t commit_pos;
@ -89,6 +90,7 @@ void bitmap_writer_free(struct bitmap_writer *writer)
ewah_free(writer->tags);

kh_destroy_oid_map(writer->bitmaps);
free(writer->pos_cache);

kh_foreach_value(writer->pseudo_merge_commits, idx,
free_pseudo_merge_commit_idx(idx));
@ -101,6 +103,7 @@ void bitmap_writer_free(struct bitmap_writer *writer)
if (bc->write_as != bc->bitmap)
ewah_free(bc->write_as);
ewah_free(bc->bitmap);
ewah_free(bc->pseudo_merge_parents);
}
free(writer->selected);
}
@ -209,38 +212,116 @@ void bitmap_writer_push_commit(struct bitmap_writer *writer,
writer->selected[writer->selected_nr].write_as = NULL;
writer->selected[writer->selected_nr].flags = 0;
writer->selected[writer->selected_nr].pseudo_merge = pseudo_merge;
writer->selected[writer->selected_nr].pseudo_merge_parents = NULL;

writer->selected_nr++;
}

/*
 * One slot of the bitmap writer's object-position cache: the object ID
 * that was looked up together with the position that was found for it.
 *
 * The BITMAP_POS_CACHE_VALID bit of `pos` marks the slot as populated;
 * the remaining bits hold the position itself.
 */
struct bitmap_pos_cache_entry {
struct object_id oid;
uint32_t pos;
};

/*
 * Bounds on the number of slots in the object-position cache. The slot
 * count starts at the minimum and is doubled until it covers the number
 * of objects being packed or reaches the maximum, so it is always a
 * power of two (the slot lookup relies on this to mask the hash).
 */
#define BITMAP_POS_MIN_CACHE_SIZE (1U << 10)
#define BITMAP_POS_MAX_CACHE_SIZE (1U << 21)
/*
 * High bit of a cache entry's `pos`, set when the slot holds a stored
 * position. Positions that already have this bit set are never cached.
 */
#define BITMAP_POS_CACHE_VALID (1U << 31)

/*
 * Allocate the writer's object-position cache on first use.
 *
 * Does nothing when the cache already exists. Otherwise the slot count
 * starts at BITMAP_POS_MIN_CACHE_SIZE and doubles until it is at least
 * the number of objects in `writer->to_pack` or has reached
 * BITMAP_POS_MAX_CACHE_SIZE.
 */
static void bitmap_writer_init_pos_cache(struct bitmap_writer *writer)
{
	size_t nr_slots = BITMAP_POS_MIN_CACHE_SIZE;

	if (writer->pos_cache)
		return;

	for (;;) {
		if (nr_slots >= writer->to_pack->nr_objects)
			break;
		if (nr_slots >= BITMAP_POS_MAX_CACHE_SIZE)
			break;
		nr_slots <<= 1;
	}

	writer->pos_cache_nr = nr_slots;
	CALLOC_ARRAY(writer->pos_cache, writer->pos_cache_nr);
}

/*
 * Map `oid` to its slot in the position cache by masking its hash with
 * the slot count minus one (the slot count is a power of two).
 */
static size_t bitmap_writer_pos_cache_slot(struct bitmap_writer *writer,
					   const struct object_id *oid)
{
	size_t mask = writer->pos_cache_nr - 1;

	return oidhash(oid) & mask;
}

/*
 * Report whether cache slot `slot` currently holds a stored position,
 * i.e. whether its BITMAP_POS_CACHE_VALID bit is set.
 */
static bool bitmap_writer_pos_cache_valid(struct bitmap_writer *writer,
					  size_t slot)
{
	uint32_t stored = writer->pos_cache[slot].pos;

	return (stored & BITMAP_POS_CACHE_VALID) != 0;
}

/*
 * Look `oid` up in the position cache.
 *
 * On a hit, writes the cached position (with the valid bit stripped) to
 * `*pos`, bumps the hit counter and returns 1. On a miss, bumps the
 * miss counter, leaves `*pos` untouched and returns 0.
 */
static int find_cached_object_pos(struct bitmap_writer *writer,
				  const struct object_id *oid, uint32_t *pos)
{
	size_t slot = bitmap_writer_pos_cache_slot(writer, oid);

	if (!bitmap_writer_pos_cache_valid(writer, slot) ||
	    !oideq(&writer->pos_cache[slot].oid, oid)) {
		writer->pos_cache_misses++;
		return 0;
	}

	writer->pos_cache_hits++;
	*pos = writer->pos_cache[slot].pos & ~BITMAP_POS_CACHE_VALID;
	return 1;
}

static uint32_t store_cached_object_pos(struct bitmap_writer *writer,
const struct object_id *oid,
uint32_t pos)
{
size_t slot;

if (pos & BITMAP_POS_CACHE_VALID)
return pos; /* too large to cache */

slot = bitmap_writer_pos_cache_slot(writer, oid);

oidcpy(&writer->pos_cache[slot].oid, oid);
writer->pos_cache[slot].pos = pos | BITMAP_POS_CACHE_VALID;

return pos;
}

static uint32_t find_object_pos(struct bitmap_writer *writer,
const struct object_id *oid, int *found)
{
struct object_entry *entry;
uint32_t pos;

bitmap_writer_init_pos_cache(writer);

if (find_cached_object_pos(writer, oid, &pos)) {
if (found)
*found = 1;
return pos;
}

entry = packlist_find(writer->to_pack, oid);
if (entry) {
uint32_t base_objects = 0;

if (writer->midx)
base_objects = writer->midx->num_objects +
writer->midx->num_objects_in_base;

if (found)
*found = 1;
return oe_in_pack_pos(writer->to_pack, entry) + base_objects;
pos = oe_in_pack_pos(writer->to_pack, entry) + base_objects;
} else if (writer->midx) {
uint32_t at, pos;
uint32_t at;

if (!bsearch_midx(oid, writer->midx, &at))
goto missing;
if (midx_to_pack_pos(writer->midx, at, &pos) < 0)
goto missing;

if (found)
*found = 1;
return pos;
} else {
goto missing;
}

if (found)
*found = 1;
return store_cached_object_pos(writer, oid, pos);

missing:
if (found)
*found = 0;
@ -249,11 +330,40 @@ missing:
return 0;
}

/*
 * Sort comparator for `struct bitmapped_commit`: orders entries by
 * ascending commit date. Returns -1, 0 or 1.
 */
static int bitmapped_commit_date_cmp(const void *_a, const void *_b)
{
	const struct bitmapped_commit *lhs = _a;
	const struct bitmapped_commit *rhs = _b;

	return (lhs->commit->date > rhs->commit->date) -
	       (lhs->commit->date < rhs->commit->date);
}

static void compute_xor_offsets(struct bitmap_writer *writer)
{
static const int MAX_XOR_OFFSET_SEARCH = 10;

int i, next = 0;
int nr = bitmap_writer_nr_selected_commits(writer);

if (nr > 1) {
QSORT(writer->selected, nr, bitmapped_commit_date_cmp);

for (i = 0; i < nr; i++) {
struct bitmapped_commit *stored = &writer->selected[i];
khiter_t hash_pos = kh_get_oid_map(writer->bitmaps,
stored->commit->object.oid);

if (hash_pos == kh_end(writer->bitmaps))
BUG("selected commit missing from bitmap map: %s",
oid_to_hex(&stored->commit->object.oid));

kh_value(writer->bitmaps, hash_pos) = stored;
}
}

while (next < writer->selected_nr) {
struct bitmapped_commit *stored = &writer->selected[next];
@ -336,13 +446,17 @@ static void bitmap_builder_init(struct bitmap_builder *bb,
revs.topo_order = 1;
revs.first_parent_only = 1;

for (i = 0; i < writer->selected_nr; i++) {
for (i = 0; i < bitmap_writer_nr_selected_commits(writer); i++) {
struct bitmapped_commit *bc = &writer->selected[i];
struct bb_commit *ent = bb_data_at(&bb->data, bc->commit);

if (bc->pseudo_merge)
BUG("unexpected pseudo-merge at %"PRIuMAX,
(uintmax_t)i);

ent->selected = 1;
ent->maximal = 1;
ent->pseudo_merge = bc->pseudo_merge;
ent->pseudo_merge = 0;
ent->idx = i;

ent->commit_mask = bitmap_new();
@ -456,22 +570,13 @@ static void bitmap_builder_clear(struct bitmap_builder *bb)

static int fill_bitmap_tree(struct bitmap_writer *writer,
struct bitmap *bitmap,
struct tree *tree)
struct tree *tree,
uint32_t pos)
{
int found;
uint32_t pos;
struct tree_desc desc;
struct name_entry entry;

/*
* If our bit is already set, then there is nothing to do. Both this
* tree and all of its children will be set.
*/
pos = find_object_pos(writer, &tree->object.oid, &found);
if (!found)
return -1;
if (bitmap_get(bitmap, pos))
return 0;
bitmap_set(bitmap, pos);

if (repo_parse_tree(writer->repo, tree) < 0)
@ -482,8 +587,21 @@ static int fill_bitmap_tree(struct bitmap_writer *writer,
while (tree_entry(&desc, &entry)) {
switch (object_type(entry.mode)) {
case OBJ_TREE:
pos = find_object_pos(writer, &entry.oid, &found);
if (!found)
return -1;
if (bitmap_get(bitmap, pos)) {
/*
* If our bit is already set, then there
* is nothing to do. Both this tree and
* all of its children will be set.
*/
break;
}

if (fill_bitmap_tree(writer, bitmap,
lookup_tree(writer->repo, &entry.oid)) < 0)
lookup_tree(writer->repo,
&entry.oid), pos) < 0)
return -1;
break;
case OBJ_BLOB:
@ -504,6 +622,11 @@ static int fill_bitmap_tree(struct bitmap_writer *writer,

/*
 * Statistics gathered while building bitmaps and reported via trace2.
 */

/* Old bitmaps that could be remapped and reused for a commit. */
static int reused_bitmaps_nr;
/* Old pseudo-merge bitmaps that could be remapped and reused. */
static int reused_pseudo_merge_bitmaps_nr;
/* Pseudo-merge bitmaps requested (whether reused or built). */
static int pseudo_merge_bitmap_nr;
/* Sum of parent counts over all pseudo-merges processed. */
static int pseudo_merge_bitmap_parents;

/* Number of calls to fill_bitmap_commit(). */
static int fill_bitmap_commit_calls_nr;
/* Walks cut short by an ancestor whose bitmap was already stored. */
static int fill_bitmap_commit_found_ancestor_nr;

static int fill_bitmap_commit(struct bitmap_writer *writer,
struct bb_commit *ent,
@ -514,7 +637,14 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
const uint32_t *mapping)
{
int found;
int from_pseudo_merge = commit->object.flags & BITMAP_PSEUDO_MERGE;
uint32_t pos;

if (ent->pseudo_merge)
BUG("unexpected pseudo-merge commit in fill_bitmap_commit()");

fill_bitmap_commit_calls_nr++;

if (!ent->bitmap)
ent->bitmap = bitmap_new();

@ -528,10 +658,7 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
struct ewah_bitmap *old;
struct bitmap *remapped = bitmap_new();

if (commit->object.flags & BITMAP_PSEUDO_MERGE)
old = pseudo_merge_bitmap_for_commit(old_bitmap, c);
else
old = bitmap_for_commit(old_bitmap, c);
old = bitmap_for_commit(old_bitmap, c);
/*
* If this commit has an old bitmap, then translate that
* bitmap and add its bits to this one. No need to walk
@ -540,26 +667,65 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
if (old && !rebuild_bitmap(mapping, old, remapped)) {
bitmap_or(ent->bitmap, remapped);
bitmap_free(remapped);
if (commit->object.flags & BITMAP_PSEUDO_MERGE)
reused_pseudo_merge_bitmaps_nr++;
else
reused_bitmaps_nr++;
reused_bitmaps_nr++;
continue;
}
bitmap_free(remapped);
}

/*
* If we encounter an ancestor for which we have already
* computed a bitmap during this build (i.e. a regular
* selected commit processed earlier in topo order), we can
* short-circuit the walk: its stored bitmap already covers
* the commit itself, its tree, and all of its ancestors.
*/
if (c != commit) {
khiter_t hash_pos = kh_get_oid_map(writer->bitmaps,
c->object.oid);
if (hash_pos != kh_end(writer->bitmaps)) {
struct bitmapped_commit *stored =
kh_value(writer->bitmaps, hash_pos);
if (stored && stored->bitmap) {
fill_bitmap_commit_found_ancestor_nr++;
bitmap_or_ewah(ent->bitmap,
stored->bitmap);
continue;
}
}
}

/*
* Mark ourselves and queue our tree. The commit
* walk ensures we cover all parents.
*/
if (!(c->object.flags & BITMAP_PSEUDO_MERGE)) {
struct tree *tree;

if (from_pseudo_merge && !c->object.parsed) {
/*
* Commits reachable from selected
* non-pseudo-merges are already parsed
* by the regular bitmap build.
*
* However, pseudo-merge fills can also
* reach commits that were not covered
* there, so parse any such leftovers
* before reading their tree or parents.
*/
if (repo_parse_commit(writer->repo, c))
return -1;
}

pos = find_object_pos(writer, &c->object.oid, &found);
if (!found)
return -1;
bitmap_set(ent->bitmap, pos);
prio_queue_put(tree_queue,
repo_get_commit_tree(writer->repo, c));

tree = repo_get_commit_tree(writer->repo, c);
if (!tree)
return -1;
prio_queue_put(tree_queue, tree);
}

for (p = c->parents; p; p = p->next) {
@ -575,13 +741,158 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
}

while (tree_queue->nr) {
if (fill_bitmap_tree(writer, ent->bitmap,
prio_queue_get(tree_queue)) < 0)
struct tree *t = prio_queue_get(tree_queue);
int found;

pos = find_object_pos(writer, &t->object.oid, &found);
if (!found)
return -1;
if (bitmap_get(ent->bitmap, pos)) {
/*
* If our bit is already set, then there is
* nothing to do. Both this tree and all of its
* children will be set.
*/
continue;
}

if (fill_bitmap_tree(writer, ent->bitmap, t, pos) < 0)
return -1;
}
return 0;
}

/*
 * Try to reuse the bitmap stored for pseudo-merge `merge` in
 * `old_bitmap`, translating it into the new object order via
 * `mapping`.
 *
 * Returns 1 and stores a freshly allocated EWAH bitmap in `*out` when
 * the old bitmap exists and could be translated. Returns 0, leaving
 * `*out` untouched, when there is no old bitmap index, no mapping, no
 * bitmap for this pseudo-merge, or the translation fails.
 */
static int reuse_pseudo_merge_bitmap(struct bitmap_index *old_bitmap,
				     const uint32_t *mapping,
				     struct commit *merge,
				     struct ewah_bitmap **out)
{
	struct ewah_bitmap *prior;
	struct bitmap *translated;
	int reused = 0;

	if (!old_bitmap || !mapping)
		return 0;

	prior = pseudo_merge_bitmap_for_commit(old_bitmap, merge);
	if (!prior)
		return 0;

	translated = bitmap_new();
	if (rebuild_bitmap(mapping, prior, translated) >= 0) {
		*out = bitmap_to_ewah(translated);
		reused_pseudo_merge_bitmaps_nr++;
		reused = 1;
	}
	bitmap_free(translated);

	return reused;
}

/*
 * Compute the object bitmap for the pseudo-merge commit `merge` and
 * store it, EWAH-compressed, in `*out`.
 *
 * A bitmap for the same pseudo-merge in `old_bitmap` is reused when it
 * can be translated through `mapping`; otherwise the bitmap is built
 * from scratch with fill_bitmap_commit().
 *
 * Returns 0 on success. On failure the result of fill_bitmap_commit()
 * is returned and `*out` is left untouched.
 */
static int build_pseudo_merge_bitmap(struct bitmap_writer *writer,
				     struct bitmap_index *old_bitmap,
				     const uint32_t *mapping,
				     struct commit *merge,
				     struct ewah_bitmap **out)
{
	struct bb_commit ent = { 0 };
	struct prio_queue queue = { NULL };
	struct prio_queue tree_queue = { NULL };
	unsigned parents = commit_list_count(merge->parents);
	int ret;

	pseudo_merge_bitmap_nr++;
	pseudo_merge_bitmap_parents += parents;

	/*
	 * Try the cheap path before allocating anything: a reused
	 * bitmap needs neither the scratch bitmap nor the queues, so
	 * there is nothing to release on this path.
	 */
	if (reuse_pseudo_merge_bitmap(old_bitmap, mapping, merge, out))
		return 0;

	ent.bitmap = bitmap_new();

	ret = fill_bitmap_commit(writer, &ent, merge, &queue, &tree_queue,
				 old_bitmap, mapping);
	if (!ret)
		*out = bitmap_to_ewah(ent.bitmap);

	bitmap_free(ent.bitmap);
	clear_prio_queue(&queue);
	clear_prio_queue(&tree_queue);

	return ret;
}

static int build_pseudo_merge_bitmaps(struct bitmap_writer *writer,
struct bitmap_index *old_bitmap,
const uint32_t *mapping,
int *nr_stored)
{
size_t i = bitmap_writer_nr_selected_commits(writer);
int ret = 0;

if (!writer->pseudo_merges_nr)
return 0;

trace2_region_enter("pack-bitmap-write", "building_pseudo_merge_bitmaps",
writer->repo);

for (; i < writer->selected_nr; i++) {
struct bitmapped_commit *merge = &writer->selected[i];
struct commit_list *p;
struct bitmap *parents = bitmap_new();
struct ewah_bitmap *objects = NULL;

if (!merge->pseudo_merge)
BUG("found non-pseudo merge commit at %"PRIuMAX,
(uintmax_t)i);

for (p = merge->commit->parents; p; p = p->next) {
int found;
uint32_t pos = find_object_pos(writer,
&p->item->object.oid,
&found);
if (!found) {
bitmap_free(parents);
ret = -1;
goto done;
}
bitmap_set(parents, pos);
}

merge->pseudo_merge_parents = bitmap_to_ewah(parents);
bitmap_free(parents);

if (build_pseudo_merge_bitmap(writer, old_bitmap, mapping,
merge->commit, &objects) < 0) {
ret = -1;
goto done;
}
merge->bitmap = objects;

(*nr_stored)++;
display_progress(writer->progress, *nr_stored);
}

done:
trace2_region_leave("pack-bitmap-write", "building_pseudo_merge_bitmaps",
writer->repo);

trace2_data_intmax("pack-bitmap-write", writer->repo,
"pseudo_merge_bitmap_nr",
pseudo_merge_bitmap_nr);
trace2_data_intmax("pack-bitmap-write", writer->repo,
"building_bitmaps_pseudo_merge_reused",
reused_pseudo_merge_bitmaps_nr);
trace2_data_intmax("pack-bitmap-write", writer->repo,
"pseudo_merge_bitmap_parents",
pseudo_merge_bitmap_parents);

return ret;
}

static void store_selected(struct bitmap_writer *writer,
struct bb_commit *ent, struct commit *commit)
{
@ -616,6 +927,10 @@ int bitmap_writer_build(struct bitmap_writer *writer)
writer->progress = start_progress(writer->repo,
"Building bitmaps",
writer->selected_nr);

writer->pos_cache_hits = 0;
writer->pos_cache_misses = 0;

trace2_region_enter("pack-bitmap-write", "building_bitmaps_total",
writer->repo);

@ -661,6 +976,10 @@ int bitmap_writer_build(struct bitmap_writer *writer)
bitmap_free(ent->bitmap);
ent->bitmap = NULL;
}
if (closed &&
build_pseudo_merge_bitmaps(writer, old_bitmap, mapping,
&nr_stored) < 0)
closed = 0;
clear_prio_queue(&queue);
clear_prio_queue(&tree_queue);
bitmap_builder_clear(&bb);
@ -672,8 +991,15 @@ int bitmap_writer_build(struct bitmap_writer *writer)
trace2_data_intmax("pack-bitmap-write", writer->repo,
"building_bitmaps_reused", reused_bitmaps_nr);
trace2_data_intmax("pack-bitmap-write", writer->repo,
"building_bitmaps_pseudo_merge_reused",
reused_pseudo_merge_bitmaps_nr);
"fill_bitmap_commit_calls_nr",
fill_bitmap_commit_calls_nr);
trace2_data_intmax("pack-bitmap-write", writer->repo,
"fill_bitmap_commit_found_ancestor_nr",
fill_bitmap_commit_found_ancestor_nr);
trace2_data_intmax("pack-bitmap-write", writer->repo,
"bitmap_pos_cache_hits", writer->pos_cache_hits);
trace2_data_intmax("pack-bitmap-write", writer->repo,
"bitmap_pos_cache_misses", writer->pos_cache_misses);

stop_progress(&writer->progress);

@ -837,42 +1163,29 @@ static void write_pseudo_merges(struct bitmap_writer *writer,
struct hashfile *f)
{
struct oid_array commits = OID_ARRAY_INIT;
struct bitmap **commits_bitmap = NULL;
off_t *pseudo_merge_ofs = NULL;
off_t start, table_start, next_ext;

uint32_t base = bitmap_writer_nr_selected_commits(writer);
size_t i, j = 0;

CALLOC_ARRAY(commits_bitmap, writer->pseudo_merges_nr);
CALLOC_ARRAY(pseudo_merge_ofs, writer->pseudo_merges_nr);

for (i = 0; i < writer->pseudo_merges_nr; i++) {
struct bitmapped_commit *merge = &writer->selected[base + i];
struct commit_list *p;

if (!merge->pseudo_merge)
BUG("found non-pseudo merge commit at %"PRIuMAX, (uintmax_t)i);

commits_bitmap[i] = bitmap_new();

for (p = merge->commit->parents; p; p = p->next)
bitmap_set(commits_bitmap[i],
find_object_pos(writer, &p->item->object.oid,
NULL));
}

start = hashfile_total(f);

for (i = 0; i < writer->pseudo_merges_nr; i++) {
struct ewah_bitmap *commits_ewah = bitmap_to_ewah(commits_bitmap[i]);
struct bitmapped_commit *merge = &writer->selected[base + i];

if (!merge->pseudo_merge)
BUG("found non-pseudo merge commit at %"PRIuMAX, (uintmax_t)i);

if (!merge->pseudo_merge_parents || !merge->bitmap)
BUG("missing pseudo-merge bitmap for commit %s",
oid_to_hex(&merge->commit->object.oid));

pseudo_merge_ofs[i] = hashfile_total(f);

dump_bitmap(f, commits_ewah);
dump_bitmap(f, writer->selected[base+i].write_as);

ewah_free(commits_ewah);
dump_bitmap(f, merge->pseudo_merge_parents);
dump_bitmap(f, merge->bitmap);
}

next_ext = st_add(hashfile_total(f),
@ -955,12 +1268,8 @@ static void write_pseudo_merges(struct bitmap_writer *writer,
hashwrite_be64(f, table_start - start);
hashwrite_be64(f, hashfile_total(f) - start + sizeof(uint64_t));

for (i = 0; i < writer->pseudo_merges_nr; i++)
bitmap_free(commits_bitmap[i]);

oid_array_clear(&commits);
free(pseudo_merge_ofs);
free(commits_bitmap);
}

static int table_cmp(const void *_va, const void *_vb, void *_data)

View File

@ -132,6 +132,8 @@ int bitmap_has_oid_in_uninteresting(struct bitmap_index *, const struct object_i

off_t get_disk_usage_from_bitmap(struct bitmap_index *, struct rev_info *);

/*
 * Slot type of the bitmap writer's object-position cache. Only referred
 * to by pointer in this header, so a forward declaration is enough.
 */
struct bitmap_pos_cache_entry;

struct bitmap_writer {
struct repository *repo;
struct ewah_bitmap *commits;
@ -143,6 +145,11 @@ struct bitmap_writer {
struct packing_data *to_pack;
struct multi_pack_index *midx; /* if appending to a MIDX chain */

struct bitmap_pos_cache_entry *pos_cache;
size_t pos_cache_nr;
uint64_t pos_cache_hits;
uint64_t pos_cache_misses;

struct bitmapped_commit *selected;
unsigned int selected_nr, selected_alloc;