From e3959cc78c968d8f029daa48d4aadcb486da0629 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 27 May 2026 15:55:50 -0400 Subject: [PATCH] pack-bitmap: pass object position to `fill_bitmap_tree()` In the following commit, callers of `fill_bitmap_tree()` will be required to check the bit corresponding to their tree before calling that function. That change will reduce the overhead of setting up and tearing down stack frames for trees whose bits are already set. To prepare for that change, have callers pass in the tree's bit position in `fill_bitmap_tree()`, which will make the next commit easier to read. In the meantime, this change has a surprising and measurable benefit during bitmap generation, particularly on very large repositories. When processing sub-trees within `fill_bitmap_tree()`, the preimage of this patch did the following: while (tree_entry(&desc, entry)) { switch (object_type(entry.mode)) { case OBJ_TREE: if (fill_bitmap_tree(writer, bitmap, lookup_tree(writer->repo, &entry.oid)) < 0) { /* ... */ } /* ... */ } } , first performing the object lookup via `lookup_tree()`, and then locating its bit position within the recursive call. This patch effectively reorders those two calls so that we first discover the sub-tree's bit position, *then* load its tree. By reordering these two operations, we spend fewer CPU cycles per instruction, likely due to improved CPU dependency/cache/pipeline behavior. Comparing the results of: running `perf stat` before and after this commit, we have: +--------------+-------------+-------------+-------------------+ | | HEAD^ | HEAD | Delta | +--------------+-------------+-------------+-------------------+ | elapsed | 612.5 s | 582.4 s | -30.1 s (-4.9%) | | cycles | 2,857.3 B | 2,713.3 B | -144.0 B (-5.0%) | | instructions | 2,413.2 B | 2,415.5 B | +2.3 B (+0.1%) | | CPI | 1.184 | 1.123 | -0.061 (-5.1%) | +--------------+-------------+-------------+-------------------+ In a large repository with ~4.8M commit, and ~37.1M tree objects this change improves timing from ~612.5 seconds down to ~582.4 seconds, or a ~4.9% improvement. More importantly, the number of CPU cycles spent dropped off significantly as a result of this commit, lowering our cycles-per-instruction ratio by about ~5.1%. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap-write.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 1c8070f99c..2d5ff8fd40 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -456,10 +456,10 @@ static void bitmap_builder_clear(struct bitmap_builder *bb) static int fill_bitmap_tree(struct bitmap_writer *writer, struct bitmap *bitmap, - struct tree *tree) + struct tree *tree, + uint32_t pos) { int found; - uint32_t pos; struct tree_desc desc; struct name_entry entry; @@ -467,9 +467,6 @@ static int fill_bitmap_tree(struct bitmap_writer *writer, * If our bit is already set, then there is nothing to do. Both this * tree and all of its children will be set. */ - pos = find_object_pos(writer, &tree->object.oid, &found); - if (!found) - return -1; if (bitmap_get(bitmap, pos)) return 0; bitmap_set(bitmap, pos); @@ -482,8 +479,12 @@ static int fill_bitmap_tree(struct bitmap_writer *writer, while (tree_entry(&desc, &entry)) { switch (object_type(entry.mode)) { case OBJ_TREE: + pos = find_object_pos(writer, &entry.oid, &found); + if (!found) + return -1; if (fill_bitmap_tree(writer, bitmap, - lookup_tree(writer->repo, &entry.oid)) < 0) + lookup_tree(writer->repo, + &entry.oid), pos) < 0) return -1; break; case OBJ_BLOB: @@ -575,8 +576,14 @@ static int fill_bitmap_commit(struct bitmap_writer *writer, } while (tree_queue->nr) { - if (fill_bitmap_tree(writer, ent->bitmap, - prio_queue_get(tree_queue)) < 0) + struct tree *t = prio_queue_get(tree_queue); + int found; + + pos = find_object_pos(writer, &t->object.oid, &found); + if (!found) + return -1; + + if (fill_bitmap_tree(writer, ent->bitmap, t, pos) < 0) return -1; } return 0;