pack-revindex: prepare for incremental MIDX bitmaps

Prepare the reverse index machinery to handle object lookups in an
incremental MIDX bitmap. These changes are broken out across a few
functions:

  - load_midx_revindex() learns to use the appropriate MIDX filename
    depending on whether the given 'struct multi_pack_index *' is
    incremental or not.

  - pack_pos_to_midx() and midx_to_pack_pos() now both take in a global
    object position in the MIDX pseudo-pack order, and find the
    earliest containing MIDX (similar to midx.c::midx_for_object().

  - midx_pack_order_cmp() adjusts its call to pack_pos_to_midx() by the
    number of objects in the base (since 'vb - midx->revindx_data' is
    relative to the containing MIDX, and pack_pos_to_midx() expects a
    global position).

    Likewise, this function adjusts its output by adding
    m->num_objects_in_base to return a global position out through the
    `*pos` pointer.

Together, these changes are sufficient to use the multi-pack index's
reverse index format for incremental multi-pack reachability bitmaps.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Acked-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
main
Taylor Blau 2025-03-20 13:56:31 -04:00 committed by Junio C Hamano
parent 4887bdd4c7
commit 8331c17b79
2 changed files with 56 additions and 21 deletions

View File

@ -170,6 +170,15 @@ static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index)
return read_bitmap(index->map, index->map_size, &index->map_pos); return read_bitmap(index->map, index->map_size, &index->map_pos);
} }


static uint32_t bitmap_num_objects_total(struct bitmap_index *index)
{
if (index->midx) {
struct multi_pack_index *m = index->midx;
return m->num_objects + m->num_objects_in_base;
}
return index->pack->num_objects;
}

static uint32_t bitmap_num_objects(struct bitmap_index *index) static uint32_t bitmap_num_objects(struct bitmap_index *index)
{ {
if (index->midx) if (index->midx)
@ -924,7 +933,7 @@ static inline int bitmap_position_extended(struct bitmap_index *bitmap_git,


if (pos < kh_end(positions)) { if (pos < kh_end(positions)) {
int bitmap_pos = kh_value(positions, pos); int bitmap_pos = kh_value(positions, pos);
return bitmap_pos + bitmap_num_objects(bitmap_git); return bitmap_pos + bitmap_num_objects_total(bitmap_git);
} }


return -1; return -1;
@ -992,7 +1001,7 @@ static int ext_index_add_object(struct bitmap_index *bitmap_git,
bitmap_pos = kh_value(eindex->positions, hash_pos); bitmap_pos = kh_value(eindex->positions, hash_pos);
} }


return bitmap_pos + bitmap_num_objects(bitmap_git); return bitmap_pos + bitmap_num_objects_total(bitmap_git);
} }


struct bitmap_show_data { struct bitmap_show_data {
@ -1342,11 +1351,17 @@ struct ewah_bitmap *pseudo_merge_bitmap_for_commit(struct bitmap_index *bitmap_g
if (pos < 0 || pos >= bitmap_num_objects(bitmap_git)) if (pos < 0 || pos >= bitmap_num_objects(bitmap_git))
goto done; goto done;


/*
* Use bitmap-relative positions instead of offsetting
* by bitmap_git->num_objects_in_base because we use
* this to find a match in pseudo_merge_for_parents(),
* and pseudo-merge groups cannot span multiple bitmap
* layers.
*/
bitmap_set(parents, pos); bitmap_set(parents, pos);
} }


match = pseudo_merge_for_parents(&bitmap_git->pseudo_merges, match = pseudo_merge_for_parents(&bitmap_git->pseudo_merges, parents);
parents);


done: done:
bitmap_free(parents); bitmap_free(parents);
@ -1500,7 +1515,9 @@ static void show_extended_objects(struct bitmap_index *bitmap_git,
for (i = 0; i < eindex->count; ++i) { for (i = 0; i < eindex->count; ++i) {
struct object *obj; struct object *obj;


if (!bitmap_get(objects, st_add(bitmap_num_objects(bitmap_git), i))) if (!bitmap_get(objects,
st_add(bitmap_num_objects_total(bitmap_git),
i)))
continue; continue;


obj = eindex->objects[i]; obj = eindex->objects[i];
@ -1679,7 +1696,7 @@ static void filter_bitmap_exclude_type(struct bitmap_index *bitmap_git,
* them individually. * them individually.
*/ */
for (i = 0; i < eindex->count; i++) { for (i = 0; i < eindex->count; i++) {
size_t pos = st_add(i, bitmap_num_objects(bitmap_git)); size_t pos = st_add(i, bitmap_num_objects_total(bitmap_git));
if (eindex->objects[i]->type == type && if (eindex->objects[i]->type == type &&
bitmap_get(to_filter, pos) && bitmap_get(to_filter, pos) &&
!bitmap_get(tips, pos)) !bitmap_get(tips, pos))
@ -1705,7 +1722,7 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,


oi.sizep = &size; oi.sizep = &size;


if (pos < bitmap_num_objects(bitmap_git)) { if (pos < bitmap_num_objects_total(bitmap_git)) {
struct packed_git *pack; struct packed_git *pack;
off_t ofs; off_t ofs;


@ -1728,8 +1745,9 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
die(_("unable to get size of %s"), oid_to_hex(&oid)); die(_("unable to get size of %s"), oid_to_hex(&oid));
} }
} else { } else {
size_t eindex_pos = pos - bitmap_num_objects_total(bitmap_git);
struct eindex *eindex = &bitmap_git->ext_index; struct eindex *eindex = &bitmap_git->ext_index;
struct object *obj = eindex->objects[pos - bitmap_num_objects(bitmap_git)]; struct object *obj = eindex->objects[eindex_pos];
if (oid_object_info_extended(bitmap_repo(bitmap_git), &obj->oid, if (oid_object_info_extended(bitmap_repo(bitmap_git), &obj->oid,
&oi, 0) < 0) &oi, 0) < 0)
die(_("unable to get size of %s"), oid_to_hex(&obj->oid)); die(_("unable to get size of %s"), oid_to_hex(&obj->oid));
@ -1882,7 +1900,7 @@ static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
uint32_t objects_nr; uint32_t objects_nr;
size_t i, pos; size_t i, pos;


objects_nr = bitmap_num_objects(bitmap_git); objects_nr = bitmap_num_objects_total(bitmap_git);
pos = objects_nr / BITS_IN_EWORD; pos = objects_nr / BITS_IN_EWORD;


if (pos > result->word_alloc) if (pos > result->word_alloc)
@ -2419,7 +2437,7 @@ static uint32_t count_object_type(struct bitmap_index *bitmap_git,
for (i = 0; i < eindex->count; ++i) { for (i = 0; i < eindex->count; ++i) {
if (eindex->objects[i]->type == type && if (eindex->objects[i]->type == type &&
bitmap_get(objects, bitmap_get(objects,
st_add(bitmap_num_objects(bitmap_git), i))) st_add(bitmap_num_objects_total(bitmap_git), i)))
count++; count++;
} }


@ -2820,7 +2838,7 @@ uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git,
BUG("rebuild_existing_bitmaps: missing required rev-cache " BUG("rebuild_existing_bitmaps: missing required rev-cache "
"extension"); "extension");


num_objects = bitmap_num_objects(bitmap_git); num_objects = bitmap_num_objects_total(bitmap_git);
CALLOC_ARRAY(reposition, num_objects); CALLOC_ARRAY(reposition, num_objects);


for (i = 0; i < num_objects; ++i) { for (i = 0; i < num_objects; ++i) {
@ -2963,7 +2981,8 @@ static off_t get_disk_usage_for_extended(struct bitmap_index *bitmap_git)
struct object *obj = eindex->objects[i]; struct object *obj = eindex->objects[i];


if (!bitmap_get(result, if (!bitmap_get(result,
st_add(bitmap_num_objects(bitmap_git), i))) st_add(bitmap_num_objects_total(bitmap_git),
i)))
continue; continue;


if (oid_object_info_extended(bitmap_repo(bitmap_git), &obj->oid, if (oid_object_info_extended(bitmap_repo(bitmap_git), &obj->oid,

View File

@ -383,8 +383,14 @@ int load_midx_revindex(struct multi_pack_index *m)
trace2_data_string("load_midx_revindex", the_repository, trace2_data_string("load_midx_revindex", the_repository,
"source", "rev"); "source", "rev");


get_midx_filename_ext(m->repo->hash_algo, &revindex_name, m->object_dir, if (m->has_chain)
get_midx_checksum(m), MIDX_EXT_REV); get_split_midx_filename_ext(m->repo->hash_algo, &revindex_name,
m->object_dir, get_midx_checksum(m),
MIDX_EXT_REV);
else
get_midx_filename_ext(m->repo->hash_algo, &revindex_name,
m->object_dir, get_midx_checksum(m),
MIDX_EXT_REV);


ret = load_revindex_from_disk(revindex_name.buf, ret = load_revindex_from_disk(revindex_name.buf,
m->num_objects, m->num_objects,
@ -471,11 +477,15 @@ off_t pack_pos_to_offset(struct packed_git *p, uint32_t pos)


uint32_t pack_pos_to_midx(struct multi_pack_index *m, uint32_t pos) uint32_t pack_pos_to_midx(struct multi_pack_index *m, uint32_t pos)
{ {
while (m && pos < m->num_objects_in_base)
m = m->base_midx;
if (!m)
BUG("NULL multi-pack-index for object position: %"PRIu32, pos);
if (!m->revindex_data) if (!m->revindex_data)
BUG("pack_pos_to_midx: reverse index not yet loaded"); BUG("pack_pos_to_midx: reverse index not yet loaded");
if (m->num_objects <= pos) if (m->num_objects + m->num_objects_in_base <= pos)
BUG("pack_pos_to_midx: out-of-bounds object at %"PRIu32, pos); BUG("pack_pos_to_midx: out-of-bounds object at %"PRIu32, pos);
return get_be32(m->revindex_data + pos); return get_be32(m->revindex_data + pos - m->num_objects_in_base);
} }


struct midx_pack_key { struct midx_pack_key {
@ -491,7 +501,8 @@ static int midx_pack_order_cmp(const void *va, const void *vb)
const struct midx_pack_key *key = va; const struct midx_pack_key *key = va;
struct multi_pack_index *midx = key->midx; struct multi_pack_index *midx = key->midx;


uint32_t versus = pack_pos_to_midx(midx, (uint32_t*)vb - (const uint32_t *)midx->revindex_data); size_t pos = (uint32_t *)vb - (const uint32_t *)midx->revindex_data;
uint32_t versus = pack_pos_to_midx(midx, pos + midx->num_objects_in_base);
uint32_t versus_pack = nth_midxed_pack_int_id(midx, versus); uint32_t versus_pack = nth_midxed_pack_int_id(midx, versus);
off_t versus_offset; off_t versus_offset;


@ -529,9 +540,9 @@ static int midx_key_to_pack_pos(struct multi_pack_index *m,
{ {
uint32_t *found; uint32_t *found;


if (key->pack >= m->num_packs) if (key->pack >= m->num_packs + m->num_packs_in_base)
BUG("MIDX pack lookup out of bounds (%"PRIu32" >= %"PRIu32")", BUG("MIDX pack lookup out of bounds (%"PRIu32" >= %"PRIu32")",
key->pack, m->num_packs); key->pack, m->num_packs + m->num_packs_in_base);
/* /*
* The preferred pack sorts first, so determine its identifier by * The preferred pack sorts first, so determine its identifier by
* looking at the first object in pseudo-pack order. * looking at the first object in pseudo-pack order.
@ -551,7 +562,8 @@ static int midx_key_to_pack_pos(struct multi_pack_index *m,
if (!found) if (!found)
return -1; return -1;


*pos = found - m->revindex_data; *pos = (found - m->revindex_data) + m->num_objects_in_base;

return 0; return 0;
} }


@ -559,9 +571,13 @@ int midx_to_pack_pos(struct multi_pack_index *m, uint32_t at, uint32_t *pos)
{ {
struct midx_pack_key key; struct midx_pack_key key;


while (m && at < m->num_objects_in_base)
m = m->base_midx;
if (!m)
BUG("NULL multi-pack-index for object position: %"PRIu32, at);
if (!m->revindex_data) if (!m->revindex_data)
BUG("midx_to_pack_pos: reverse index not yet loaded"); BUG("midx_to_pack_pos: reverse index not yet loaded");
if (m->num_objects <= at) if (m->num_objects + m->num_objects_in_base <= at)
BUG("midx_to_pack_pos: out-of-bounds object at %"PRIu32, at); BUG("midx_to_pack_pos: out-of-bounds object at %"PRIu32, at);


key.pack = nth_midxed_pack_int_id(m, at); key.pack = nth_midxed_pack_int_id(m, at);