diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 1c8070f99c..1bcb3f98a4 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -32,6 +32,7 @@ struct bitmapped_commit { struct commit *commit; struct ewah_bitmap *bitmap; struct ewah_bitmap *write_as; + struct ewah_bitmap *pseudo_merge_parents; int flags; int xor_offset; uint32_t commit_pos; @@ -89,6 +90,7 @@ void bitmap_writer_free(struct bitmap_writer *writer) ewah_free(writer->tags); kh_destroy_oid_map(writer->bitmaps); + free(writer->pos_cache); kh_foreach_value(writer->pseudo_merge_commits, idx, free_pseudo_merge_commit_idx(idx)); @@ -101,6 +103,7 @@ void bitmap_writer_free(struct bitmap_writer *writer) if (bc->write_as != bc->bitmap) ewah_free(bc->write_as); ewah_free(bc->bitmap); + ewah_free(bc->pseudo_merge_parents); } free(writer->selected); } @@ -209,38 +212,116 @@ void bitmap_writer_push_commit(struct bitmap_writer *writer, writer->selected[writer->selected_nr].write_as = NULL; writer->selected[writer->selected_nr].flags = 0; writer->selected[writer->selected_nr].pseudo_merge = pseudo_merge; + writer->selected[writer->selected_nr].pseudo_merge_parents = NULL; writer->selected_nr++; } +struct bitmap_pos_cache_entry { + struct object_id oid; + uint32_t pos; /* object position, with BITMAP_POS_CACHE_VALID OR'ed in while the slot is occupied */ +}; + +#define BITMAP_POS_MIN_CACHE_SIZE (1U << 10) +#define BITMAP_POS_MAX_CACHE_SIZE (1U << 21) +#define BITMAP_POS_CACHE_VALID (1U << 31) + +static void bitmap_writer_init_pos_cache(struct bitmap_writer *writer) +{ + if (writer->pos_cache) + return; /* cache already allocated */ + + writer->pos_cache_nr = BITMAP_POS_MIN_CACHE_SIZE; + + while (writer->pos_cache_nr < writer->to_pack->nr_objects && + writer->pos_cache_nr < BITMAP_POS_MAX_CACHE_SIZE) + writer->pos_cache_nr <<= 1; /* doubling from the minimum keeps the size a power of two */ + + CALLOC_ARRAY(writer->pos_cache, writer->pos_cache_nr); +} + +static size_t bitmap_writer_pos_cache_slot(struct bitmap_writer *writer, + const struct object_id *oid) +{ + return oidhash(oid) & (writer->pos_cache_nr - 1); /* size is a power of two, so masking maps the hash onto a slot */ +} + +static bool bitmap_writer_pos_cache_valid(struct bitmap_writer
*writer, + size_t slot) +{ + return !!(writer->pos_cache[slot].pos & BITMAP_POS_CACHE_VALID); +} + +static int find_cached_object_pos(struct bitmap_writer *writer, + const struct object_id *oid, uint32_t *pos) +{ + size_t slot = bitmap_writer_pos_cache_slot(writer, oid); + + if (bitmap_writer_pos_cache_valid(writer, slot) && + oideq(&writer->pos_cache[slot].oid, oid)) { + writer->pos_cache_hits++; + *pos = writer->pos_cache[slot].pos & ~BITMAP_POS_CACHE_VALID; /* drop the marker bit before handing the position back */ + return 1; + } + + writer->pos_cache_misses++; + return 0; +} + +static uint32_t store_cached_object_pos(struct bitmap_writer *writer, + const struct object_id *oid, + uint32_t pos) +{ + size_t slot; + + if (pos & BITMAP_POS_CACHE_VALID) + return pos; /* too large to cache */ + + slot = bitmap_writer_pos_cache_slot(writer, oid); + + oidcpy(&writer->pos_cache[slot].oid, oid); + writer->pos_cache[slot].pos = pos | BITMAP_POS_CACHE_VALID; /* replaces any previous occupant of this slot */ + + return pos; +} + static uint32_t find_object_pos(struct bitmap_writer *writer, const struct object_id *oid, int *found) { struct object_entry *entry; + uint32_t pos; + + bitmap_writer_init_pos_cache(writer); /* returns immediately once the cache exists */ + + if (find_cached_object_pos(writer, oid, &pos)) { + if (found) + *found = 1; + return pos; + } entry = packlist_find(writer->to_pack, oid); if (entry) { uint32_t base_objects = 0; + if (writer->midx) base_objects = writer->midx->num_objects + writer->midx->num_objects_in_base; - - if (found) - *found = 1; - return oe_in_pack_pos(writer->to_pack, entry) + base_objects; + pos = oe_in_pack_pos(writer->to_pack, entry) + base_objects; } else if (writer->midx) { - uint32_t at, pos; + uint32_t at; if (!bsearch_midx(oid, writer->midx, &at)) goto missing; if (midx_to_pack_pos(writer->midx, at, &pos) < 0) goto missing; - - if (found) - *found = 1; - return pos; + } else { + goto missing; } + if (found) + *found = 1; + return store_cached_object_pos(writer, oid, pos); + missing: if (found) *found = 0; @@ -249,11 +330,40 @@ missing: return 0; } +static int
bitmapped_commit_date_cmp(const void *_a, const void *_b) +{ + const struct bitmapped_commit *a = _a; + const struct bitmapped_commit *b = _b; + + if (a->commit->date < b->commit->date) + return -1; + if (a->commit->date > b->commit->date) + return 1; + return 0; +} + static void compute_xor_offsets(struct bitmap_writer *writer) { static const int MAX_XOR_OFFSET_SEARCH = 10; int i, next = 0; + int nr = bitmap_writer_nr_selected_commits(writer); + + if (nr > 1) { + QSORT(writer->selected, nr, bitmapped_commit_date_cmp); /* ascending commit date; entries move, so the oid map is re-pointed below */ + + for (i = 0; i < nr; i++) { + struct bitmapped_commit *stored = &writer->selected[i]; + khiter_t hash_pos = kh_get_oid_map(writer->bitmaps, + stored->commit->object.oid); + + if (hash_pos == kh_end(writer->bitmaps)) + BUG("selected commit missing from bitmap map: %s", + oid_to_hex(&stored->commit->object.oid)); + + kh_value(writer->bitmaps, hash_pos) = stored; + } + } while (next < writer->selected_nr) { struct bitmapped_commit *stored = &writer->selected[next]; @@ -336,13 +446,17 @@ static void bitmap_builder_init(struct bitmap_builder *bb, revs.topo_order = 1; revs.first_parent_only = 1; - for (i = 0; i < writer->selected_nr; i++) { + for (i = 0; i < bitmap_writer_nr_selected_commits(writer); i++) { struct bitmapped_commit *bc = &writer->selected[i]; struct bb_commit *ent = bb_data_at(&bb->data, bc->commit); + if (bc->pseudo_merge) + BUG("unexpected pseudo-merge at %"PRIuMAX, + (uintmax_t)i); + ent->selected = 1; ent->maximal = 1; - ent->pseudo_merge = bc->pseudo_merge; + ent->pseudo_merge = 0; ent->idx = i; ent->commit_mask = bitmap_new(); @@ -456,22 +570,13 @@ static void bitmap_builder_clear(struct bitmap_builder *bb) static int fill_bitmap_tree(struct bitmap_writer *writer, struct bitmap *bitmap, - struct tree *tree) + struct tree *tree, + uint32_t pos) { int found; - uint32_t pos; struct tree_desc desc; struct name_entry entry; - /* - * If our bit is already set, then there is nothing to do.
Both this - * tree and all of its children will be set. - */ - pos = find_object_pos(writer, &tree->object.oid, &found); - if (!found) - return -1; - if (bitmap_get(bitmap, pos)) - return 0; bitmap_set(bitmap, pos); if (repo_parse_tree(writer->repo, tree) < 0) @@ -482,8 +587,21 @@ static int fill_bitmap_tree(struct bitmap_writer *writer, while (tree_entry(&desc, &entry)) { switch (object_type(entry.mode)) { case OBJ_TREE: + pos = find_object_pos(writer, &entry.oid, &found); + if (!found) + return -1; + if (bitmap_get(bitmap, pos)) { + /* + * If our bit is already set, then there + * is nothing to do. Both this tree and + * all of its children will be set. + */ + break; + } + if (fill_bitmap_tree(writer, bitmap, - lookup_tree(writer->repo, &entry.oid)) < 0) + lookup_tree(writer->repo, + &entry.oid), pos) < 0) return -1; break; case OBJ_BLOB: @@ -504,6 +622,11 @@ static int fill_bitmap_tree(struct bitmap_writer *writer, static int reused_bitmaps_nr; static int reused_pseudo_merge_bitmaps_nr; +static int pseudo_merge_bitmap_nr; +static int pseudo_merge_bitmap_parents; + +static int fill_bitmap_commit_calls_nr; +static int fill_bitmap_commit_found_ancestor_nr; static int fill_bitmap_commit(struct bitmap_writer *writer, struct bb_commit *ent, @@ -514,7 +637,14 @@ static int fill_bitmap_commit(struct bitmap_writer *writer, const uint32_t *mapping) { int found; + int from_pseudo_merge = commit->object.flags & BITMAP_PSEUDO_MERGE; uint32_t pos; + + if (ent->pseudo_merge) + BUG("unexpected pseudo-merge commit in fill_bitmap_commit()"); + + fill_bitmap_commit_calls_nr++; + if (!ent->bitmap) ent->bitmap = bitmap_new(); @@ -528,10 +658,7 @@ static int fill_bitmap_commit(struct bitmap_writer *writer, struct ewah_bitmap *old; struct bitmap *remapped = bitmap_new(); - if (commit->object.flags & BITMAP_PSEUDO_MERGE) - old = pseudo_merge_bitmap_for_commit(old_bitmap, c); - else - old = bitmap_for_commit(old_bitmap, c); + old = bitmap_for_commit(old_bitmap, c); /* * If this commit
has an old bitmap, then translate that * bitmap and add its bits to this one. No need to walk @@ -540,26 +667,65 @@ static int fill_bitmap_commit(struct bitmap_writer *writer, if (old && !rebuild_bitmap(mapping, old, remapped)) { bitmap_or(ent->bitmap, remapped); bitmap_free(remapped); - if (commit->object.flags & BITMAP_PSEUDO_MERGE) - reused_pseudo_merge_bitmaps_nr++; - else - reused_bitmaps_nr++; + reused_bitmaps_nr++; continue; } bitmap_free(remapped); } + /* + * If we encounter an ancestor for which we have already + * computed a bitmap during this build (i.e. a regular + * selected commit processed earlier in topo order), we can + * short-circuit the walk: its stored bitmap already covers + * the commit itself, its tree, and all of its ancestors. + */ + if (c != commit) { + khiter_t hash_pos = kh_get_oid_map(writer->bitmaps, + c->object.oid); + if (hash_pos != kh_end(writer->bitmaps)) { + struct bitmapped_commit *stored = + kh_value(writer->bitmaps, hash_pos); + if (stored && stored->bitmap) { + fill_bitmap_commit_found_ancestor_nr++; + bitmap_or_ewah(ent->bitmap, + stored->bitmap); + continue; + } + } + } + /* * Mark ourselves and queue our tree. The commit * walk ensures we cover all parents. */ if (!(c->object.flags & BITMAP_PSEUDO_MERGE)) { + struct tree *tree; + + if (from_pseudo_merge && !c->object.parsed) { + /* + * Commits reachable from selected + * non-pseudo-merges are already parsed + * by the regular bitmap build. + * + * However, pseudo-merge fills can also + * reach commits that were not covered + * there, so parse any such leftovers + * before reading their tree or parents.
+ */ + if (repo_parse_commit(writer->repo, c)) + return -1; + } + pos = find_object_pos(writer, &c->object.oid, &found); if (!found) return -1; bitmap_set(ent->bitmap, pos); - prio_queue_put(tree_queue, - repo_get_commit_tree(writer->repo, c)); + + tree = repo_get_commit_tree(writer->repo, c); + if (!tree) + return -1; + prio_queue_put(tree_queue, tree); } for (p = c->parents; p; p = p->next) { @@ -575,13 +741,158 @@ static int fill_bitmap_commit(struct bitmap_writer *writer, } while (tree_queue->nr) { - if (fill_bitmap_tree(writer, ent->bitmap, - prio_queue_get(tree_queue)) < 0) + struct tree *t = prio_queue_get(tree_queue); + int found; + + pos = find_object_pos(writer, &t->object.oid, &found); + if (!found) + return -1; + if (bitmap_get(ent->bitmap, pos)) { + /* + * If our bit is already set, then there is + * nothing to do. Both this tree and all of its + * children will be set. + */ + continue; + } + + if (fill_bitmap_tree(writer, ent->bitmap, t, pos) < 0) return -1; } return 0; } +static int reuse_pseudo_merge_bitmap(struct bitmap_index *old_bitmap, + const uint32_t *mapping, + struct commit *merge, + struct ewah_bitmap **out) +{ + struct ewah_bitmap *old; + struct bitmap *remapped; + + if (!old_bitmap || !mapping) + return 0; + + old = pseudo_merge_bitmap_for_commit(old_bitmap, merge); + if (!old) + return 0; + + remapped = bitmap_new(); + if (rebuild_bitmap(mapping, old, remapped) < 0) { + bitmap_free(remapped); + return 0; + } + + *out = bitmap_to_ewah(remapped); + bitmap_free(remapped); + reused_pseudo_merge_bitmaps_nr++; + return 1; +} + +static int build_pseudo_merge_bitmap(struct bitmap_writer *writer, + struct bitmap_index *old_bitmap, + const uint32_t *mapping, + struct commit *merge, + struct ewah_bitmap **out) +{ + struct bb_commit ent = { 0 }; + struct prio_queue queue = { NULL }; + struct prio_queue tree_queue = { NULL }; + unsigned parents = commit_list_count(merge->parents); + int ret; + + ent.bitmap = bitmap_new(); + +
pseudo_merge_bitmap_nr++; + pseudo_merge_bitmap_parents += parents; + + if (reuse_pseudo_merge_bitmap(old_bitmap, mapping, merge, out)) { + ret = 0; + goto done; + } + + ret = fill_bitmap_commit(writer, &ent, merge, &queue, &tree_queue, + old_bitmap, mapping); + + if (!ret) + *out = bitmap_to_ewah(ent.bitmap); + +done: + bitmap_free(ent.bitmap); + clear_prio_queue(&queue); + clear_prio_queue(&tree_queue); + + return ret; +} + +static int build_pseudo_merge_bitmaps(struct bitmap_writer *writer, + struct bitmap_index *old_bitmap, + const uint32_t *mapping, + int *nr_stored) +{ + size_t i = bitmap_writer_nr_selected_commits(writer); /* loop below starts here and BUG()s on any entry that is not a pseudo-merge */ + int ret = 0; + + if (!writer->pseudo_merges_nr) + return 0; + + trace2_region_enter("pack-bitmap-write", "building_pseudo_merge_bitmaps", + writer->repo); + + for (; i < writer->selected_nr; i++) { + struct bitmapped_commit *merge = &writer->selected[i]; + struct commit_list *p; + struct bitmap *parents = bitmap_new(); + struct ewah_bitmap *objects = NULL; + + if (!merge->pseudo_merge) + BUG("found non-pseudo merge commit at %"PRIuMAX, + (uintmax_t)i); + + for (p = merge->commit->parents; p; p = p->next) { + int found; + uint32_t pos = find_object_pos(writer, + &p->item->object.oid, + &found); + if (!found) { + bitmap_free(parents); + ret = -1; + goto done; + } + bitmap_set(parents, pos); + } + + merge->pseudo_merge_parents = bitmap_to_ewah(parents); + bitmap_free(parents); + + if (build_pseudo_merge_bitmap(writer, old_bitmap, mapping, + merge->commit, &objects) < 0) { + ret = -1; + goto done; + } + merge->bitmap = objects; /* now owned by the selected entry; bitmap_writer_free() calls ewah_free() on it */ + + (*nr_stored)++; + display_progress(writer->progress, *nr_stored); + } + +done: + trace2_region_leave("pack-bitmap-write", "building_pseudo_merge_bitmaps", + writer->repo); + + trace2_data_intmax("pack-bitmap-write", writer->repo, + "pseudo_merge_bitmap_nr", + pseudo_merge_bitmap_nr); + trace2_data_intmax("pack-bitmap-write", writer->repo, + "building_bitmaps_pseudo_merge_reused", + reused_pseudo_merge_bitmaps_nr);
+ trace2_data_intmax("pack-bitmap-write", writer->repo, + "pseudo_merge_bitmap_parents", + pseudo_merge_bitmap_parents); + + return ret; +} + static void store_selected(struct bitmap_writer *writer, struct bb_commit *ent, struct commit *commit) { @@ -616,6 +927,10 @@ int bitmap_writer_build(struct bitmap_writer *writer) writer->progress = start_progress(writer->repo, "Building bitmaps", writer->selected_nr); + + writer->pos_cache_hits = 0; + writer->pos_cache_misses = 0; + trace2_region_enter("pack-bitmap-write", "building_bitmaps_total", writer->repo); @@ -661,6 +976,10 @@ int bitmap_writer_build(struct bitmap_writer *writer) bitmap_free(ent->bitmap); ent->bitmap = NULL; } + if (closed && + build_pseudo_merge_bitmaps(writer, old_bitmap, mapping, + &nr_stored) < 0) + closed = 0; clear_prio_queue(&queue); clear_prio_queue(&tree_queue); bitmap_builder_clear(&bb); @@ -672,8 +991,15 @@ int bitmap_writer_build(struct bitmap_writer *writer) trace2_data_intmax("pack-bitmap-write", writer->repo, "building_bitmaps_reused", reused_bitmaps_nr); trace2_data_intmax("pack-bitmap-write", writer->repo, - "building_bitmaps_pseudo_merge_reused", - reused_pseudo_merge_bitmaps_nr); + "fill_bitmap_commit_calls_nr", + fill_bitmap_commit_calls_nr); + trace2_data_intmax("pack-bitmap-write", writer->repo, + "fill_bitmap_commit_found_ancestor_nr", + fill_bitmap_commit_found_ancestor_nr); + trace2_data_intmax("pack-bitmap-write", writer->repo, + "bitmap_pos_cache_hits", writer->pos_cache_hits); + trace2_data_intmax("pack-bitmap-write", writer->repo, + "bitmap_pos_cache_misses", writer->pos_cache_misses); stop_progress(&writer->progress); @@ -837,42 +1163,29 @@ static void write_pseudo_merges(struct bitmap_writer *writer, struct hashfile *f) { struct oid_array commits = OID_ARRAY_INIT; - struct bitmap **commits_bitmap = NULL; off_t *pseudo_merge_ofs = NULL; off_t start, table_start, next_ext; uint32_t base = bitmap_writer_nr_selected_commits(writer); size_t i, j = 0; -
CALLOC_ARRAY(commits_bitmap, writer->pseudo_merges_nr); CALLOC_ARRAY(pseudo_merge_ofs, writer->pseudo_merges_nr); - for (i = 0; i < writer->pseudo_merges_nr; i++) { - struct bitmapped_commit *merge = &writer->selected[base + i]; - struct commit_list *p; - - if (!merge->pseudo_merge) - BUG("found non-pseudo merge commit at %"PRIuMAX, (uintmax_t)i); - - commits_bitmap[i] = bitmap_new(); - - for (p = merge->commit->parents; p; p = p->next) - bitmap_set(commits_bitmap[i], - find_object_pos(writer, &p->item->object.oid, - NULL)); - } - start = hashfile_total(f); for (i = 0; i < writer->pseudo_merges_nr; i++) { - struct ewah_bitmap *commits_ewah = bitmap_to_ewah(commits_bitmap[i]); + struct bitmapped_commit *merge = &writer->selected[base + i]; + + if (!merge->pseudo_merge) + BUG("found non-pseudo merge commit at %"PRIuMAX, (uintmax_t)i); + + if (!merge->pseudo_merge_parents || !merge->bitmap) + BUG("missing pseudo-merge bitmap for commit %s", + oid_to_hex(&merge->commit->object.oid)); pseudo_merge_ofs[i] = hashfile_total(f); - - dump_bitmap(f, commits_ewah); - dump_bitmap(f, writer->selected[base+i].write_as); - - ewah_free(commits_ewah); + dump_bitmap(f, merge->pseudo_merge_parents); + dump_bitmap(f, merge->bitmap); } next_ext = st_add(hashfile_total(f), @@ -955,12 +1268,8 @@ static void write_pseudo_merges(struct bitmap_writer *writer, hashwrite_be64(f, table_start - start); hashwrite_be64(f, hashfile_total(f) - start + sizeof(uint64_t)); - for (i = 0; i < writer->pseudo_merges_nr; i++) - bitmap_free(commits_bitmap[i]); - oid_array_clear(&commits); free(pseudo_merge_ofs); - free(commits_bitmap); } static int table_cmp(const void *_va, const void *_vb, void *_data) diff --git a/pack-bitmap.h b/pack-bitmap.h index a95e1c2d11..19a8655457 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -132,6 +132,8 @@ int bitmap_has_oid_in_uninteresting(struct bitmap_index *, const struct object_i off_t get_disk_usage_from_bitmap(struct bitmap_index *, struct rev_info *); +struct
bitmap_pos_cache_entry; + struct bitmap_writer { struct repository *repo; struct ewah_bitmap *commits; @@ -143,6 +145,11 @@ struct bitmap_writer { struct packing_data *to_pack; struct multi_pack_index *midx; /* if appending to a MIDX chain */ + struct bitmap_pos_cache_entry *pos_cache; + size_t pos_cache_nr; + uint64_t pos_cache_hits; + uint64_t pos_cache_misses; + struct bitmapped_commit *selected; unsigned int selected_nr, selected_alloc;