diff --git a/Documentation/config/feature.txt b/Documentation/config/feature.txt index 17b4d39f89..bf9546fca4 100644 --- a/Documentation/config/feature.txt +++ b/Documentation/config/feature.txt @@ -14,6 +14,9 @@ feature.experimental:: + * `fetch.negotiationAlgorithm=skipping` may improve fetch negotiation times by skipping more commits at a time, reducing the number of round trips. ++ +* `pack.useBitmapBoundaryTraversal=true` may improve bitmap traversal times by +walking fewer objects. feature.manyFiles:: Enable config options that optimize for repos with many files in the diff --git a/Documentation/config/pack.txt b/Documentation/config/pack.txt index d4c7c9d4e4..3748136d14 100644 --- a/Documentation/config/pack.txt +++ b/Documentation/config/pack.txt @@ -123,6 +123,23 @@ pack.useBitmaps:: true. You should not generally need to turn this off unless you are debugging pack bitmaps. +pack.useBitmapBoundaryTraversal:: + When true, Git will use an experimental algorithm for computing + reachability queries with bitmaps. Instead of building up + complete bitmaps for all of the negated tips and then OR-ing + them together, consider negated tips with existing bitmaps as + additive (i.e. OR-ing them into the result if they exist, + ignoring them otherwise), and build up a bitmap at the boundary + instead. ++ +When using this algorithm, Git may include too many objects as a result +of not opening up trees belonging to certain UNINTERESTING commits. This +inexactness matches the non-bitmap traversal algorithm. ++ +In many cases, this can provide a speed-up over the exact algorithm, +particularly when there is poor bitmap coverage of the negated side of +the query. + pack.useSparse:: When true, git will default to using the '--sparse' option in 'git pack-objects' when the '--revs' option is present. This diff --git a/ci/run-build-and-tests.sh b/ci/run-build-and-tests.sh index a18b13a41d..2528f25e31 100755 --- a/ci/run-build-and-tests.sh +++ b/ci/run-build-and-tests.sh @@ -29,6 +29,7 @@ linux-TEST-vars) export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=master export GIT_TEST_NO_WRITE_REV_INDEX=1 export GIT_TEST_CHECKOUT_WORKERS=2 + export GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=1 ;; linux-clang) export GIT_TEST_DEFAULT_HASH=sha1 diff --git a/pack-bitmap.c b/pack-bitmap.c index 5d2cc6ac96..e8a1579b16 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1077,6 +1077,126 @@ static struct bitmap *fill_in_bitmap(struct bitmap_index *bitmap_git, return base; } +struct bitmap_boundary_cb { + struct bitmap_index *bitmap_git; + struct bitmap *base; + + struct object_array boundary; +}; + +static void show_boundary_commit(struct commit *commit, void *_data) +{ + struct bitmap_boundary_cb *data = _data; + + if (commit->object.flags & BOUNDARY) + add_object_array(&commit->object, "", &data->boundary); + + if (commit->object.flags & UNINTERESTING) { + if (bitmap_walk_contains(data->bitmap_git, data->base, + &commit->object.oid)) + return; + + add_commit_to_bitmap(data->bitmap_git, &data->base, commit); + } +} + +static void show_boundary_object(struct object *object, + const char *name, void *data) +{ + BUG("should not be called"); +} + +static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git, + struct rev_info *revs, + struct object_list *roots) +{ + struct bitmap_boundary_cb cb; + struct object_list *root; + unsigned int i; + unsigned int tmp_blobs, tmp_trees, tmp_tags; + int any_missing = 0; + + cb.bitmap_git = bitmap_git; + cb.base = bitmap_new(); + object_array_init(&cb.boundary); + + revs->ignore_missing_links = 1; + + /* + * OR in any existing reachability bitmaps among `roots` into + * `cb.base`. + */ + for (root = roots; root; root = root->next) { + struct object *object = root->item; + if (object->type != OBJ_COMMIT || + bitmap_walk_contains(bitmap_git, cb.base, &object->oid)) + continue; + + if (add_commit_to_bitmap(bitmap_git, &cb.base, + (struct commit *)object)) + continue; + + any_missing = 1; + } + + if (!any_missing) + goto cleanup; + + tmp_blobs = revs->blob_objects; + tmp_trees = revs->tree_objects; + tmp_tags = revs->blob_objects; + revs->blob_objects = 0; + revs->tree_objects = 0; + revs->tag_objects = 0; + + /* + * We didn't have complete coverage of the roots. First setup a + * revision walk to (a) OR in any bitmaps that are UNINTERESTING + * between the tips and boundary, and (b) record the boundary. + */ + trace2_region_enter("pack-bitmap", "boundary-prepare", the_repository); + if (prepare_revision_walk(revs)) + die("revision walk setup failed"); + trace2_region_leave("pack-bitmap", "boundary-prepare", the_repository); + + trace2_region_enter("pack-bitmap", "boundary-traverse", the_repository); + revs->boundary = 1; + traverse_commit_list_filtered(revs, + show_boundary_commit, + show_boundary_object, + &cb, NULL); + revs->boundary = 0; + trace2_region_leave("pack-bitmap", "boundary-traverse", the_repository); + + revs->blob_objects = tmp_blobs; + revs->tree_objects = tmp_trees; + revs->tag_objects = tmp_tags; + + reset_revision_walk(); + clear_object_flags(UNINTERESTING); + + /* + * Then add the boundary commit(s) as fill-in traversal tips. + */ + trace2_region_enter("pack-bitmap", "boundary-fill-in", the_repository); + for (i = 0; i < cb.boundary.nr; i++) { + struct object *obj = cb.boundary.objects[i].item; + if (bitmap_walk_contains(bitmap_git, cb.base, &obj->oid)) + obj->flags |= SEEN; + else + add_pending_object(revs, obj, ""); + } + if (revs->pending.nr) + cb.base = fill_in_bitmap(bitmap_git, revs, cb.base, NULL); + trace2_region_leave("pack-bitmap", "boundary-fill-in", the_repository); + +cleanup: + object_array_clear(&cb.boundary); + revs->ignore_missing_links = 0; + + return cb.base; +} + static struct bitmap *find_objects(struct bitmap_index *bitmap_git, struct rev_info *revs, struct object_list *roots, @@ -1142,8 +1262,21 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git, } } - if (needs_walk) + if (needs_walk) { + /* + * This fill-in traversal may walk over some objects + * again, since we have already traversed in order to + * find the boundary. + * + * But this extra walk should be extremely cheap, since + * all commit objects are loaded into memory, and + * because we skip walking to parents that are + * UNINTERESTING, since it will be marked in the haves + * bitmap already (or it has an on-disk bitmap, since + * OR-ing it in covers all of its ancestors). + */ base = fill_in_bitmap(bitmap_git, revs, base, seen); + } return base; } @@ -1535,6 +1668,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, int filter_provided_objects) { unsigned int i; + int use_boundary_traversal; struct object_list *wants = NULL; struct object_list *haves = NULL; @@ -1585,13 +1719,21 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, object_list_insert(object, &wants); } - /* - * if we have a HAVES list, but none of those haves is contained - * in the packfile that has a bitmap, we don't have anything to - * optimize here - */ - if (haves && !in_bitmapped_pack(bitmap_git, haves)) - goto cleanup; + use_boundary_traversal = git_env_bool(GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL, -1); + if (use_boundary_traversal < 0) { + prepare_repo_settings(revs->repo); + use_boundary_traversal = revs->repo->settings.pack_use_bitmap_boundary_traversal; + } + + if (!use_boundary_traversal) { + /* + * if we have a HAVES list, but none of those haves is contained + * in the packfile that has a bitmap, we don't have anything to + * optimize here + */ + if (haves && !in_bitmapped_pack(bitmap_git, haves)) + goto cleanup; + } /* if we don't want anything, we're done here */ if (!wants) @@ -1605,18 +1747,32 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, if (load_bitmap(revs->repo, bitmap_git) < 0) goto cleanup; - object_array_clear(&revs->pending); + if (!use_boundary_traversal) + object_array_clear(&revs->pending); if (haves) { - revs->ignore_missing_links = 1; - haves_bitmap = find_objects(bitmap_git, revs, haves, NULL); - reset_revision_walk(); - revs->ignore_missing_links = 0; + if (use_boundary_traversal) { + trace2_region_enter("pack-bitmap", "haves/boundary", the_repository); + haves_bitmap = find_boundary_objects(bitmap_git, revs, haves); + trace2_region_leave("pack-bitmap", "haves/boundary", the_repository); + } else { + trace2_region_enter("pack-bitmap", "haves/classic", the_repository); + revs->ignore_missing_links = 1; + haves_bitmap = find_objects(bitmap_git, revs, haves, NULL); + reset_revision_walk(); + revs->ignore_missing_links = 0; + trace2_region_leave("pack-bitmap", "haves/classic", the_repository); + } if (!haves_bitmap) BUG("failed to perform bitmap walk"); } + if (use_boundary_traversal) { + object_array_clear(&revs->pending); + reset_revision_walk(); + } + wants_bitmap = find_objects(bitmap_git, revs, wants, haves_bitmap); if (!wants_bitmap) diff --git a/pack-bitmap.h b/pack-bitmap.h index f0180b5276..daa72b6331 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -62,6 +62,10 @@ void traverse_bitmap_commit_list(struct bitmap_index *, void test_bitmap_walk(struct rev_info *revs); int test_bitmap_commits(struct repository *r); int test_bitmap_hashes(struct repository *r); + +#define GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL \ + "GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL" + struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, int filter_provided_objects); uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git); diff --git a/repo-settings.c b/repo-settings.c index d220c5dd9f..7b566d729d 100644 --- a/repo-settings.c +++ b/repo-settings.c @@ -41,8 +41,10 @@ void prepare_repo_settings(struct repository *r) repo_cfg_bool(r, "feature.experimental", &experimental, 0); /* Defaults modified by feature.* */ - if (experimental) + if (experimental) { r->settings.fetch_negotiation_algorithm = FETCH_NEGOTIATION_SKIPPING; + r->settings.pack_use_bitmap_boundary_traversal = 1; + } if (manyfiles) { r->settings.index_version = 4; r->settings.index_skip_hash = 1; @@ -62,6 +64,9 @@ void prepare_repo_settings(struct repository *r) repo_cfg_bool(r, "index.sparse", &r->settings.sparse_index, 0); repo_cfg_bool(r, "index.skiphash", &r->settings.index_skip_hash, r->settings.index_skip_hash); repo_cfg_bool(r, "pack.readreverseindex", &r->settings.pack_read_reverse_index, 1); + repo_cfg_bool(r, "pack.usebitmapboundarytraversal", + &r->settings.pack_use_bitmap_boundary_traversal, + r->settings.pack_use_bitmap_boundary_traversal); /* * The GIT_TEST_MULTI_PACK_INDEX variable is special in that diff --git a/repository.h b/repository.h index 1a13ff2867..c42f7ab6bd 100644 --- a/repository.h +++ b/repository.h @@ -37,6 +37,7 @@ struct repo_settings { int command_requires_full_index; int sparse_index; int pack_read_reverse_index; + int pack_use_bitmap_boundary_traversal; struct fsmonitor_settings *fsmonitor; /* lazily loaded */ diff --git a/t/README b/t/README index bdfac4cceb..b71a065e4a 100644 --- a/t/README +++ b/t/README @@ -442,6 +442,10 @@ GIT_TEST_INDEX_VERSION= exercises the index read/write code path for the index version specified. Can be set to any valid version (currently 2, 3, or 4). +GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL= if enabled will +use the boundary-based bitmap traversal algorithm. See the documentation +of `pack.useBitmapBoundaryTraversal` for more details. + GIT_TEST_PACK_SPARSE= if disabled will default the pack-objects builtin to use the non-sparse object walk. This can still be overridden by the --sparse command-line argument. diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh index 526a5a506e..78c1c6c923 100755 --- a/t/t5310-pack-bitmaps.sh +++ b/t/t5310-pack-bitmaps.sh @@ -9,6 +9,10 @@ test_description='exercise basic bitmap functionality' # their place. GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 +# Likewise, allow individual tests to control whether or not they use +# the boundary-based traversal. +sane_unset GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL + objpath () { echo ".git/objects/$(echo "$1" | sed -e 's|\(..\)|\1/|')" } @@ -457,6 +461,13 @@ test_bitmap_cases () { test_bitmap_cases +GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=1 +export GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL + +test_bitmap_cases + +sane_unset GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL + test_expect_success 'incremental repack fails when bitmaps are requested' ' test_commit more-1 && test_must_fail git repack -d 2>err && @@ -468,6 +479,33 @@ test_expect_success 'incremental repack can disable bitmaps' ' git repack -d --no-write-bitmap-index ' +test_expect_success 'boundary-based traversal is used when requested' ' + git repack -a -d --write-bitmap-index && + + for argv in \ + "git -c pack.useBitmapBoundaryTraversal=true" \ + "git -c feature.experimental=true" \ + "GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=1 git" + do + eval "GIT_TRACE2_EVENT=1 $argv rev-list --objects \ + --use-bitmap-index second..other 2>perf" && + grep "\"region_enter\".*\"label\":\"haves/boundary\"" perf || + return 1 + done && + + for argv in \ + "git -c pack.useBitmapBoundaryTraversal=false" \ + "git -c feature.experimental=true -c pack.useBitmapBoundaryTraversal=false" \ + "GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=0 git -c pack.useBitmapBoundaryTraversal=true" \ + "GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=0 git -c feature.experimental=true" + do + eval "GIT_TRACE2_EVENT=1 $argv rev-list --objects \ + --use-bitmap-index second..other 2>perf" && + grep "\"region_enter\".*\"label\":\"haves/classic\"" perf || + return 1 + done +' + test_bitmap_cases "pack.writeBitmapLookupTable" test_expect_success 'verify writing bitmap lookup table when enabled' '