Summary of this patch (text before the first "diff --git" header is ignored
by patch tools):

  * bloom.c: a commit whose changed-path count exceeds
    settings->max_changed_paths now gets a one-byte filter holding 0xFF
    (init_truncated_large_filter) instead of a NULL, zero-length filter.
    A commit whose computed filter length would be zero now gets a one-byte,
    zero-filled filter and is flagged with BLOOM_TRUNC_EMPTY.
  * bloom.h: adds the BLOOM_TRUNC_EMPTY flag (1 << 3).
  * commit-graph.c: counts BLOOM_TRUNC_EMPTY results and reports them through
    trace2 under the key "filter-trunc-empty".
  * Documentation and tests are updated to match the new one-byte encoding.

diff --git a/Documentation/technical/commit-graph-format.txt b/Documentation/technical/commit-graph-format.txt
index 440541045d..814ef810a3 100644
--- a/Documentation/technical/commit-graph-format.txt
+++ b/Documentation/technical/commit-graph-format.txt
@@ -120,7 +120,7 @@ CHUNK DATA:
     * The rest of the chunk is the concatenation of all the computed Bloom
       filters for the commits in lexicographic order.
     * Note: Commits with no changes or more than 512 changes have Bloom filters
-      of length zero.
+      of length one, with either all bits set to zero or one respectively.
     * The BDAT chunk is present if and only if BIDX is present.
 
   Base Graphs List (ID: {'B', 'A', 'S', 'E'}) [Optional]
diff --git a/bloom.c b/bloom.c
index db9fb82437..d234551ce0 100644
--- a/bloom.c
+++ b/bloom.c
@@ -177,6 +177,13 @@ static int pathmap_cmp(const void *hashmap_cmp_fn_data,
 	return strcmp(e1->path, e2->path);
 }
 
+static void init_truncated_large_filter(struct bloom_filter *filter)
+{
+	filter->data = xmalloc(1);
+	filter->data[0] = 0xFF;
+	filter->len = 1;
+}
+
 struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
 						 struct commit *c,
 						 int compute_if_not_present,
@@ -260,12 +267,18 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
 		}
 
 		if (hashmap_get_size(&pathmap) > settings->max_changed_paths) {
+			init_truncated_large_filter(filter);
 			if (computed)
 				*computed |= BLOOM_TRUNC_LARGE;
 			goto cleanup;
 		}
 
 		filter->len = (hashmap_get_size(&pathmap) * settings->bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD;
+		if (!filter->len) {
+			if (computed)
+				*computed |= BLOOM_TRUNC_EMPTY;
+			filter->len = 1;
+		}
 		filter->data = xcalloc(filter->len, sizeof(unsigned char));
 
 		hashmap_for_each_entry(&pathmap, &iter, e, entry) {
@@ -279,8 +292,7 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
 	} else {
 		for (i = 0; i < diff_queued_diff.nr; i++)
 			diff_free_filepair(diff_queued_diff.queue[i]);
-		filter->data = NULL;
-		filter->len = 0;
+		init_truncated_large_filter(filter);
 
 		if (computed)
 			*computed |= BLOOM_TRUNC_LARGE;
diff --git a/bloom.h b/bloom.h
index c6d77e8393..adde6dfe21 100644
--- a/bloom.h
+++ b/bloom.h
@@ -93,6 +93,7 @@ enum bloom_filter_computed {
 	BLOOM_NOT_COMPUTED = (1 << 0),
 	BLOOM_COMPUTED = (1 << 1),
 	BLOOM_TRUNC_LARGE = (1 << 2),
+	BLOOM_TRUNC_EMPTY = (1 << 3),
 };
 
 struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
diff --git a/commit-graph.c b/commit-graph.c
index 4d6ce2967e..0a9ace06fb 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -968,6 +968,7 @@ struct write_commit_graph_context {
 
 	int count_bloom_filter_computed;
 	int count_bloom_filter_not_computed;
+	int count_bloom_filter_trunc_empty;
 	int count_bloom_filter_trunc_large;
 };
 
@@ -1396,6 +1397,8 @@ static void trace2_bloom_filter_write_statistics(struct write_commit_graph_conte
 			   ctx->count_bloom_filter_computed);
 	trace2_data_intmax("commit-graph", ctx->r, "filter-not-computed",
 			   ctx->count_bloom_filter_not_computed);
+	trace2_data_intmax("commit-graph", ctx->r, "filter-trunc-empty",
+			   ctx->count_bloom_filter_trunc_empty);
 	trace2_data_intmax("commit-graph", ctx->r, "filter-trunc-large",
 			   ctx->count_bloom_filter_trunc_large);
 }
@@ -1432,6 +1435,8 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
 			&computed);
 		if (computed & BLOOM_COMPUTED) {
 			ctx->count_bloom_filter_computed++;
+			if (computed & BLOOM_TRUNC_EMPTY)
+				ctx->count_bloom_filter_trunc_empty++;
 			if (computed & BLOOM_TRUNC_LARGE)
 				ctx->count_bloom_filter_trunc_large++;
 		} else if (computed & BLOOM_NOT_COMPUTED)
diff --git a/t/t0095-bloom.sh b/t/t0095-bloom.sh
index 232ba2c485..7e4ab1795f 100755
--- a/t/t0095-bloom.sh
+++ b/t/t0095-bloom.sh
@@ -71,8 +71,8 @@ test_expect_success 'get bloom filters for commit with no changes' '
 	git init &&
 	git commit --allow-empty -m "c0" &&
 	cat >expect <<-\EOF &&
-	Filter_Length:0
-	Filter_Data:
+	Filter_Length:1
+	Filter_Data:00|
 	EOF
 	test-tool bloom get_filter_for_commit "$(git rev-parse HEAD)" >actual &&
 	test_cmp expect actual
@@ -107,8 +107,8 @@ test_expect_success EXPENSIVE 'get bloom filter for commit with 513 changes' '
 	git add bigDir &&
 	git commit -m "commit with 513 changes" &&
 	cat >expect <<-\EOF &&
-	Filter_Length:0
-	Filter_Data:
+	Filter_Length:1
+	Filter_Data:ff|
 	EOF
 	test-tool bloom get_filter_for_commit "$(git rev-parse HEAD)" >actual &&
 	test_cmp expect actual
diff --git a/t/t4216-log-bloom.sh b/t/t4216-log-bloom.sh
index 6535a3c5d5..ec9845c9be 100755
--- a/t/t4216-log-bloom.sh
+++ b/t/t4216-log-bloom.sh
@@ -30,8 +30,10 @@ test_expect_success 'setup test - repo, commits, commit graph, log outputs' '
 	rm file_to_be_deleted &&
 	git add . &&
 	git commit -m "file removed" &&
+	git commit --allow-empty -m "empty" &&
 	git commit-graph write --reachable --changed-paths
 '
+
 graph_read_expect () {
 	NUM_CHUNKS=5
 	cat >expect <<- EOF
@@ -44,7 +46,7 @@ graph_read_expect () {
 }
 
 test_expect_success 'commit-graph write wrote out the bloom chunks' '
-	graph_read_expect 15
+	graph_read_expect 16
 '
 
 # Turn off any inherited trace2 settings for this test.
@@ -151,7 +153,7 @@ test_expect_success 'setup - add commit-graph to the chain with Bloom filters' '
 
 test_bloom_filters_used_when_some_filters_are_missing () {
 	log_args=$1
-	bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"maybe\":6,\"definitely_not\":8"
+	bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"maybe\":6,\"definitely_not\":9"
 	setup "$log_args" &&
 	grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" &&
 	test_cmp log_wo_bloom log_w_bloom
@@ -180,10 +182,18 @@ test_max_changed_paths () {
 	grep "\"max_changed_paths\":$1" $2
 }
 
+test_filter_not_computed () {
+	grep "\"key\":\"filter-not-computed\",\"value\":\"$1\"" $2
+}
+
 test_filter_computed () {
 	grep "\"key\":\"filter-computed\",\"value\":\"$1\"" $2
 }
 
+test_filter_trunc_empty () {
+	grep "\"key\":\"filter-trunc-empty\",\"value\":\"$1\"" $2
+}
+
 test_filter_trunc_large () {
 	grep "\"key\":\"filter-trunc-large\",\"value\":\"$1\"" $2
 }
@@ -278,4 +288,21 @@ test_expect_success 'correctly report changes over limit' '
 	)
 '
 
+test_expect_success 'correctly report commits with no changed paths' '
+	git init empty &&
+	test_when_finished "rm -fr empty" &&
+	(
+		cd empty &&
+
+		git commit --allow-empty -m "initial commit" &&
+
+		GIT_TRACE2_EVENT="$(pwd)/trace.event" \
+			git commit-graph write --reachable --changed-paths &&
+		test_filter_computed 1 trace.event &&
+		test_filter_not_computed 0 trace.event &&
+		test_filter_trunc_empty 1 trace.event &&
+		test_filter_trunc_large 0 trace.event
+	)
+'
+
 test_done