bloom: optimize multiple pathspec items in revision

To enable optimize multiple pathspec items in revision traversal,
return 0 if all pathspec item is literal in forbid_bloom_filters().
Add for loops to initialize and check each pathspec item's bloom_keyvec
when optimization is possible.

Add new test cases in t/t4216-log-bloom.sh to ensure
 - consistent results between the optimization for multiple pathspec
   items using bloom filter and the case without bloom filter
   optimization.
 - does not use bloom filter if any pathspec item is not literal.

With these optimizations, we get some improvements for multi-pathspec runs
of 'git log'. First, in the Git repository we see these modest results:

Benchmark 1: old
 Time (mean ± σ):      73.1 ms ±   2.9 ms
 Range (min … max):    69.9 ms …  84.5 ms    42 runs

Benchmark 2: new
 Time (mean ± σ):      55.1 ms ±   2.9 ms
 Range (min … max):    51.1 ms …  61.2 ms    52 runs

Summary
 'new' ran
   1.33 ± 0.09 times faster than 'old'

But in a larger repo, such as the LLVM project repo below, we get even
better results:

Benchmark 1: old
 Time (mean ± σ):      1.974 s ±  0.006 s
 Range (min … max):    1.960 s …  1.983 s    10 runs

Benchmark 2: new
 Time (mean ± σ):     262.9 ms ±   2.4 ms
 Range (min … max):   257.7 ms … 266.2 ms    11 runs

Summary
 'new' ran
   7.51 ± 0.07 times faster than 'old'

Signed-off-by: Derrick Stolee <stolee@gmail.com>
[ly: rename convert_pathspec_to_filter() to convert_pathspec_to_bloom_keyvec()]
Signed-off-by: Lidong Yan <502024330056@smail.nju.edu.cn>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
maint
Lidong Yan 2025-07-15 10:56:22 +08:00 committed by Junio C Hamano
parent 937153dece
commit 2a6ce090f2
2 changed files with 25 additions and 19 deletions

View File

@ -675,12 +675,11 @@ static int forbid_bloom_filters(struct pathspec *spec)
{ {
if (spec->has_wildcard) if (spec->has_wildcard)
return 1; return 1;
if (spec->nr > 1)
return 1;
if (spec->magic & ~PATHSPEC_LITERAL) if (spec->magic & ~PATHSPEC_LITERAL)
return 1; return 1;
if (spec->nr && (spec->items[0].magic & ~PATHSPEC_LITERAL)) for (size_t nr = 0; nr < spec->nr; nr++)
return 1; if (spec->items[nr].magic & ~PATHSPEC_LITERAL)
return 1;


return 0; return 0;
} }
@ -733,13 +732,15 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
if (!revs->pruning.pathspec.nr) if (!revs->pruning.pathspec.nr)
return; return;


revs->bloom_keyvecs_nr = 1; revs->bloom_keyvecs_nr = revs->pruning.pathspec.nr;
CALLOC_ARRAY(revs->bloom_keyvecs, 1); CALLOC_ARRAY(revs->bloom_keyvecs, revs->bloom_keyvecs_nr);


if (convert_pathspec_to_bloom_keyvec(&revs->bloom_keyvecs[0], for (int i = 0; i < revs->pruning.pathspec.nr; i++) {
&revs->pruning.pathspec.items[0], if (convert_pathspec_to_bloom_keyvec(&revs->bloom_keyvecs[i],
revs->bloom_filter_settings)) &revs->pruning.pathspec.items[i],
goto fail; revs->bloom_filter_settings))
goto fail;
}


if (trace2_is_enabled() && !bloom_filter_atexit_registered) { if (trace2_is_enabled() && !bloom_filter_atexit_registered) {
atexit(trace2_bloom_filter_statistics_atexit); atexit(trace2_bloom_filter_statistics_atexit);

View File

@ -66,8 +66,9 @@ sane_unset GIT_TRACE2_CONFIG_PARAMS


setup () { setup () {
rm -f "$TRASH_DIRECTORY/trace.perf" && rm -f "$TRASH_DIRECTORY/trace.perf" &&
git -c core.commitGraph=false log --pretty="format:%s" $1 >log_wo_bloom && eval git -c core.commitGraph=false log --pretty="format:%s" "$1" >log_wo_bloom &&
GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace.perf" git -c core.commitGraph=true log --pretty="format:%s" $1 >log_w_bloom eval "GIT_TRACE2_PERF=\"$TRASH_DIRECTORY/trace.perf\"" \
git -c core.commitGraph=true log --pretty="format:%s" "$1" >log_w_bloom
} }


test_bloom_filters_used () { test_bloom_filters_used () {
@ -138,10 +139,6 @@ test_expect_success 'git log with --walk-reflogs does not use Bloom filters' '
test_bloom_filters_not_used "--walk-reflogs -- A" test_bloom_filters_not_used "--walk-reflogs -- A"
' '


test_expect_success 'git log -- multiple path specs does not use Bloom filters' '
test_bloom_filters_not_used "-- file4 A/file1"
'

test_expect_success 'git log -- "." pathspec at root does not use Bloom filters' ' test_expect_success 'git log -- "." pathspec at root does not use Bloom filters' '
test_bloom_filters_not_used "-- ." test_bloom_filters_not_used "-- ."
' '
@ -151,9 +148,17 @@ test_expect_success 'git log with wildcard that resolves to a single path uses B
test_bloom_filters_used "-- *renamed" test_bloom_filters_used "-- *renamed"
' '


test_expect_success 'git log with wildcard that resolves to a multiple paths does not uses Bloom filters' ' test_expect_success 'git log with multiple literal paths uses Bloom filter' '
test_bloom_filters_not_used "-- *" && test_bloom_filters_used "-- file4 A/file1" &&
test_bloom_filters_not_used "-- file*" test_bloom_filters_used "-- *" &&
test_bloom_filters_used "-- file*"
'

test_expect_success 'git log with path contains a wildcard does not use Bloom filter' '
test_bloom_filters_not_used "-- file\*" &&
test_bloom_filters_not_used "-- A/\* file4" &&
test_bloom_filters_not_used "-- file4 A/\*" &&
test_bloom_filters_not_used "-- * A/\*"
' '


test_expect_success 'setup - add commit-graph to the chain without Bloom filters' ' test_expect_success 'setup - add commit-graph to the chain without Bloom filters' '