Merge branch 'ly/changed-path-traversal-with-magic-pathspec'

Revision traversals limited with a pathspec, like "git log dir/*",
used to ignore the changed-paths Bloom filter when the pathspec
contained wildcards; now they take advantage of the filter when
they can.

* ly/changed-path-traversal-with-magic-pathspec:
  bloom: enable bloom filter with wildcard pathspec in revision traversal
main
Junio C Hamano 2025-08-21 13:47:02 -07:00
commit b4e38c1acd
2 changed files with 56 additions and 17 deletions

View File

@ -671,12 +671,17 @@ static void trace2_bloom_filter_statistics_atexit(void)

static int forbid_bloom_filters(struct pathspec *spec)
{
if (spec->has_wildcard)
return 1;
if (spec->magic & ~PATHSPEC_LITERAL)
unsigned int allowed_magic =
PATHSPEC_FROMTOP |
PATHSPEC_MAXDEPTH |
PATHSPEC_LITERAL |
PATHSPEC_GLOB |
PATHSPEC_ATTR;

if (spec->magic & ~allowed_magic)
return 1;
for (size_t nr = 0; nr < spec->nr; nr++)
if (spec->items[nr].magic & ~PATHSPEC_LITERAL)
if (spec->items[nr].magic & ~allowed_magic)
return 1;

return 0;
@ -691,23 +696,34 @@ static int convert_pathspec_to_bloom_keyvec(struct bloom_keyvec **out,
char *path_alloc = NULL;
const char *path;
size_t len;
int res = 0;
int res = -1;

len = pi->nowildcard_len;
if (len != pi->len) {
/*
* for path like "dir/file*", nowildcard part would be
* "dir/file", but only "dir" should be used for the
* bloom filter.
*/
while (len > 0 && pi->match[len - 1] != '/')
len--;
}
/* remove single trailing slash from path, if needed */
if (pi->len > 0 && pi->match[pi->len - 1] == '/') {
path_alloc = xmemdupz(pi->match, pi->len - 1);
if (len > 0 && pi->match[len - 1] == '/')
len--;

if (!len)
goto cleanup;

if (len != pi->len) {
path_alloc = xmemdupz(pi->match, len);
path = path_alloc;
} else
path = pi->match;

len = strlen(path);
if (!len) {
res = -1;
goto cleanup;
}

*out = bloom_keyvec_new(path, len, settings);

res = 0;
cleanup:
free(path_alloc);
return res;

View File

@ -154,11 +154,34 @@ test_expect_success 'git log with multiple literal paths uses Bloom filter' '
test_bloom_filters_used "-- file*"
'

test_expect_success 'git log with path contains a wildcard does not use Bloom filter' '
test_expect_success 'git log with paths all contain non-wildcard part uses Bloom filter' '
test_bloom_filters_used "-- A/\* file4" &&
test_bloom_filters_used "-- A/file\*" &&
test_bloom_filters_used "-- * A/\*"
'

test_expect_success 'git log with path only contains wildcard part does not use Bloom filter' '
test_bloom_filters_not_used "-- file\*" &&
test_bloom_filters_not_used "-- A/\* file4" &&
test_bloom_filters_not_used "-- file4 A/\*" &&
test_bloom_filters_not_used "-- * A/\*"
test_bloom_filters_not_used "-- file\* A/\*" &&
test_bloom_filters_not_used "-- file\* *" &&
test_bloom_filters_not_used "-- \*"
'

# Exercise pathspec magic together with changed-path Bloom filters:
# the top, glob and attr magic are expected to still use the filters,
# while icase and exclude are expected to bypass them.
test_expect_success 'git log with path contains various magic signatures' '
	# Change directory only inside a subshell, so that a failing
	# assertion cannot leave later tests running from within A.
	(
		cd A &&
		test_bloom_filters_used "-- \:\(top\)B"
	) &&

	test_bloom_filters_used "-- \:\(glob\)A/\*\*/C" &&
	test_bloom_filters_not_used "-- \:\(icase\)FILE4" &&
	test_bloom_filters_not_used "-- \:\(exclude\)A/B/C" &&

	# The attributes file exists only for the attr magic check below
	# and is removed again when this test finishes.
	test_when_finished "rm -f .gitattributes" &&
	cat >.gitattributes <<-EOF &&
	A/file1 text
	A/B/file2 -text
	EOF
	test_bloom_filters_used "-- \:\(attr\:text\)A"
'

test_expect_success 'setup - add commit-graph to the chain without Bloom filters' '