Merge branch 'ly/changed-paths-traversal'
Lift the limitation to use changed-path filter in "git log" so that it can be used for a pathspec with multiple literal paths. * ly/changed-paths-traversal: bloom: optimize multiple pathspec items in revision revision: make helper for pathspec to bloom keyvec bloom: replace struct bloom_key * with struct bloom_keyvec bloom: rename function operates on bloom_key bloom: add test helper to return murmur3 hashmaint
commit
f22d4ac4fd
2
blame.c
2
blame.c
|
@ -1311,7 +1311,7 @@ static void add_bloom_key(struct blame_bloom_data *bd,
|
|||
}
|
||||
|
||||
bd->keys[bd->nr] = xmalloc(sizeof(struct bloom_key));
|
||||
fill_bloom_key(path, strlen(path), bd->keys[bd->nr], bd->settings);
|
||||
bloom_key_fill(bd->keys[bd->nr], path, strlen(path), bd->settings);
|
||||
bd->nr++;
|
||||
}
|
||||
|
||||
|
|
84
bloom.c
84
bloom.c
|
@ -107,7 +107,7 @@ int load_bloom_filter_from_graph(struct commit_graph *g,
|
|||
* Not considered to be cryptographically secure.
|
||||
* Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
|
||||
*/
|
||||
uint32_t murmur3_seeded_v2(uint32_t seed, const char *data, size_t len)
|
||||
static uint32_t murmur3_seeded_v2(uint32_t seed, const char *data, size_t len)
|
||||
{
|
||||
const uint32_t c1 = 0xcc9e2d51;
|
||||
const uint32_t c2 = 0x1b873593;
|
||||
|
@ -221,9 +221,7 @@ static uint32_t murmur3_seeded_v1(uint32_t seed, const char *data, size_t len)
|
|||
return seed;
|
||||
}
|
||||
|
||||
void fill_bloom_key(const char *data,
|
||||
size_t len,
|
||||
struct bloom_key *key,
|
||||
void bloom_key_fill(struct bloom_key *key, const char *data, size_t len,
|
||||
const struct bloom_filter_settings *settings)
|
||||
{
|
||||
int i;
|
||||
|
@ -243,7 +241,7 @@ void fill_bloom_key(const char *data,
|
|||
key->hashes[i] = hash0 + i * hash1;
|
||||
}
|
||||
|
||||
void clear_bloom_key(struct bloom_key *key)
|
||||
void bloom_key_clear(struct bloom_key *key)
|
||||
{
|
||||
FREE_AND_NULL(key->hashes);
|
||||
}
|
||||
|
@ -280,6 +278,55 @@ void deinit_bloom_filters(void)
|
|||
deep_clear_bloom_filter_slab(&bloom_filters, free_one_bloom_filter);
|
||||
}
|
||||
|
||||
struct bloom_keyvec *bloom_keyvec_new(const char *path, size_t len,
|
||||
const struct bloom_filter_settings *settings)
|
||||
{
|
||||
struct bloom_keyvec *vec;
|
||||
const char *p;
|
||||
size_t sz;
|
||||
size_t nr = 1;
|
||||
|
||||
p = path;
|
||||
while (*p) {
|
||||
/*
|
||||
* At this point, the path is normalized to use Unix-style
|
||||
* path separators. This is required due to how the
|
||||
* changed-path Bloom filters store the paths.
|
||||
*/
|
||||
if (*p == '/')
|
||||
nr++;
|
||||
p++;
|
||||
}
|
||||
|
||||
sz = sizeof(struct bloom_keyvec);
|
||||
sz += nr * sizeof(struct bloom_key);
|
||||
vec = (struct bloom_keyvec *)xcalloc(1, sz);
|
||||
if (!vec)
|
||||
return NULL;
|
||||
vec->count = nr;
|
||||
|
||||
bloom_key_fill(&vec->key[0], path, len, settings);
|
||||
nr = 1;
|
||||
p = path + len - 1;
|
||||
while (p > path) {
|
||||
if (*p == '/') {
|
||||
bloom_key_fill(&vec->key[nr++], path, p - path, settings);
|
||||
}
|
||||
p--;
|
||||
}
|
||||
assert(nr == vec->count);
|
||||
return vec;
|
||||
}
|
||||
|
||||
void bloom_keyvec_free(struct bloom_keyvec *vec)
|
||||
{
|
||||
if (!vec)
|
||||
return;
|
||||
for (size_t nr = 0; nr < vec->count; nr++)
|
||||
bloom_key_clear(&vec->key[nr]);
|
||||
free(vec);
|
||||
}
|
||||
|
||||
static int pathmap_cmp(const void *hashmap_cmp_fn_data UNUSED,
|
||||
const struct hashmap_entry *eptr,
|
||||
const struct hashmap_entry *entry_or_key,
|
||||
|
@ -500,9 +547,9 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
|
|||
|
||||
hashmap_for_each_entry(&pathmap, &iter, e, entry) {
|
||||
struct bloom_key key;
|
||||
fill_bloom_key(e->path, strlen(e->path), &key, settings);
|
||||
bloom_key_fill(&key, e->path, strlen(e->path), settings);
|
||||
add_key_to_filter(&key, filter, settings);
|
||||
clear_bloom_key(&key);
|
||||
bloom_key_clear(&key);
|
||||
}
|
||||
|
||||
cleanup:
|
||||
|
@ -540,3 +587,26 @@ int bloom_filter_contains(const struct bloom_filter *filter,
|
|||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int bloom_filter_contains_vec(const struct bloom_filter *filter,
|
||||
const struct bloom_keyvec *vec,
|
||||
const struct bloom_filter_settings *settings)
|
||||
{
|
||||
int ret = 1;
|
||||
|
||||
for (size_t nr = 0; ret > 0 && nr < vec->count; nr++)
|
||||
ret = bloom_filter_contains(filter, &vec->key[nr], settings);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
uint32_t test_bloom_murmur3_seeded(uint32_t seed, const char *data, size_t len,
|
||||
int version)
|
||||
{
|
||||
assert(version == 1 || version == 2);
|
||||
|
||||
if (version == 2)
|
||||
return murmur3_seeded_v2(seed, data, len);
|
||||
else
|
||||
return murmur3_seeded_v1(seed, data, len);
|
||||
}
|
||||
|
|
56
bloom.h
56
bloom.h
|
@ -74,24 +74,40 @@ struct bloom_key {
|
|||
uint32_t *hashes;
|
||||
};
|
||||
|
||||
/*
|
||||
* A bloom_keyvec is a vector of bloom_keys, which
|
||||
* can be used to store multiple keys for a single
|
||||
* pathspec item.
|
||||
*/
|
||||
struct bloom_keyvec {
|
||||
size_t count;
|
||||
struct bloom_key key[FLEX_ARRAY];
|
||||
};
|
||||
|
||||
int load_bloom_filter_from_graph(struct commit_graph *g,
|
||||
struct bloom_filter *filter,
|
||||
uint32_t graph_pos);
|
||||
|
||||
/*
|
||||
* Calculate the murmur3 32-bit hash value for the given data
|
||||
* using the given seed.
|
||||
* Produces a uniformly distributed hash value.
|
||||
* Not considered to be cryptographically secure.
|
||||
* Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
|
||||
*/
|
||||
uint32_t murmur3_seeded_v2(uint32_t seed, const char *data, size_t len);
|
||||
|
||||
void fill_bloom_key(const char *data,
|
||||
size_t len,
|
||||
struct bloom_key *key,
|
||||
void bloom_key_fill(struct bloom_key *key, const char *data, size_t len,
|
||||
const struct bloom_filter_settings *settings);
|
||||
void clear_bloom_key(struct bloom_key *key);
|
||||
void bloom_key_clear(struct bloom_key *key);
|
||||
|
||||
/*
|
||||
* bloom_keyvec_new - Allocate and populate a bloom_keyvec with keys for the
|
||||
* given path.
|
||||
*
|
||||
* This function splits the input path by '/' and generates a bloom key for each
|
||||
* prefix, in reverse order of specificity. For example, given the input
|
||||
* "a/b/c", it will generate bloom keys for:
|
||||
* - "a/b/c"
|
||||
* - "a/b"
|
||||
* - "a"
|
||||
*
|
||||
* The resulting keys are stored in a newly allocated bloom_keyvec.
|
||||
*/
|
||||
struct bloom_keyvec *bloom_keyvec_new(const char *path, size_t len,
|
||||
const struct bloom_filter_settings *settings);
|
||||
void bloom_keyvec_free(struct bloom_keyvec *vec);
|
||||
|
||||
void add_key_to_filter(const struct bloom_key *key,
|
||||
struct bloom_filter *filter,
|
||||
|
@ -137,4 +153,18 @@ int bloom_filter_contains(const struct bloom_filter *filter,
|
|||
const struct bloom_key *key,
|
||||
const struct bloom_filter_settings *settings);
|
||||
|
||||
/*
|
||||
* bloom_filter_contains_vec - Check if all keys in a key vector are in the
|
||||
* Bloom filter.
|
||||
*
|
||||
* Returns 1 if **all** keys in the vector are present in the filter,
|
||||
* 0 if **any** key is not present.
|
||||
*/
|
||||
int bloom_filter_contains_vec(const struct bloom_filter *filter,
|
||||
const struct bloom_keyvec *v,
|
||||
const struct bloom_filter_settings *settings);
|
||||
|
||||
uint32_t test_bloom_murmur3_seeded(uint32_t seed, const char *data, size_t len,
|
||||
int version);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1172,12 +1172,13 @@ static int bloom_filter_check(struct rev_info *rev,
|
|||
return 0;
|
||||
|
||||
while (!result && range) {
|
||||
fill_bloom_key(range->path, strlen(range->path), &key, rev->bloom_filter_settings);
|
||||
bloom_key_fill(&key, range->path, strlen(range->path),
|
||||
rev->bloom_filter_settings);
|
||||
|
||||
if (bloom_filter_contains(filter, &key, rev->bloom_filter_settings))
|
||||
result = 1;
|
||||
|
||||
clear_bloom_key(&key);
|
||||
bloom_key_clear(&key);
|
||||
range = range->next;
|
||||
}
|
||||
|
||||
|
|
126
revision.c
126
revision.c
|
@ -675,24 +675,48 @@ static int forbid_bloom_filters(struct pathspec *spec)
|
|||
{
|
||||
if (spec->has_wildcard)
|
||||
return 1;
|
||||
if (spec->nr > 1)
|
||||
return 1;
|
||||
if (spec->magic & ~PATHSPEC_LITERAL)
|
||||
return 1;
|
||||
if (spec->nr && (spec->items[0].magic & ~PATHSPEC_LITERAL))
|
||||
return 1;
|
||||
for (size_t nr = 0; nr < spec->nr; nr++)
|
||||
if (spec->items[nr].magic & ~PATHSPEC_LITERAL)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void release_revisions_bloom_keyvecs(struct rev_info *revs);
|
||||
|
||||
static int convert_pathspec_to_bloom_keyvec(struct bloom_keyvec **out,
|
||||
const struct pathspec_item *pi,
|
||||
const struct bloom_filter_settings *settings)
|
||||
{
|
||||
char *path_alloc = NULL;
|
||||
const char *path;
|
||||
size_t len;
|
||||
int res = 0;
|
||||
|
||||
/* remove single trailing slash from path, if needed */
|
||||
if (pi->len > 0 && pi->match[pi->len - 1] == '/') {
|
||||
path_alloc = xmemdupz(pi->match, pi->len - 1);
|
||||
path = path_alloc;
|
||||
} else
|
||||
path = pi->match;
|
||||
|
||||
len = strlen(path);
|
||||
if (!len) {
|
||||
res = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
*out = bloom_keyvec_new(path, len, settings);
|
||||
|
||||
cleanup:
|
||||
free(path_alloc);
|
||||
return res;
|
||||
}
|
||||
|
||||
static void prepare_to_use_bloom_filter(struct rev_info *revs)
|
||||
{
|
||||
struct pathspec_item *pi;
|
||||
char *path_alloc = NULL;
|
||||
const char *path, *p;
|
||||
size_t len;
|
||||
int path_component_nr = 1;
|
||||
|
||||
if (!revs->commits)
|
||||
return;
|
||||
|
||||
|
@ -708,48 +732,14 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
|
|||
if (!revs->pruning.pathspec.nr)
|
||||
return;
|
||||
|
||||
pi = &revs->pruning.pathspec.items[0];
|
||||
revs->bloom_keyvecs_nr = revs->pruning.pathspec.nr;
|
||||
CALLOC_ARRAY(revs->bloom_keyvecs, revs->bloom_keyvecs_nr);
|
||||
|
||||
/* remove single trailing slash from path, if needed */
|
||||
if (pi->len > 0 && pi->match[pi->len - 1] == '/') {
|
||||
path_alloc = xmemdupz(pi->match, pi->len - 1);
|
||||
path = path_alloc;
|
||||
} else
|
||||
path = pi->match;
|
||||
|
||||
len = strlen(path);
|
||||
if (!len) {
|
||||
revs->bloom_filter_settings = NULL;
|
||||
free(path_alloc);
|
||||
return;
|
||||
}
|
||||
|
||||
p = path;
|
||||
while (*p) {
|
||||
/*
|
||||
* At this point, the path is normalized to use Unix-style
|
||||
* path separators. This is required due to how the
|
||||
* changed-path Bloom filters store the paths.
|
||||
*/
|
||||
if (*p == '/')
|
||||
path_component_nr++;
|
||||
p++;
|
||||
}
|
||||
|
||||
revs->bloom_keys_nr = path_component_nr;
|
||||
ALLOC_ARRAY(revs->bloom_keys, revs->bloom_keys_nr);
|
||||
|
||||
fill_bloom_key(path, len, &revs->bloom_keys[0],
|
||||
revs->bloom_filter_settings);
|
||||
path_component_nr = 1;
|
||||
|
||||
p = path + len - 1;
|
||||
while (p > path) {
|
||||
if (*p == '/')
|
||||
fill_bloom_key(path, p - path,
|
||||
&revs->bloom_keys[path_component_nr++],
|
||||
revs->bloom_filter_settings);
|
||||
p--;
|
||||
for (int i = 0; i < revs->pruning.pathspec.nr; i++) {
|
||||
if (convert_pathspec_to_bloom_keyvec(&revs->bloom_keyvecs[i],
|
||||
&revs->pruning.pathspec.items[i],
|
||||
revs->bloom_filter_settings))
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (trace2_is_enabled() && !bloom_filter_atexit_registered) {
|
||||
|
@ -757,14 +747,18 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
|
|||
bloom_filter_atexit_registered = 1;
|
||||
}
|
||||
|
||||
free(path_alloc);
|
||||
return;
|
||||
|
||||
fail:
|
||||
revs->bloom_filter_settings = NULL;
|
||||
release_revisions_bloom_keyvecs(revs);
|
||||
}
|
||||
|
||||
static int check_maybe_different_in_bloom_filter(struct rev_info *revs,
|
||||
struct commit *commit)
|
||||
{
|
||||
struct bloom_filter *filter;
|
||||
int result = 1, j;
|
||||
int result = 0;
|
||||
|
||||
if (!revs->repo->objects->commit_graph)
|
||||
return -1;
|
||||
|
@ -779,10 +773,10 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs,
|
|||
return -1;
|
||||
}
|
||||
|
||||
for (j = 0; result && j < revs->bloom_keys_nr; j++) {
|
||||
result = bloom_filter_contains(filter,
|
||||
&revs->bloom_keys[j],
|
||||
revs->bloom_filter_settings);
|
||||
for (size_t nr = 0; !result && nr < revs->bloom_keyvecs_nr; nr++) {
|
||||
result = bloom_filter_contains_vec(filter,
|
||||
revs->bloom_keyvecs[nr],
|
||||
revs->bloom_filter_settings);
|
||||
}
|
||||
|
||||
if (result)
|
||||
|
@ -823,7 +817,7 @@ static int rev_compare_tree(struct rev_info *revs,
|
|||
return REV_TREE_SAME;
|
||||
}
|
||||
|
||||
if (revs->bloom_keys_nr && !nth_parent) {
|
||||
if (revs->bloom_keyvecs_nr && !nth_parent) {
|
||||
bloom_ret = check_maybe_different_in_bloom_filter(revs, commit);
|
||||
|
||||
if (bloom_ret == 0)
|
||||
|
@ -850,7 +844,7 @@ static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit,
|
|||
if (!t1)
|
||||
return 0;
|
||||
|
||||
if (!nth_parent && revs->bloom_keys_nr) {
|
||||
if (!nth_parent && revs->bloom_keyvecs_nr) {
|
||||
bloom_ret = check_maybe_different_in_bloom_filter(revs, commit);
|
||||
if (!bloom_ret)
|
||||
return 1;
|
||||
|
@ -3202,6 +3196,14 @@ static void release_revisions_mailmap(struct string_list *mailmap)
|
|||
|
||||
static void release_revisions_topo_walk_info(struct topo_walk_info *info);
|
||||
|
||||
static void release_revisions_bloom_keyvecs(struct rev_info *revs)
|
||||
{
|
||||
for (size_t nr = 0; nr < revs->bloom_keyvecs_nr; nr++)
|
||||
bloom_keyvec_free(revs->bloom_keyvecs[nr]);
|
||||
FREE_AND_NULL(revs->bloom_keyvecs);
|
||||
revs->bloom_keyvecs_nr = 0;
|
||||
}
|
||||
|
||||
static void free_void_commit_list(void *list)
|
||||
{
|
||||
free_commit_list(list);
|
||||
|
@ -3230,11 +3232,7 @@ void release_revisions(struct rev_info *revs)
|
|||
clear_decoration(&revs->treesame, free);
|
||||
line_log_free(revs);
|
||||
oidset_clear(&revs->missing_commits);
|
||||
|
||||
for (int i = 0; i < revs->bloom_keys_nr; i++)
|
||||
clear_bloom_key(&revs->bloom_keys[i]);
|
||||
FREE_AND_NULL(revs->bloom_keys);
|
||||
revs->bloom_keys_nr = 0;
|
||||
release_revisions_bloom_keyvecs(revs);
|
||||
}
|
||||
|
||||
static void add_child(struct rev_info *revs, struct commit *parent, struct commit *child)
|
||||
|
|
|
@ -62,7 +62,7 @@ struct repository;
|
|||
struct rev_info;
|
||||
struct string_list;
|
||||
struct saved_parents;
|
||||
struct bloom_key;
|
||||
struct bloom_keyvec;
|
||||
struct bloom_filter_settings;
|
||||
struct option;
|
||||
struct parse_opt_ctx_t;
|
||||
|
@ -360,8 +360,8 @@ struct rev_info {
|
|||
|
||||
/* Commit graph bloom filter fields */
|
||||
/* The bloom filter key(s) for the pathspec */
|
||||
struct bloom_key *bloom_keys;
|
||||
int bloom_keys_nr;
|
||||
struct bloom_keyvec **bloom_keyvecs;
|
||||
int bloom_keyvecs_nr;
|
||||
|
||||
/*
|
||||
* The bloom filter settings used to generate the key.
|
||||
|
|
|
@ -12,13 +12,13 @@ static struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS;
|
|||
static void add_string_to_filter(const char *data, struct bloom_filter *filter) {
|
||||
struct bloom_key key;
|
||||
|
||||
fill_bloom_key(data, strlen(data), &key, &settings);
|
||||
bloom_key_fill(&key, data, strlen(data), &settings);
|
||||
printf("Hashes:");
|
||||
for (size_t i = 0; i < settings.num_hashes; i++)
|
||||
printf("0x%08x|", key.hashes[i]);
|
||||
printf("\n");
|
||||
add_key_to_filter(&key, filter, &settings);
|
||||
clear_bloom_key(&key);
|
||||
bloom_key_clear(&key);
|
||||
}
|
||||
|
||||
static void print_bloom_filter(struct bloom_filter *filter) {
|
||||
|
@ -61,13 +61,13 @@ int cmd__bloom(int argc, const char **argv)
|
|||
uint32_t hashed;
|
||||
if (argc < 3)
|
||||
usage(bloom_usage);
|
||||
hashed = murmur3_seeded_v2(0, argv[2], strlen(argv[2]));
|
||||
hashed = test_bloom_murmur3_seeded(0, argv[2], strlen(argv[2]), 2);
|
||||
printf("Murmur3 Hash with seed=0:0x%08x\n", hashed);
|
||||
}
|
||||
|
||||
if (!strcmp(argv[1], "get_murmur3_seven_highbit")) {
|
||||
uint32_t hashed;
|
||||
hashed = murmur3_seeded_v2(0, "\x99\xaa\xbb\xcc\xdd\xee\xff", 7);
|
||||
hashed = test_bloom_murmur3_seeded(0, "\x99\xaa\xbb\xcc\xdd\xee\xff", 7, 2);
|
||||
printf("Murmur3 Hash with seed=0:0x%08x\n", hashed);
|
||||
}
|
||||
|
||||
|
|
|
@ -66,8 +66,9 @@ sane_unset GIT_TRACE2_CONFIG_PARAMS
|
|||
|
||||
setup () {
|
||||
rm -f "$TRASH_DIRECTORY/trace.perf" &&
|
||||
git -c core.commitGraph=false log --pretty="format:%s" $1 >log_wo_bloom &&
|
||||
GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace.perf" git -c core.commitGraph=true log --pretty="format:%s" $1 >log_w_bloom
|
||||
eval git -c core.commitGraph=false log --pretty="format:%s" "$1" >log_wo_bloom &&
|
||||
eval "GIT_TRACE2_PERF=\"$TRASH_DIRECTORY/trace.perf\"" \
|
||||
git -c core.commitGraph=true log --pretty="format:%s" "$1" >log_w_bloom
|
||||
}
|
||||
|
||||
test_bloom_filters_used () {
|
||||
|
@ -138,10 +139,6 @@ test_expect_success 'git log with --walk-reflogs does not use Bloom filters' '
|
|||
test_bloom_filters_not_used "--walk-reflogs -- A"
|
||||
'
|
||||
|
||||
test_expect_success 'git log -- multiple path specs does not use Bloom filters' '
|
||||
test_bloom_filters_not_used "-- file4 A/file1"
|
||||
'
|
||||
|
||||
test_expect_success 'git log -- "." pathspec at root does not use Bloom filters' '
|
||||
test_bloom_filters_not_used "-- ."
|
||||
'
|
||||
|
@ -151,9 +148,17 @@ test_expect_success 'git log with wildcard that resolves to a single path uses B
|
|||
test_bloom_filters_used "-- *renamed"
|
||||
'
|
||||
|
||||
test_expect_success 'git log with wildcard that resolves to a multiple paths does not uses Bloom filters' '
|
||||
test_bloom_filters_not_used "-- *" &&
|
||||
test_bloom_filters_not_used "-- file*"
|
||||
test_expect_success 'git log with multiple literal paths uses Bloom filter' '
|
||||
test_bloom_filters_used "-- file4 A/file1" &&
|
||||
test_bloom_filters_used "-- *" &&
|
||||
test_bloom_filters_used "-- file*"
|
||||
'
|
||||
|
||||
test_expect_success 'git log with path contains a wildcard does not use Bloom filter' '
|
||||
test_bloom_filters_not_used "-- file\*" &&
|
||||
test_bloom_filters_not_used "-- A/\* file4" &&
|
||||
test_bloom_filters_not_used "-- file4 A/\*" &&
|
||||
test_bloom_filters_not_used "-- * A/\*"
|
||||
'
|
||||
|
||||
test_expect_success 'setup - add commit-graph to the chain without Bloom filters' '
|
||||
|
|
Loading…
Reference in New Issue