bloom: annotate filters with hash version

In subsequent commits, we will want to load existing Bloom filters out
of a commit-graph, even when the hash version they were computed with
does not match the value of `commitGraph.changedPathVersion`.

In order to differentiate between filters computed with different hash
versions, add a "version" field to each Bloom filter.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
maint
Taylor Blau 2024-06-25 13:39:54 -04:00 committed by Junio C Hamano
parent ea0024deb9
commit 5b5d5b598c
2 changed files with 9 additions and 3 deletions

11
bloom.c
View File

@ -88,6 +88,7 @@ int load_bloom_filter_from_graph(struct commit_graph *g,
filter->data = (unsigned char *)(g->chunk_bloom_data + filter->data = (unsigned char *)(g->chunk_bloom_data +
sizeof(unsigned char) * start_index + sizeof(unsigned char) * start_index +
BLOOMDATA_CHUNK_HEADER_SIZE); BLOOMDATA_CHUNK_HEADER_SIZE);
filter->version = g->bloom_filter_settings->hash_version;


return 1; return 1;
} }
@ -210,11 +211,13 @@ static int pathmap_cmp(const void *hashmap_cmp_fn_data UNUSED,
return strcmp(e1->path, e2->path); return strcmp(e1->path, e2->path);
} }


static void init_truncated_large_filter(struct bloom_filter *filter) static void init_truncated_large_filter(struct bloom_filter *filter,
int version)
{ {
filter->data = xmalloc(1); filter->data = xmalloc(1);
filter->data[0] = 0xFF; filter->data[0] = 0xFF;
filter->len = 1; filter->len = 1;
filter->version = version;
} }


struct bloom_filter *get_or_compute_bloom_filter(struct repository *r, struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
@ -299,13 +302,15 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
} }


if (hashmap_get_size(&pathmap) > settings->max_changed_paths) { if (hashmap_get_size(&pathmap) > settings->max_changed_paths) {
init_truncated_large_filter(filter); init_truncated_large_filter(filter,
settings->hash_version);
if (computed) if (computed)
*computed |= BLOOM_TRUNC_LARGE; *computed |= BLOOM_TRUNC_LARGE;
goto cleanup; goto cleanup;
} }


filter->len = (hashmap_get_size(&pathmap) * settings->bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD; filter->len = (hashmap_get_size(&pathmap) * settings->bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD;
filter->version = settings->hash_version;
if (!filter->len) { if (!filter->len) {
if (computed) if (computed)
*computed |= BLOOM_TRUNC_EMPTY; *computed |= BLOOM_TRUNC_EMPTY;
@ -325,7 +330,7 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
} else { } else {
for (i = 0; i < diff_queued_diff.nr; i++) for (i = 0; i < diff_queued_diff.nr; i++)
diff_free_filepair(diff_queued_diff.queue[i]); diff_free_filepair(diff_queued_diff.queue[i]);
init_truncated_large_filter(filter); init_truncated_large_filter(filter, settings->hash_version);


if (computed) if (computed)
*computed |= BLOOM_TRUNC_LARGE; *computed |= BLOOM_TRUNC_LARGE;

View File

@ -53,6 +53,7 @@ struct bloom_filter_settings {
struct bloom_filter { struct bloom_filter {
unsigned char *data; unsigned char *data;
size_t len; size_t len;
int version;
}; };


/* /*