Merge branch 'tb/path-filter-fix'

The Bloom filter used for path limited history traversal was broken
on systems whose "char" is unsigned; update the implementation and
bump the format version to 2.

* tb/path-filter-fix:
  bloom: introduce `deinit_bloom_filters()`
  commit-graph: reuse existing Bloom filters where possible
  object.h: fix mis-aligned flag bits table
  commit-graph: new Bloom filter version that fixes murmur3
  commit-graph: unconditionally load Bloom filters
  bloom: prepare to discard incompatible Bloom filters
  bloom: annotate filters with hash version
  repo-settings: introduce commitgraph.changedPathsVersion
  t4216: test changed path filters with high bit paths
  t/helper/test-read-graph: implement `bloom-filters` mode
  bloom.h: make `load_bloom_filter_from_graph()` public
  t/helper/test-read-graph.c: extract `dump_graph_info()`
  gitformat-commit-graph: describe version 2 of BDAT
  commit-graph: ensure Bloom filters are read with consistent settings
  revision.c: consult Bloom filters for root commits
  t/t4216-log-bloom.sh: harden `test_bloom_filters_not_used()`
maint
Junio C Hamano 2024-07-08 14:53:09 -07:00
commit ecf7fc600a
14 changed files with 736 additions and 58 deletions

View File

@ -9,6 +9,29 @@ commitGraph.maxNewFilters::
commit-graph write` (c.f., linkgit:git-commit-graph[1]). commit-graph write` (c.f., linkgit:git-commit-graph[1]).


commitGraph.readChangedPaths:: commitGraph.readChangedPaths::
If true, then git will use the changed-path Bloom filters in the Deprecated. Equivalent to commitGraph.changedPathsVersion=-1 if true, and
commit-graph file (if it exists, and they are present). Defaults to commitGraph.changedPathsVersion=0 if false. (If commitGraph.changedPathsVersion
true. See linkgit:git-commit-graph[1] for more information. is also set, commitGraph.changedPathsVersion takes precedence.)

commitGraph.changedPathsVersion::
Specifies the version of the changed-path Bloom filters that Git will read and
write. May be -1, 0, 1, or 2. Note that values greater than 1 may be
incompatible with older versions of Git which do not yet understand
those versions. Use caution when operating in a mixed-version
environment.
+
Defaults to -1.
+
If -1, Git will use the version of the changed-path Bloom filters in the
repository, defaulting to 1 if there are none.
+
If 0, Git will not read any Bloom filters, and will write version 1 Bloom
filters when instructed to write.
+
If 1, Git will only read version 1 Bloom filters, and will write version 1
Bloom filters.
+
If 2, Git will only read version 2 Bloom filters, and will write version 2
Bloom filters.
+
See linkgit:git-commit-graph[1] for more information.

View File

@ -142,13 +142,16 @@ All multi-byte numbers are in network byte order.


==== Bloom Filter Data (ID: {'B', 'D', 'A', 'T'}) [Optional] ==== Bloom Filter Data (ID: {'B', 'D', 'A', 'T'}) [Optional]
* It starts with header consisting of three unsigned 32-bit integers: * It starts with header consisting of three unsigned 32-bit integers:
- Version of the hash algorithm being used. We currently only support - Version of the hash algorithm being used. We currently support
value 1 which corresponds to the 32-bit version of the murmur3 hash value 2 which corresponds to the 32-bit version of the murmur3 hash
implemented exactly as described in implemented exactly as described in
https://en.wikipedia.org/wiki/MurmurHash#Algorithm and the double https://en.wikipedia.org/wiki/MurmurHash#Algorithm and the double
hashing technique using seed values 0x293ae76f and 0x7e646e2c as hashing technique using seed values 0x293ae76f and 0x7e646e2c as
described in https://doi.org/10.1007/978-3-540-30494-4_26 "Bloom Filters described in https://doi.org/10.1007/978-3-540-30494-4_26 "Bloom Filters
in Probabilistic Verification" in Probabilistic Verification". Version 1 Bloom filters have a bug that appears
when char is signed and the repository has path names that have characters >=
0x80; Git supports reading and writing them, but this ability will be removed
in a future version of Git.
- The number of times a path is hashed and hence the number of bit positions - The number of times a path is hashed and hence the number of bit positions
that cumulatively determine whether a file is present in the commit. that cumulatively determine whether a file is present in the commit.
- The minimum number of bits 'b' per entry in the Bloom filter. If the filter - The minimum number of bits 'b' per entry in the Bloom filter. If the filter

208
bloom.c
View File

@ -6,6 +6,9 @@
#include "commit-graph.h" #include "commit-graph.h"
#include "commit.h" #include "commit.h"
#include "commit-slab.h" #include "commit-slab.h"
#include "tree.h"
#include "tree-walk.h"
#include "config.h"
#include "repository.h" #include "repository.h"


define_commit_slab(bloom_filter_slab, struct bloom_filter); define_commit_slab(bloom_filter_slab, struct bloom_filter);
@ -49,9 +52,9 @@ static int check_bloom_offset(struct commit_graph *g, uint32_t pos,
return -1; return -1;
} }


static int load_bloom_filter_from_graph(struct commit_graph *g, int load_bloom_filter_from_graph(struct commit_graph *g,
struct bloom_filter *filter, struct bloom_filter *filter,
uint32_t graph_pos) uint32_t graph_pos)
{ {
uint32_t lex_pos, start_index, end_index; uint32_t lex_pos, start_index, end_index;


@ -89,6 +92,8 @@ static int load_bloom_filter_from_graph(struct commit_graph *g,
filter->data = (unsigned char *)(g->chunk_bloom_data + filter->data = (unsigned char *)(g->chunk_bloom_data +
sizeof(unsigned char) * start_index + sizeof(unsigned char) * start_index +
BLOOMDATA_CHUNK_HEADER_SIZE); BLOOMDATA_CHUNK_HEADER_SIZE);
filter->version = g->bloom_filter_settings->hash_version;
filter->to_free = NULL;


return 1; return 1;
} }
@ -100,7 +105,64 @@ static int load_bloom_filter_from_graph(struct commit_graph *g,
* Not considered to be cryptographically secure. * Not considered to be cryptographically secure.
* Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm * Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
*/ */
uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len) uint32_t murmur3_seeded_v2(uint32_t seed, const char *data, size_t len)
{
const uint32_t c1 = 0xcc9e2d51;
const uint32_t c2 = 0x1b873593;
const uint32_t r1 = 15;
const uint32_t r2 = 13;
const uint32_t m = 5;
const uint32_t n = 0xe6546b64;
int i;
uint32_t k1 = 0;
const char *tail;

int len4 = len / sizeof(uint32_t);

uint32_t k;
for (i = 0; i < len4; i++) {
uint32_t byte1 = (uint32_t)(unsigned char)data[4*i];
uint32_t byte2 = ((uint32_t)(unsigned char)data[4*i + 1]) << 8;
uint32_t byte3 = ((uint32_t)(unsigned char)data[4*i + 2]) << 16;
uint32_t byte4 = ((uint32_t)(unsigned char)data[4*i + 3]) << 24;
k = byte1 | byte2 | byte3 | byte4;
k *= c1;
k = rotate_left(k, r1);
k *= c2;

seed ^= k;
seed = rotate_left(seed, r2) * m + n;
}

tail = (data + len4 * sizeof(uint32_t));

switch (len & (sizeof(uint32_t) - 1)) {
case 3:
k1 ^= ((uint32_t)(unsigned char)tail[2]) << 16;
/*-fallthrough*/
case 2:
k1 ^= ((uint32_t)(unsigned char)tail[1]) << 8;
/*-fallthrough*/
case 1:
k1 ^= ((uint32_t)(unsigned char)tail[0]) << 0;
k1 *= c1;
k1 = rotate_left(k1, r1);
k1 *= c2;
seed ^= k1;
break;
}

seed ^= (uint32_t)len;
seed ^= (seed >> 16);
seed *= 0x85ebca6b;
seed ^= (seed >> 13);
seed *= 0xc2b2ae35;
seed ^= (seed >> 16);

return seed;
}

static uint32_t murmur3_seeded_v1(uint32_t seed, const char *data, size_t len)
{ {
const uint32_t c1 = 0xcc9e2d51; const uint32_t c1 = 0xcc9e2d51;
const uint32_t c2 = 0x1b873593; const uint32_t c2 = 0x1b873593;
@ -165,8 +227,14 @@ void fill_bloom_key(const char *data,
int i; int i;
const uint32_t seed0 = 0x293ae76f; const uint32_t seed0 = 0x293ae76f;
const uint32_t seed1 = 0x7e646e2c; const uint32_t seed1 = 0x7e646e2c;
const uint32_t hash0 = murmur3_seeded(seed0, data, len); uint32_t hash0, hash1;
const uint32_t hash1 = murmur3_seeded(seed1, data, len); if (settings->hash_version == 2) {
hash0 = murmur3_seeded_v2(seed0, data, len);
hash1 = murmur3_seeded_v2(seed1, data, len);
} else {
hash0 = murmur3_seeded_v1(seed0, data, len);
hash1 = murmur3_seeded_v1(seed1, data, len);
}


key->hashes = (uint32_t *)xcalloc(settings->num_hashes, sizeof(uint32_t)); key->hashes = (uint32_t *)xcalloc(settings->num_hashes, sizeof(uint32_t));
for (i = 0; i < settings->num_hashes; i++) for (i = 0; i < settings->num_hashes; i++)
@ -198,6 +266,18 @@ void init_bloom_filters(void)
init_bloom_filter_slab(&bloom_filters); init_bloom_filter_slab(&bloom_filters);
} }


/*
 * Per-entry cleanup callback for the Bloom filter slab.
 *
 * Releases only the buffer recorded in `to_free`. Filters loaded from the
 * commit-graph point `data` into the graph's chunk and leave `to_free`
 * NULL, whereas filters computed in-process set `to_free` to the buffer
 * they allocated, so `data` itself is never freed directly here.
 *
 * A NULL `filter` is tolerated and ignored.
 */
static void free_one_bloom_filter(struct bloom_filter *filter)
{
	if (filter)
		free(filter->to_free);
}

/*
 * Tear down the file-level `bloom_filters` slab, handing
 * free_one_bloom_filter() to the slab's deep-clear routine as the
 * per-entry callback so that buffers owned by individual filters are
 * released as well (presumably invoked once per stored filter; confirm
 * against the commit-slab implementation).
 */
void deinit_bloom_filters(void)
{
deep_clear_bloom_filter_slab(&bloom_filters, free_one_bloom_filter);
}

static int pathmap_cmp(const void *hashmap_cmp_fn_data UNUSED, static int pathmap_cmp(const void *hashmap_cmp_fn_data UNUSED,
const struct hashmap_entry *eptr, const struct hashmap_entry *eptr,
const struct hashmap_entry *entry_or_key, const struct hashmap_entry *entry_or_key,
@ -211,11 +291,97 @@ static int pathmap_cmp(const void *hashmap_cmp_fn_data UNUSED,
return strcmp(e1->path, e2->path); return strcmp(e1->path, e2->path);
} }


static void init_truncated_large_filter(struct bloom_filter *filter) static void init_truncated_large_filter(struct bloom_filter *filter,
int version)
{ {
filter->data = xmalloc(1); filter->data = filter->to_free = xmalloc(1);
filter->data[0] = 0xFF; filter->data[0] = 0xFF;
filter->len = 1; filter->len = 1;
filter->version = version;
}

#define VISITED (1u<<21)
#define HIGH_BITS (1u<<22)

/*
 * Report whether tree `t`, or any tree reachable from it, contains an
 * entry whose name has a byte with the high bit (0x80) set.
 *
 * The answer is memoized on the tree object: VISITED marks a tree that
 * has already been walked and HIGH_BITS records a positive result, so
 * each tree is scanned at most once.
 *
 * Returns 1 if such an entry exists, or if the tree cannot be parsed
 * (the conservative answer); 0 otherwise.
 */
static int has_entries_with_high_bit(struct repository *r, struct tree *t)
{
	struct tree_desc desc;
	struct name_entry entry;
	int found = 0;

	if (parse_tree(t))
		return 1;

	/* Already walked: the cached flag is the answer. */
	if (t->object.flags & VISITED)
		return !!(t->object.flags & HIGH_BITS);

	init_tree_desc(&desc, &t->object.oid, t->buffer, t->size);
	while (!found && tree_entry(&desc, &entry)) {
		size_t pos;

		/* Scan the entry name byte by byte. */
		for (pos = 0; !found && pos < entry.pathlen; pos++)
			found = !!(entry.path[pos] & 0x80);

		/* A clean name still requires descending into subtrees. */
		if (!found && S_ISDIR(entry.mode)) {
			struct tree *subtree = lookup_tree(r, &entry.oid);

			found = subtree && has_entries_with_high_bit(r, subtree);
		}
	}

	if (found)
		t->object.flags |= HIGH_BITS;
	t->object.flags |= VISITED;

	return !!(t->object.flags & HIGH_BITS);
}

/*
 * Report whether the root tree of commit `c` contains any path with a
 * high-bit byte, as determined by has_entries_with_high_bit().
 *
 * Errs on the side of "yes": returns 1 when the commit cannot be parsed
 * or its tree cannot be obtained.
 */
static int commit_tree_has_high_bit_paths(struct repository *r,
					  struct commit *c)
{
	struct tree *root;

	if (repo_parse_commit(r, c))
		return 1;

	root = repo_get_commit_tree(r, c);
	return root ? has_entries_with_high_bit(r, root) : 1;
}

/*
 * Try to reuse an existing Bloom filter under a different hash version
 * without recomputing it.
 *
 * The filter is relabeled to `hash_version` only when neither the tree of
 * commit `c` nor the tree of its first parent contains a path with a
 * high-bit byte; the format documentation describes the version 1 bug as
 * affecting only path names with characters >= 0x80.
 *
 * Returns `filter` (with its `version` field updated) when the upgrade is
 * possible, or NULL when the caller must recompute the filter.
 */
static struct bloom_filter *upgrade_filter(struct repository *r, struct commit *c,
					   struct bloom_filter *filter,
					   int hash_version)
{
	struct commit_list *p;

	if (commit_tree_has_high_bit_paths(r, c))
		return NULL;

	/*
	 * Read the parent list only now: the call above runs
	 * repo_parse_commit() on `c`, and reading c->parents before the
	 * commit is parsed could yield a stale (empty) list and silently
	 * skip the first-parent check.
	 */
	p = c->parents;
	if (p && commit_tree_has_high_bit_paths(r, p->item))
		return NULL;

	filter->version = hash_version;

	return filter;
}

struct bloom_filter *get_bloom_filter(struct repository *r, struct commit *c)
{
struct bloom_filter *filter;
int hash_version;

filter = get_or_compute_bloom_filter(r, c, 0, NULL, NULL);
if (!filter)
return NULL;

prepare_repo_settings(r);
hash_version = r->settings.commit_graph_changed_paths_version;

if (!(hash_version == -1 || hash_version == filter->version))
return NULL; /* unusable filter */
return filter;
} }


struct bloom_filter *get_or_compute_bloom_filter(struct repository *r, struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
@ -243,8 +409,23 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
filter, graph_pos); filter, graph_pos);
} }


if (filter->data && filter->len) if (filter->data && filter->len) {
return filter; struct bloom_filter *upgrade;
if (!settings || settings->hash_version == filter->version)
return filter;

/* version mismatch, see if we can upgrade */
if (compute_if_not_present &&
git_env_bool("GIT_TEST_UPGRADE_BLOOM_FILTERS", 1)) {
upgrade = upgrade_filter(r, c, filter,
settings->hash_version);
if (upgrade) {
if (computed)
*computed |= BLOOM_UPGRADED;
return upgrade;
}
}
}
if (!compute_if_not_present) if (!compute_if_not_present)
return NULL; return NULL;


@ -300,19 +481,22 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
} }


if (hashmap_get_size(&pathmap) > settings->max_changed_paths) { if (hashmap_get_size(&pathmap) > settings->max_changed_paths) {
init_truncated_large_filter(filter); init_truncated_large_filter(filter,
settings->hash_version);
if (computed) if (computed)
*computed |= BLOOM_TRUNC_LARGE; *computed |= BLOOM_TRUNC_LARGE;
goto cleanup; goto cleanup;
} }


filter->len = (hashmap_get_size(&pathmap) * settings->bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD; filter->len = (hashmap_get_size(&pathmap) * settings->bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD;
filter->version = settings->hash_version;
if (!filter->len) { if (!filter->len) {
if (computed) if (computed)
*computed |= BLOOM_TRUNC_EMPTY; *computed |= BLOOM_TRUNC_EMPTY;
filter->len = 1; filter->len = 1;
} }
CALLOC_ARRAY(filter->data, filter->len); CALLOC_ARRAY(filter->data, filter->len);
filter->to_free = filter->data;


hashmap_for_each_entry(&pathmap, &iter, e, entry) { hashmap_for_each_entry(&pathmap, &iter, e, entry) {
struct bloom_key key; struct bloom_key key;
@ -326,7 +510,7 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
} else { } else {
for (i = 0; i < diff_queued_diff.nr; i++) for (i = 0; i < diff_queued_diff.nr; i++)
diff_free_filepair(diff_queued_diff.queue[i]); diff_free_filepair(diff_queued_diff.queue[i]);
init_truncated_large_filter(filter); init_truncated_large_filter(filter, settings->hash_version);


if (computed) if (computed)
*computed |= BLOOM_TRUNC_LARGE; *computed |= BLOOM_TRUNC_LARGE;

38
bloom.h
View File

@ -3,13 +3,16 @@


struct commit; struct commit;
struct repository; struct repository;
struct commit_graph;


struct bloom_filter_settings { struct bloom_filter_settings {
/* /*
* The version of the hashing technique being used. * The version of the hashing technique being used.
* We currently only support version = 1 which is * The newest version is 2, which is
* the seeded murmur3 hashing technique implemented * the seeded murmur3 hashing technique implemented
* in bloom.c. * in bloom.c. Bloom filters of version 1 were created
* with prior versions of Git, which had a bug in the
* implementation of the hash function.
*/ */
uint32_t hash_version; uint32_t hash_version;


@ -52,6 +55,9 @@ struct bloom_filter_settings {
struct bloom_filter { struct bloom_filter {
unsigned char *data; unsigned char *data;
size_t len; size_t len;
int version;

void *to_free;
}; };


/* /*
@ -68,6 +74,10 @@ struct bloom_key {
uint32_t *hashes; uint32_t *hashes;
}; };


int load_bloom_filter_from_graph(struct commit_graph *g,
struct bloom_filter *filter,
uint32_t graph_pos);

/* /*
* Calculate the murmur3 32-bit hash value for the given data * Calculate the murmur3 32-bit hash value for the given data
* using the given seed. * using the given seed.
@ -75,7 +85,7 @@ struct bloom_key {
* Not considered to be cryptographically secure. * Not considered to be cryptographically secure.
* Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm * Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
*/ */
uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len); uint32_t murmur3_seeded_v2(uint32_t seed, const char *data, size_t len);


void fill_bloom_key(const char *data, void fill_bloom_key(const char *data,
size_t len, size_t len,
@ -88,12 +98,14 @@ void add_key_to_filter(const struct bloom_key *key,
const struct bloom_filter_settings *settings); const struct bloom_filter_settings *settings);


void init_bloom_filters(void); void init_bloom_filters(void);
void deinit_bloom_filters(void);


enum bloom_filter_computed { enum bloom_filter_computed {
BLOOM_NOT_COMPUTED = (1 << 0), BLOOM_NOT_COMPUTED = (1 << 0),
BLOOM_COMPUTED = (1 << 1), BLOOM_COMPUTED = (1 << 1),
BLOOM_TRUNC_LARGE = (1 << 2), BLOOM_TRUNC_LARGE = (1 << 2),
BLOOM_TRUNC_EMPTY = (1 << 3), BLOOM_TRUNC_EMPTY = (1 << 3),
BLOOM_UPGRADED = (1 << 4),
}; };


struct bloom_filter *get_or_compute_bloom_filter(struct repository *r, struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
@ -102,8 +114,24 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
const struct bloom_filter_settings *settings, const struct bloom_filter_settings *settings,
enum bloom_filter_computed *computed); enum bloom_filter_computed *computed);


#define get_bloom_filter(r, c) get_or_compute_bloom_filter( \ /*
(r), (c), 0, NULL, NULL) * Find the Bloom filter associated with the given commit "c".
*
* If any of the following are true
*
* - the repository does not have a commit-graph, or
* - the repository disables reading from the commit-graph, or
* - the given commit does not have a Bloom filter computed, or
* - there is a Bloom filter for commit "c", but it cannot be read
* because the filter uses an incompatible version of murmur3
*
* , then `get_bloom_filter()` will return NULL. Otherwise, the corresponding
* Bloom filter will be returned.
*
* For callers who wish to inspect Bloom filters with incompatible hash
* versions, use get_or_compute_bloom_filter().
*/
struct bloom_filter *get_bloom_filter(struct repository *r, struct commit *c);


int bloom_filter_contains(const struct bloom_filter *filter, int bloom_filter_contains(const struct bloom_filter *filter,
const struct bloom_key *key, const struct bloom_key *key,

View File

@ -346,7 +346,6 @@ static int graph_read_bloom_data(const unsigned char *chunk_start,
size_t chunk_size, void *data) size_t chunk_size, void *data)
{ {
struct commit_graph *g = data; struct commit_graph *g = data;
uint32_t hash_version;


if (chunk_size < BLOOMDATA_CHUNK_HEADER_SIZE) { if (chunk_size < BLOOMDATA_CHUNK_HEADER_SIZE) {
warning(_("ignoring too-small changed-path chunk" warning(_("ignoring too-small changed-path chunk"
@ -358,13 +357,9 @@ static int graph_read_bloom_data(const unsigned char *chunk_start,


g->chunk_bloom_data = chunk_start; g->chunk_bloom_data = chunk_start;
g->chunk_bloom_data_size = chunk_size; g->chunk_bloom_data_size = chunk_size;
hash_version = get_be32(chunk_start);

if (hash_version != 1)
return 0;


g->bloom_filter_settings = xmalloc(sizeof(struct bloom_filter_settings)); g->bloom_filter_settings = xmalloc(sizeof(struct bloom_filter_settings));
g->bloom_filter_settings->hash_version = hash_version; g->bloom_filter_settings->hash_version = get_be32(chunk_start);
g->bloom_filter_settings->num_hashes = get_be32(chunk_start + 4); g->bloom_filter_settings->num_hashes = get_be32(chunk_start + 4);
g->bloom_filter_settings->bits_per_entry = get_be32(chunk_start + 8); g->bloom_filter_settings->bits_per_entry = get_be32(chunk_start + 8);
g->bloom_filter_settings->max_changed_paths = DEFAULT_BLOOM_MAX_CHANGES; g->bloom_filter_settings->max_changed_paths = DEFAULT_BLOOM_MAX_CHANGES;
@ -461,7 +456,7 @@ struct commit_graph *parse_commit_graph(struct repo_settings *s,
graph->read_generation_data = 1; graph->read_generation_data = 1;
} }


if (s->commit_graph_read_changed_paths) { if (s->commit_graph_changed_paths_version) {
read_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES, read_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES,
graph_read_bloom_index, graph); graph_read_bloom_index, graph);
read_chunk(cf, GRAPH_CHUNKID_BLOOMDATA, read_chunk(cf, GRAPH_CHUNKID_BLOOMDATA,
@ -546,6 +541,31 @@ static int validate_mixed_generation_chain(struct commit_graph *g)
return 0; return 0;
} }


/*
 * Make sure every layer in a commit-graph chain uses the same Bloom
 * filter settings.
 *
 * Walking from `g` down through `base_graph`, the first layer that has
 * Bloom settings becomes the reference. Any later layer whose
 * bits_per_entry, num_hashes or hash_version differs from the reference
 * has its Bloom chunks and settings dropped, and a warning is emitted.
 * Layers without Bloom settings are left untouched.
 */
static void validate_mixed_bloom_settings(struct commit_graph *g)
{
	struct bloom_filter_settings *reference = NULL;

	while (g) {
		struct bloom_filter_settings *cur = g->bloom_filter_settings;

		if (cur && !reference) {
			reference = cur;
		} else if (cur &&
			   (cur->bits_per_entry != reference->bits_per_entry ||
			    cur->num_hashes != reference->num_hashes ||
			    cur->hash_version != reference->hash_version)) {
			/* Incompatible layer: forget its Bloom data entirely. */
			g->chunk_bloom_indexes = NULL;
			g->chunk_bloom_data = NULL;
			FREE_AND_NULL(g->bloom_filter_settings);

			warning(_("disabling Bloom filters for commit-graph "
				  "layer '%s' due to incompatible settings"),
				oid_to_hex(&g->oid));
		}

		g = g->base_graph;
	}
}

static int add_graph_to_chain(struct commit_graph *g, static int add_graph_to_chain(struct commit_graph *g,
struct commit_graph *chain, struct commit_graph *chain,
struct object_id *oids, struct object_id *oids,
@ -670,6 +690,7 @@ struct commit_graph *load_commit_graph_chain_fd_st(struct repository *r,
} }


validate_mixed_generation_chain(graph_chain); validate_mixed_generation_chain(graph_chain);
validate_mixed_bloom_settings(graph_chain);


free(oids); free(oids);
fclose(fp); fclose(fp);
@ -814,6 +835,7 @@ void close_commit_graph(struct raw_object_store *o)
return; return;


clear_commit_graph_data_slab(&commit_graph_data_slab); clear_commit_graph_data_slab(&commit_graph_data_slab);
deinit_bloom_filters();
free_commit_graph(o->commit_graph); free_commit_graph(o->commit_graph);
o->commit_graph = NULL; o->commit_graph = NULL;
} }
@ -1152,6 +1174,7 @@ struct write_commit_graph_context {
int count_bloom_filter_not_computed; int count_bloom_filter_not_computed;
int count_bloom_filter_trunc_empty; int count_bloom_filter_trunc_empty;
int count_bloom_filter_trunc_large; int count_bloom_filter_trunc_large;
int count_bloom_filter_upgraded;
}; };


static int write_graph_chunk_fanout(struct hashfile *f, static int write_graph_chunk_fanout(struct hashfile *f,
@ -1759,6 +1782,8 @@ static void trace2_bloom_filter_write_statistics(struct write_commit_graph_conte
ctx->count_bloom_filter_trunc_empty); ctx->count_bloom_filter_trunc_empty);
trace2_data_intmax("commit-graph", ctx->r, "filter-trunc-large", trace2_data_intmax("commit-graph", ctx->r, "filter-trunc-large",
ctx->count_bloom_filter_trunc_large); ctx->count_bloom_filter_trunc_large);
trace2_data_intmax("commit-graph", ctx->r, "filter-upgraded",
ctx->count_bloom_filter_upgraded);
} }


static void compute_bloom_filters(struct write_commit_graph_context *ctx) static void compute_bloom_filters(struct write_commit_graph_context *ctx)
@ -1800,6 +1825,8 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
ctx->count_bloom_filter_trunc_empty++; ctx->count_bloom_filter_trunc_empty++;
if (computed & BLOOM_TRUNC_LARGE) if (computed & BLOOM_TRUNC_LARGE)
ctx->count_bloom_filter_trunc_large++; ctx->count_bloom_filter_trunc_large++;
} else if (computed & BLOOM_UPGRADED) {
ctx->count_bloom_filter_upgraded++;
} else if (computed & BLOOM_NOT_COMPUTED) } else if (computed & BLOOM_NOT_COMPUTED)
ctx->count_bloom_filter_not_computed++; ctx->count_bloom_filter_not_computed++;
ctx->total_bloom_filter_data_size += filter ctx->total_bloom_filter_data_size += filter
@ -2481,6 +2508,13 @@ int write_commit_graph(struct object_directory *odb,
} }
if (!commit_graph_compatible(r)) if (!commit_graph_compatible(r))
return 0; return 0;
if (r->settings.commit_graph_changed_paths_version < -1
|| r->settings.commit_graph_changed_paths_version > 2) {
warning(_("attempting to write a commit-graph, but "
"'commitGraph.changedPathsVersion' (%d) is not supported"),
r->settings.commit_graph_changed_paths_version);
return 0;
}


CALLOC_ARRAY(ctx, 1); CALLOC_ARRAY(ctx, 1);
ctx->r = r; ctx->r = r;
@ -2493,6 +2527,7 @@ int write_commit_graph(struct object_directory *odb,
ctx->write_generation_data = (get_configured_generation_version(r) == 2); ctx->write_generation_data = (get_configured_generation_version(r) == 2);
ctx->num_generation_data_overflows = 0; ctx->num_generation_data_overflows = 0;


bloom_settings.hash_version = r->settings.commit_graph_changed_paths_version;
bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY", bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY",
bloom_settings.bits_per_entry); bloom_settings.bits_per_entry);
bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES", bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES",
@ -2522,12 +2557,20 @@ int write_commit_graph(struct object_directory *odb,
g = ctx->r->objects->commit_graph; g = ctx->r->objects->commit_graph;


/* We have changed-paths already. Keep them in the next graph */ /* We have changed-paths already. Keep them in the next graph */
if (g && g->chunk_bloom_data) { if (g && g->bloom_filter_settings) {
ctx->changed_paths = 1; ctx->changed_paths = 1;
ctx->bloom_settings = g->bloom_filter_settings;
/* don't propagate the hash_version unless unspecified */
if (bloom_settings.hash_version == -1)
bloom_settings.hash_version = g->bloom_filter_settings->hash_version;
bloom_settings.bits_per_entry = g->bloom_filter_settings->bits_per_entry;
bloom_settings.num_hashes = g->bloom_filter_settings->num_hashes;
bloom_settings.max_changed_paths = g->bloom_filter_settings->max_changed_paths;
} }
} }


bloom_settings.hash_version = bloom_settings.hash_version == 2 ? 2 : 1;

if (ctx->split) { if (ctx->split) {
struct commit_graph *g = ctx->r->objects->commit_graph; struct commit_graph *g = ctx->r->objects->commit_graph;


@ -2611,6 +2654,9 @@ int write_commit_graph(struct object_directory *odb,


res = write_commit_graph_file(ctx); res = write_commit_graph_file(ctx);


if (ctx->changed_paths)
deinit_bloom_filters();

if (ctx->split) if (ctx->split)
mark_commit_graphs(ctx); mark_commit_graphs(ctx);



View File

@ -62,7 +62,7 @@ void object_array_init(struct object_array *array);


/* /*
* object flag allocation: * object flag allocation:
* revision.h: 0---------10 15 23------27 * revision.h: 0---------10 15 23------27
* fetch-pack.c: 01 67 * fetch-pack.c: 01 67
* negotiator/default.c: 2--5 * negotiator/default.c: 2--5
* walker.c: 0-2 * walker.c: 0-2
@ -75,6 +75,7 @@ void object_array_init(struct object_array *array);
* commit-reach.c: 16-----19 * commit-reach.c: 16-----19
* sha1-name.c: 20 * sha1-name.c: 20
* list-objects-filter.c: 21 * list-objects-filter.c: 21
* bloom.c: 2122
* builtin/fsck.c: 0--3 * builtin/fsck.c: 0--3
* builtin/gc.c: 0 * builtin/gc.c: 0
* builtin/index-pack.c: 2021 * builtin/index-pack.c: 2021

View File

@ -23,7 +23,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
*/ */
repo_set_hash_algo(the_repository, GIT_HASH_SHA1); repo_set_hash_algo(the_repository, GIT_HASH_SHA1);
the_repository->settings.commit_graph_generation_version = 2; the_repository->settings.commit_graph_generation_version = 2;
the_repository->settings.commit_graph_read_changed_paths = 1; the_repository->settings.commit_graph_changed_paths_version = 1;
g = parse_commit_graph(&the_repository->settings, (void *)data, size); g = parse_commit_graph(&the_repository->settings, (void *)data, size);
repo_clear(the_repository); repo_clear(the_repository);
free_commit_graph(g); free_commit_graph(g);

View File

@ -23,6 +23,7 @@ void prepare_repo_settings(struct repository *r)
int value; int value;
const char *strval; const char *strval;
int manyfiles; int manyfiles;
int read_changed_paths;


if (!r->gitdir) if (!r->gitdir)
BUG("Cannot add settings for uninitialized repository"); BUG("Cannot add settings for uninitialized repository");
@ -54,7 +55,10 @@ void prepare_repo_settings(struct repository *r)
/* Commit graph config or default, does not cascade (simple) */ /* Commit graph config or default, does not cascade (simple) */
repo_cfg_bool(r, "core.commitgraph", &r->settings.core_commit_graph, 1); repo_cfg_bool(r, "core.commitgraph", &r->settings.core_commit_graph, 1);
repo_cfg_int(r, "commitgraph.generationversion", &r->settings.commit_graph_generation_version, 2); repo_cfg_int(r, "commitgraph.generationversion", &r->settings.commit_graph_generation_version, 2);
repo_cfg_bool(r, "commitgraph.readchangedpaths", &r->settings.commit_graph_read_changed_paths, 1); repo_cfg_bool(r, "commitgraph.readchangedpaths", &read_changed_paths, 1);
repo_cfg_int(r, "commitgraph.changedpathsversion",
&r->settings.commit_graph_changed_paths_version,
read_changed_paths ? -1 : 0);
repo_cfg_bool(r, "gc.writecommitgraph", &r->settings.gc_write_commit_graph, 1); repo_cfg_bool(r, "gc.writecommitgraph", &r->settings.gc_write_commit_graph, 1);
repo_cfg_bool(r, "fetch.writecommitgraph", &r->settings.fetch_write_commit_graph, 0); repo_cfg_bool(r, "fetch.writecommitgraph", &r->settings.fetch_write_commit_graph, 0);



View File

@ -37,7 +37,7 @@ struct repo_settings {


int core_commit_graph; int core_commit_graph;
int commit_graph_generation_version; int commit_graph_generation_version;
int commit_graph_read_changed_paths; int commit_graph_changed_paths_version;
int gc_write_commit_graph; int gc_write_commit_graph;
int fetch_write_commit_graph; int fetch_write_commit_graph;
int command_requires_full_index; int command_requires_full_index;

View File

@ -847,17 +847,28 @@ static int rev_compare_tree(struct rev_info *revs,
return tree_difference; return tree_difference;
} }


static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit) static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit,
int nth_parent)
{ {
struct tree *t1 = repo_get_commit_tree(the_repository, commit); struct tree *t1 = repo_get_commit_tree(the_repository, commit);
int bloom_ret = -1;


if (!t1) if (!t1)
return 0; return 0;


if (!nth_parent && revs->bloom_keys_nr) {
bloom_ret = check_maybe_different_in_bloom_filter(revs, commit);
if (!bloom_ret)
return 1;
}

tree_difference = REV_TREE_SAME; tree_difference = REV_TREE_SAME;
revs->pruning.flags.has_changes = 0; revs->pruning.flags.has_changes = 0;
diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning); diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning);


if (bloom_ret == 1 && tree_difference == REV_TREE_SAME)
count_bloom_filter_false_positive++;

return tree_difference == REV_TREE_SAME; return tree_difference == REV_TREE_SAME;
} }


@ -895,7 +906,7 @@ static int compact_treesame(struct rev_info *revs, struct commit *commit, unsign
if (nth_parent != 0) if (nth_parent != 0)
die("compact_treesame %u", nth_parent); die("compact_treesame %u", nth_parent);
old_same = !!(commit->object.flags & TREESAME); old_same = !!(commit->object.flags & TREESAME);
if (rev_same_tree_as_empty(revs, commit)) if (rev_same_tree_as_empty(revs, commit, nth_parent))
commit->object.flags |= TREESAME; commit->object.flags |= TREESAME;
else else
commit->object.flags &= ~TREESAME; commit->object.flags &= ~TREESAME;
@ -991,7 +1002,14 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
return; return;


if (!commit->parents) { if (!commit->parents) {
if (rev_same_tree_as_empty(revs, commit)) /*
* Pretend as if we are comparing ourselves to the
* (non-existent) first parent of this commit object. Even
* though no such parent exists, its changed-path Bloom filter
* (if one exists) is relative to the empty tree, so using Bloom
* filters is allowed here.
*/
if (rev_same_tree_as_empty(revs, commit, 0))
commit->object.flags |= TREESAME; commit->object.flags |= TREESAME;
return; return;
} }
@ -1072,7 +1090,7 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)


case REV_TREE_NEW: case REV_TREE_NEW:
if (revs->remove_empty_trees && if (revs->remove_empty_trees &&
rev_same_tree_as_empty(revs, p)) { rev_same_tree_as_empty(revs, p, nth_parent)) {
/* We are adding all the specified /* We are adding all the specified
* paths from this parent, so the * paths from this parent, so the
* history beyond this parent is not * history beyond this parent is not

View File

@ -51,6 +51,7 @@ static void get_bloom_filter_for_commit(const struct object_id *commit_oid)


static const char *bloom_usage = "\n" static const char *bloom_usage = "\n"
" test-tool bloom get_murmur3 <string>\n" " test-tool bloom get_murmur3 <string>\n"
" test-tool bloom get_murmur3_seven_highbit\n"
" test-tool bloom generate_filter <string> [<string>...]\n" " test-tool bloom generate_filter <string> [<string>...]\n"
" test-tool bloom get_filter_for_commit <commit-hex>\n"; " test-tool bloom get_filter_for_commit <commit-hex>\n";


@ -65,7 +66,13 @@ int cmd__bloom(int argc, const char **argv)
uint32_t hashed; uint32_t hashed;
if (argc < 3) if (argc < 3)
usage(bloom_usage); usage(bloom_usage);
hashed = murmur3_seeded(0, argv[2], strlen(argv[2])); hashed = murmur3_seeded_v2(0, argv[2], strlen(argv[2]));
printf("Murmur3 Hash with seed=0:0x%08x\n", hashed);
}

if (!strcmp(argv[1], "get_murmur3_seven_highbit")) {
uint32_t hashed;
hashed = murmur3_seeded_v2(0, "\x99\xaa\xbb\xcc\xdd\xee\xff", 7);
printf("Murmur3 Hash with seed=0:0x%08x\n", hashed); printf("Murmur3 Hash with seed=0:0x%08x\n", hashed);
} }



View File

@ -7,20 +7,8 @@
#include "bloom.h" #include "bloom.h"
#include "setup.h" #include "setup.h"


int cmd__read_graph(int argc UNUSED, const char **argv UNUSED) static void dump_graph_info(struct commit_graph *graph)
{ {
struct commit_graph *graph = NULL;
struct object_directory *odb;

setup_git_directory();
odb = the_repository->objects->odb;

prepare_repo_settings(the_repository);

graph = read_commit_graph_one(the_repository, odb);
if (!graph)
return 1;

printf("header: %08x %d %d %d %d\n", printf("header: %08x %d %d %d %d\n",
ntohl(*(uint32_t*)graph->data), ntohl(*(uint32_t*)graph->data),
*(unsigned char*)(graph->data + 4), *(unsigned char*)(graph->data + 4),
@ -59,8 +47,57 @@ int cmd__read_graph(int argc UNUSED, const char **argv UNUSED)
if (graph->topo_levels) if (graph->topo_levels)
printf(" topo_levels"); printf(" topo_levels");
printf("\n"); printf("\n");
}


/*
 * Print the changed-path Bloom filter stored for each graph position in
 * `graph`, walking positions 0 .. num_commits + num_commits_in_base - 1.
 *
 * Each non-empty filter is written to stdout as one line of two-digit
 * lowercase hex bytes. A filter of length zero produces no output. When
 * load_bloom_filter_from_graph() reports failure (negative return) for a
 * position, a diagnostic goes to stderr and that position is skipped.
 */
static void dump_graph_bloom_filters(struct commit_graph *graph)
{
	uint32_t pos;

	for (pos = 0; pos < graph->num_commits + graph->num_commits_in_base; pos++) {
		struct bloom_filter filter = { 0 };
		size_t k;

		if (load_bloom_filter_from_graph(graph, &filter, pos) < 0) {
			fprintf(stderr, "missing Bloom filter for graph "
				"position %"PRIu32"\n", pos);
			continue;
		}

		/* Empty filters are silently omitted from the dump. */
		if (!filter.len)
			continue;

		for (k = 0; k < filter.len; k++)
			printf("%02x", filter.data[k]);
		printf("\n");
	}
}

/*
 * Entry point for the `read-graph` test helper.
 *
 * Reads the commit-graph of the repository's object directory and then:
 *   - with no sub-command (argc <= 1), calls dump_graph_info();
 *   - with "bloom-filters" as argv[1], calls dump_graph_bloom_filters();
 *   - with any other argv[1], reports the unknown sub-command on stderr.
 *
 * Returns 0 on success, or 1 when no commit-graph could be read or the
 * sub-command is unknown.
 */
int cmd__read_graph(int argc, const char **argv)
{
	struct commit_graph *graph = NULL;
	struct object_directory *odb;
	int ret = 0;

	setup_git_directory();
	odb = the_repository->objects->odb;

	prepare_repo_settings(the_repository);

	graph = read_commit_graph_one(the_repository, odb);
	if (!graph) {
		ret = 1;
		goto done;
	}

	if (argc <= 1)
		dump_graph_info(graph);
	else if (!strcmp(argv[1], "bloom-filters"))
		dump_graph_bloom_filters(graph);
	else {
		fprintf(stderr, "unknown sub-command: '%s'\n", argv[1]);
		ret = 1;
	}

done:
	UNLEAK(graph);

	/*
	 * Propagate the recorded status; returning a constant 0 here would
	 * hide both failure paths above.
	 */
	return ret;
}

View File

@ -29,6 +29,14 @@ test_expect_success 'compute unseeded murmur3 hash for test string 2' '
test_cmp expect actual test_cmp expect actual
' '


test_expect_success 'compute unseeded murmur3 hash for test string 3' '
cat >expect <<-\EOF &&
Murmur3 Hash with seed=0:0xa183ccfd
EOF
test-tool bloom get_murmur3_seven_highbit >actual &&
test_cmp expect actual
'

test_expect_success 'compute bloom key for empty string' ' test_expect_success 'compute bloom key for empty string' '
cat >expect <<-\EOF && cat >expect <<-\EOF &&
Hashes:0x5615800c|0x5b966560|0x61174ab4|0x66983008|0x6c19155c|0x7199fab0|0x771ae004| Hashes:0x5615800c|0x5b966560|0x61174ab4|0x66983008|0x6c19155c|0x7199fab0|0x771ae004|

View File

@ -82,7 +82,23 @@ test_bloom_filters_used () {
# Run `setup` with the log arguments in $1, then check that no changed-path
# Bloom filter was used and that log_wo_bloom and log_w_bloom are identical.
#
# The trace may or may not contain a Bloom "statistics" line. If it does,
# only the filter_not_present counter may be non-zero.
test_bloom_filters_not_used () {
	log_args=$1
	setup "$log_args" &&

	if grep -q "statistics:{\"filter_not_present\":" "$TRASH_DIRECTORY/trace.perf"
	then
		# if the Bloom filter system is initialized, ensure that no
		# filters were used
		data="statistics:{"
		# unusable filters (e.g., those computed with a
		# different value of commitGraph.changedPathsVersion)
		# are counted in the filter_not_present bucket, so any
		# value is OK there.
		data="$data\"filter_not_present\":[0-9][0-9]*,"
		data="$data\"maybe\":0,"
		data="$data\"definitely_not\":0,"
		data="$data\"false_positive\":0}"

		grep -q "$data" "$TRASH_DIRECTORY/trace.perf"
	fi &&
	test_cmp log_wo_bloom log_w_bloom
}


@ -163,7 +179,7 @@ test_expect_success 'setup - add commit-graph to the chain with Bloom filters' '


test_bloom_filters_used_when_some_filters_are_missing () { test_bloom_filters_used_when_some_filters_are_missing () {
log_args=$1 log_args=$1
bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"maybe\":6,\"definitely_not\":9" bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"maybe\":6,\"definitely_not\":10"
setup "$log_args" && setup "$log_args" &&
grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" && grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" &&
test_cmp log_wo_bloom log_w_bloom test_cmp log_wo_bloom log_w_bloom
@ -206,6 +222,10 @@ test_filter_trunc_large () {
grep "\"key\":\"filter-trunc-large\",\"value\":\"$1\"" $2 grep "\"key\":\"filter-trunc-large\",\"value\":\"$1\"" $2
} }


# Succeed if file $2 contains a "filter-upgraded" key/value entry whose
# value is $1. Callers in this script pass a file written via
# GIT_TRACE2_EVENT.
test_filter_upgraded () {
	# Quote the file operand so a path containing whitespace or glob
	# characters is passed to grep as a single argument.
	grep "\"key\":\"filter-upgraded\",\"value\":\"$1\"" "$2"
}

test_expect_success 'correctly report changes over limit' ' test_expect_success 'correctly report changes over limit' '
git init limits && git init limits &&
( (
@ -405,8 +425,307 @@ test_expect_success 'Bloom generation backfills empty commits' '
) )
' '


graph=.git/objects/info/commit-graph
graphdir=.git/objects/info/commit-graphs
chain=$graphdir/commit-graph-chain

test_expect_success 'setup for mixed Bloom setting tests' '
repo=mixed-bloom-settings &&

git init $repo &&
for i in one two three
do
test_commit -C $repo $i file || return 1
done
'

test_expect_success 'ensure Bloom filters with incompatible settings are ignored' '
# Compute Bloom filters with "unusual" settings.
git -C $repo rev-parse one >in &&
GIT_TEST_BLOOM_SETTINGS_NUM_HASHES=3 git -C $repo commit-graph write \
--stdin-commits --changed-paths --split <in &&
layer=$(head -n 1 $repo/$chain) &&

# A commit-graph layer without Bloom filters "hides" the layers
# below ...
git -C $repo rev-parse two >in &&
git -C $repo commit-graph write --stdin-commits --no-changed-paths \
--split=no-merge <in &&

# Another commit-graph layer that has Bloom filters, but with
# standard settings, and is thus incompatible with the base
# layer written above.
git -C $repo rev-parse HEAD >in &&
git -C $repo commit-graph write --stdin-commits --changed-paths \
--split=no-merge <in &&

test_line_count = 3 $repo/$chain &&

# Ensure that incompatible Bloom filters are ignored.
git -C $repo -c core.commitGraph=false log --oneline --no-decorate -- file \
>expect 2>err &&
git -C $repo log --oneline --no-decorate -- file >actual 2>err &&
test_cmp expect actual &&
grep "disabling Bloom filters for commit-graph layer .$layer." err
'

test_expect_success 'merge graph layers with incompatible Bloom settings' '
# Ensure that incompatible Bloom filters are ignored when
# merging existing layers.
>trace2.txt &&
GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
git -C $repo commit-graph write --reachable --changed-paths 2>err &&
grep "disabling Bloom filters for commit-graph layer .$layer." err &&
grep "{\"hash_version\":1,\"num_hashes\":7,\"bits_per_entry\":10,\"max_changed_paths\":512" trace2.txt &&

test_path_is_file $repo/$graph &&
test_dir_is_empty $repo/$graphdir &&

git -C $repo -c core.commitGraph=false log --oneline --no-decorate -- \
file >expect &&
trace_out="$(pwd)/trace.perf" &&
GIT_TRACE2_PERF="$trace_out" \
git -C $repo log --oneline --no-decorate -- file >actual 2>err &&

test_cmp expect actual &&
grep "statistics:{\"filter_not_present\":0," trace.perf &&
test_must_be_empty err
'

# chosen to be the same under all Unicode normalization forms
CENT=$(printf "\302\242")

test_expect_success 'ensure Bloom filter with incompatible versions are ignored' '
rm "$repo/$graph" &&

git -C $repo log --oneline --no-decorate -- $CENT >expect &&

# Compute v1 Bloom filters for commits at the bottom.
git -C $repo rev-parse HEAD^ >in &&
git -C $repo commit-graph write --stdin-commits --changed-paths \
--split <in &&

# Compute v2 Bloom filters for the rest of the commits at the top.
git -C $repo rev-parse HEAD >in &&
git -C $repo -c commitGraph.changedPathsVersion=2 commit-graph write \
--stdin-commits --changed-paths --split=no-merge <in &&

test_line_count = 2 $repo/$chain &&

git -C $repo log --oneline --no-decorate -- $CENT >actual 2>err &&
test_cmp expect actual &&

layer="$(head -n 1 $repo/$chain)" &&
cat >expect.err <<-EOF &&
warning: disabling Bloom filters for commit-graph layer $SQ$layer$SQ due to incompatible settings
EOF
test_cmp expect.err err &&

# Merge the two layers with incompatible bloom filter versions,
# ensuring that the v2 filters are used.
>trace2.txt &&
GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
git -C $repo -c commitGraph.changedPathsVersion=2 commit-graph write --reachable --changed-paths 2>err &&
grep "disabling Bloom filters for commit-graph layer .$layer." err &&
grep "{\"hash_version\":2,\"num_hashes\":7,\"bits_per_entry\":10,\"max_changed_paths\":512" trace2.txt
'

# Dump the output of "test-tool read-graph bloom-filters" for the current
# repository into filters.dat and print only its first line on stdout.
# The file is left behind in the working directory.
get_first_changed_path_filter () {
test-tool read-graph bloom-filters >filters.dat &&
head -n 1 filters.dat
}

test_expect_success 'set up repo with high bit path, version 1 changed-path' '
git init highbit1 &&
test_commit -C highbit1 c1 "$CENT" &&
git -C highbit1 commit-graph write --reachable --changed-paths
'

test_expect_success 'setup check value of version 1 changed-path' '
(
cd highbit1 &&
echo "52a9" >expect &&
get_first_changed_path_filter >actual
)
'

# expect will not match actual if char is unsigned by default. Write the test
# in this way, so that a user running this test script can still see if the two
# files match. (It will appear as an ordinary success if they match, and a skip
# if not.)
if test_cmp highbit1/expect highbit1/actual
then
test_set_prereq SIGNED_CHAR_BY_DEFAULT
fi
test_expect_success SIGNED_CHAR_BY_DEFAULT 'check value of version 1 changed-path' '
# Only the prereq matters for this test.
true
'

test_expect_success 'setup make another commit' '
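# "git log" does not use Bloom filters for root commits - see how, in
# revision.c, rev_compare_tree() (the only code path that eventually calls
# get_bloom_filter()) is only called by try_to_simplify_commit() when the commit
# has one parent. Therefore, make another commit so that we perform the tests on
# a non-root commit.
# NOTE(review): try_to_simplify_commit() now also runs parentless commits
# through rev_same_tree_as_empty(revs, commit, 0), where Bloom filters are
# allowed, so the rationale above looks out of date - confirm and reword.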
test_commit -C highbit1 anotherc1 "another$CENT"
'

test_expect_success 'version 1 changed-path used when version 1 requested' '
(
cd highbit1 &&
test_bloom_filters_used "-- another$CENT"
)
'

test_expect_success 'version 1 changed-path not used when version 2 requested' '
(
cd highbit1 &&
git config --add commitGraph.changedPathsVersion 2 &&
test_bloom_filters_not_used "-- another$CENT"
)
'

test_expect_success 'version 1 changed-path used when autodetect requested' '
(
cd highbit1 &&
git config --add commitGraph.changedPathsVersion -1 &&
test_bloom_filters_used "-- another$CENT"
)
'

test_expect_success 'when writing another commit graph, preserve existing version 1 of changed-path' '
test_commit -C highbit1 c1double "$CENT$CENT" &&
git -C highbit1 commit-graph write --reachable --changed-paths &&
(
cd highbit1 &&
git config --add commitGraph.changedPathsVersion -1 &&
echo "options: bloom(1,10,7) read_generation_data" >expect &&
test-tool read-graph >full &&
grep options full >actual &&
test_cmp expect actual
)
'

test_expect_success 'set up repo with high bit path, version 2 changed-path' '
git init highbit2 &&
git -C highbit2 config --add commitGraph.changedPathsVersion 2 &&
test_commit -C highbit2 c2 "$CENT" &&
git -C highbit2 commit-graph write --reachable --changed-paths
'

test_expect_success 'check value of version 2 changed-path' '
(
cd highbit2 &&
echo "c01f" >expect &&
get_first_changed_path_filter >actual &&
test_cmp expect actual
)
'

test_expect_success 'setup make another commit' '
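# "git log" does not use Bloom filters for root commits - see how, in
# revision.c, rev_compare_tree() (the only code path that eventually calls
# get_bloom_filter()) is only called by try_to_simplify_commit() when the commit
# has one parent. Therefore, make another commit so that we perform the tests on
# a non-root commit.
# NOTE(review): try_to_simplify_commit() now also runs parentless commits
# through rev_same_tree_as_empty(revs, commit, 0), where Bloom filters are
# allowed, so the rationale above looks out of date - confirm and reword.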
test_commit -C highbit2 anotherc2 "another$CENT"
'

test_expect_success 'version 2 changed-path used when version 2 requested' '
(
cd highbit2 &&
test_bloom_filters_used "-- another$CENT"
)
'

test_expect_success 'version 2 changed-path not used when version 1 requested' '
(
cd highbit2 &&
git config --add commitGraph.changedPathsVersion 1 &&
test_bloom_filters_not_used "-- another$CENT"
)
'

test_expect_success 'version 2 changed-path used when autodetect requested' '
(
cd highbit2 &&
git config --add commitGraph.changedPathsVersion -1 &&
test_bloom_filters_used "-- another$CENT"
)
'

test_expect_success 'when writing another commit graph, preserve existing version 2 of changed-path' '
test_commit -C highbit2 c2double "$CENT$CENT" &&
git -C highbit2 commit-graph write --reachable --changed-paths &&
(
cd highbit2 &&
git config --add commitGraph.changedPathsVersion -1 &&
echo "options: bloom(2,10,7) read_generation_data" >expect &&
test-tool read-graph >full &&
grep options full >actual &&
test_cmp expect actual
)
'

test_expect_success 'when writing commit graph, do not reuse changed-path of another version' '
git init doublewrite &&
test_commit -C doublewrite c "$CENT" &&

git -C doublewrite config --add commitGraph.changedPathsVersion 1 &&
>trace2.txt &&
GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
git -C doublewrite commit-graph write --reachable --changed-paths &&
test_filter_computed 1 trace2.txt &&
test_filter_upgraded 0 trace2.txt &&

git -C doublewrite commit-graph write --reachable --changed-paths &&
for v in -2 3
do
git -C doublewrite config --add commitGraph.changedPathsVersion $v &&
git -C doublewrite commit-graph write --reachable --changed-paths 2>err &&
cat >expect <<-EOF &&
warning: attempting to write a commit-graph, but ${SQ}commitGraph.changedPathsVersion${SQ} ($v) is not supported
EOF
test_cmp expect err || return 1
done &&

git -C doublewrite config --add commitGraph.changedPathsVersion 2 &&
>trace2.txt &&
GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
git -C doublewrite commit-graph write --reachable --changed-paths &&
test_filter_computed 1 trace2.txt &&
test_filter_upgraded 0 trace2.txt &&

(
cd doublewrite &&
echo "c01f" >expect &&
get_first_changed_path_filter >actual &&
test_cmp expect actual
)
'

test_expect_success 'when writing commit graph, reuse changed-path of another version where possible' '
git init upgrade &&

test_commit -C upgrade base no-high-bits &&

git -C upgrade config --add commitGraph.changedPathsVersion 1 &&
>trace2.txt &&
GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
git -C upgrade commit-graph write --reachable --changed-paths &&
test_filter_computed 1 trace2.txt &&
test_filter_upgraded 0 trace2.txt &&

git -C upgrade config --add commitGraph.changedPathsVersion 2 &&
>trace2.txt &&
GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
git -C upgrade commit-graph write --reachable --changed-paths &&
test_filter_computed 0 trace2.txt &&
test_filter_upgraded 1 trace2.txt
'

corrupt_graph () { corrupt_graph () {
graph=.git/objects/info/commit-graph &&
test_when_finished "rm -rf $graph" && test_when_finished "rm -rf $graph" &&
git commit-graph write --reachable --changed-paths && git commit-graph write --reachable --changed-paths &&
corrupt_chunk_file $graph "$@" corrupt_chunk_file $graph "$@"