Merge branch 'tb/path-filter-fix'
The Bloom filter used for path limited history traversal was broken on systems whose "char" is unsigned; update the implementation and bump the format version to 2. * tb/path-filter-fix: bloom: introduce `deinit_bloom_filters()` commit-graph: reuse existing Bloom filters where possible object.h: fix mis-aligned flag bits table commit-graph: new Bloom filter version that fixes murmur3 commit-graph: unconditionally load Bloom filters bloom: prepare to discard incompatible Bloom filters bloom: annotate filters with hash version repo-settings: introduce commitgraph.changedPathsVersion t4216: test changed path filters with high bit paths t/helper/test-read-graph: implement `bloom-filters` mode bloom.h: make `load_bloom_filter_from_graph()` public t/helper/test-read-graph.c: extract `dump_graph_info()` gitformat-commit-graph: describe version 2 of BDAT commit-graph: ensure Bloom filters are read with consistent settings revision.c: consult Bloom filters for root commits t/t4216-log-bloom.sh: harden `test_bloom_filters_not_used()`maint
commit
ecf7fc600a
|
@ -9,6 +9,29 @@ commitGraph.maxNewFilters::
|
|||
commit-graph write` (c.f., linkgit:git-commit-graph[1]).
|
||||
|
||||
commitGraph.readChangedPaths::
|
||||
If true, then git will use the changed-path Bloom filters in the
|
||||
commit-graph file (if it exists, and they are present). Defaults to
|
||||
true. See linkgit:git-commit-graph[1] for more information.
|
||||
Deprecated. Equivalent to commitGraph.changedPathsVersion=-1 if true, and
|
||||
commitGraph.changedPathsVersion=0 if false. (If commitGraph.changedPathsVersion
|
||||
is also set, commitGraph.changedPathsVersion takes precedence.)
|
||||
|
||||
commitGraph.changedPathsVersion::
|
||||
Specifies the version of the changed-path Bloom filters that Git will read and
|
||||
write. May be -1, 0, 1, or 2. Note that values greater than 1 may be
|
||||
incompatible with older versions of Git which do not yet understand
|
||||
those versions. Use caution when operating in a mixed-version
|
||||
environment.
|
||||
+
|
||||
Defaults to -1.
|
||||
+
|
||||
If -1, Git will use the version of the changed-path Bloom filters in the
|
||||
repository, defaulting to 1 if there are none.
|
||||
+
|
||||
If 0, Git will not read any Bloom filters, and will write version 1 Bloom
|
||||
filters when instructed to write.
|
||||
+
|
||||
If 1, Git will only read version 1 Bloom filters, and will write version 1
|
||||
Bloom filters.
|
||||
+
|
||||
If 2, Git will only read version 2 Bloom filters, and will write version 2
|
||||
Bloom filters.
|
||||
+
|
||||
See linkgit:git-commit-graph[1] for more information.
|
||||
|
|
|
@ -142,13 +142,16 @@ All multi-byte numbers are in network byte order.
|
|||
|
||||
==== Bloom Filter Data (ID: {'B', 'D', 'A', 'T'}) [Optional]
|
||||
* It starts with header consisting of three unsigned 32-bit integers:
|
||||
- Version of the hash algorithm being used. We currently only support
|
||||
value 1 which corresponds to the 32-bit version of the murmur3 hash
|
||||
- Version of the hash algorithm being used. We currently support
|
||||
value 2 which corresponds to the 32-bit version of the murmur3 hash
|
||||
implemented exactly as described in
|
||||
https://en.wikipedia.org/wiki/MurmurHash#Algorithm and the double
|
||||
hashing technique using seed values 0x293ae76f and 0x7e646e2c as
|
||||
described in https://doi.org/10.1007/978-3-540-30494-4_26 "Bloom Filters
|
||||
in Probabilistic Verification"
|
||||
in Probabilistic Verification". Version 1 Bloom filters have a bug that appears
|
||||
when char is signed and the repository has path names that have characters >=
|
||||
0x80; Git supports reading and writing them, but this ability will be removed
|
||||
in a future version of Git.
|
||||
- The number of times a path is hashed and hence the number of bit positions
|
||||
that cumulatively determine whether a file is present in the commit.
|
||||
- The minimum number of bits 'b' per entry in the Bloom filter. If the filter
|
||||
|
|
202
bloom.c
202
bloom.c
|
@ -6,6 +6,9 @@
|
|||
#include "commit-graph.h"
|
||||
#include "commit.h"
|
||||
#include "commit-slab.h"
|
||||
#include "tree.h"
|
||||
#include "tree-walk.h"
|
||||
#include "config.h"
|
||||
#include "repository.h"
|
||||
|
||||
define_commit_slab(bloom_filter_slab, struct bloom_filter);
|
||||
|
@ -49,7 +52,7 @@ static int check_bloom_offset(struct commit_graph *g, uint32_t pos,
|
|||
return -1;
|
||||
}
|
||||
|
||||
static int load_bloom_filter_from_graph(struct commit_graph *g,
|
||||
int load_bloom_filter_from_graph(struct commit_graph *g,
|
||||
struct bloom_filter *filter,
|
||||
uint32_t graph_pos)
|
||||
{
|
||||
|
@ -89,6 +92,8 @@ static int load_bloom_filter_from_graph(struct commit_graph *g,
|
|||
filter->data = (unsigned char *)(g->chunk_bloom_data +
|
||||
sizeof(unsigned char) * start_index +
|
||||
BLOOMDATA_CHUNK_HEADER_SIZE);
|
||||
filter->version = g->bloom_filter_settings->hash_version;
|
||||
filter->to_free = NULL;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
@ -100,7 +105,64 @@ static int load_bloom_filter_from_graph(struct commit_graph *g,
|
|||
* Not considered to be cryptographically secure.
|
||||
* Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
|
||||
*/
|
||||
uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len)
|
||||
uint32_t murmur3_seeded_v2(uint32_t seed, const char *data, size_t len)
|
||||
{
|
||||
const uint32_t c1 = 0xcc9e2d51;
|
||||
const uint32_t c2 = 0x1b873593;
|
||||
const uint32_t r1 = 15;
|
||||
const uint32_t r2 = 13;
|
||||
const uint32_t m = 5;
|
||||
const uint32_t n = 0xe6546b64;
|
||||
int i;
|
||||
uint32_t k1 = 0;
|
||||
const char *tail;
|
||||
|
||||
int len4 = len / sizeof(uint32_t);
|
||||
|
||||
uint32_t k;
|
||||
for (i = 0; i < len4; i++) {
|
||||
uint32_t byte1 = (uint32_t)(unsigned char)data[4*i];
|
||||
uint32_t byte2 = ((uint32_t)(unsigned char)data[4*i + 1]) << 8;
|
||||
uint32_t byte3 = ((uint32_t)(unsigned char)data[4*i + 2]) << 16;
|
||||
uint32_t byte4 = ((uint32_t)(unsigned char)data[4*i + 3]) << 24;
|
||||
k = byte1 | byte2 | byte3 | byte4;
|
||||
k *= c1;
|
||||
k = rotate_left(k, r1);
|
||||
k *= c2;
|
||||
|
||||
seed ^= k;
|
||||
seed = rotate_left(seed, r2) * m + n;
|
||||
}
|
||||
|
||||
tail = (data + len4 * sizeof(uint32_t));
|
||||
|
||||
switch (len & (sizeof(uint32_t) - 1)) {
|
||||
case 3:
|
||||
k1 ^= ((uint32_t)(unsigned char)tail[2]) << 16;
|
||||
/*-fallthrough*/
|
||||
case 2:
|
||||
k1 ^= ((uint32_t)(unsigned char)tail[1]) << 8;
|
||||
/*-fallthrough*/
|
||||
case 1:
|
||||
k1 ^= ((uint32_t)(unsigned char)tail[0]) << 0;
|
||||
k1 *= c1;
|
||||
k1 = rotate_left(k1, r1);
|
||||
k1 *= c2;
|
||||
seed ^= k1;
|
||||
break;
|
||||
}
|
||||
|
||||
seed ^= (uint32_t)len;
|
||||
seed ^= (seed >> 16);
|
||||
seed *= 0x85ebca6b;
|
||||
seed ^= (seed >> 13);
|
||||
seed *= 0xc2b2ae35;
|
||||
seed ^= (seed >> 16);
|
||||
|
||||
return seed;
|
||||
}
|
||||
|
||||
static uint32_t murmur3_seeded_v1(uint32_t seed, const char *data, size_t len)
|
||||
{
|
||||
const uint32_t c1 = 0xcc9e2d51;
|
||||
const uint32_t c2 = 0x1b873593;
|
||||
|
@ -165,8 +227,14 @@ void fill_bloom_key(const char *data,
|
|||
int i;
|
||||
const uint32_t seed0 = 0x293ae76f;
|
||||
const uint32_t seed1 = 0x7e646e2c;
|
||||
const uint32_t hash0 = murmur3_seeded(seed0, data, len);
|
||||
const uint32_t hash1 = murmur3_seeded(seed1, data, len);
|
||||
uint32_t hash0, hash1;
|
||||
if (settings->hash_version == 2) {
|
||||
hash0 = murmur3_seeded_v2(seed0, data, len);
|
||||
hash1 = murmur3_seeded_v2(seed1, data, len);
|
||||
} else {
|
||||
hash0 = murmur3_seeded_v1(seed0, data, len);
|
||||
hash1 = murmur3_seeded_v1(seed1, data, len);
|
||||
}
|
||||
|
||||
key->hashes = (uint32_t *)xcalloc(settings->num_hashes, sizeof(uint32_t));
|
||||
for (i = 0; i < settings->num_hashes; i++)
|
||||
|
@ -198,6 +266,18 @@ void init_bloom_filters(void)
|
|||
init_bloom_filter_slab(&bloom_filters);
|
||||
}
|
||||
|
||||
static void free_one_bloom_filter(struct bloom_filter *filter)
|
||||
{
|
||||
if (!filter)
|
||||
return;
|
||||
free(filter->to_free);
|
||||
}
|
||||
|
||||
void deinit_bloom_filters(void)
|
||||
{
|
||||
deep_clear_bloom_filter_slab(&bloom_filters, free_one_bloom_filter);
|
||||
}
|
||||
|
||||
static int pathmap_cmp(const void *hashmap_cmp_fn_data UNUSED,
|
||||
const struct hashmap_entry *eptr,
|
||||
const struct hashmap_entry *entry_or_key,
|
||||
|
@ -211,11 +291,97 @@ static int pathmap_cmp(const void *hashmap_cmp_fn_data UNUSED,
|
|||
return strcmp(e1->path, e2->path);
|
||||
}
|
||||
|
||||
static void init_truncated_large_filter(struct bloom_filter *filter)
|
||||
static void init_truncated_large_filter(struct bloom_filter *filter,
|
||||
int version)
|
||||
{
|
||||
filter->data = xmalloc(1);
|
||||
filter->data = filter->to_free = xmalloc(1);
|
||||
filter->data[0] = 0xFF;
|
||||
filter->len = 1;
|
||||
filter->version = version;
|
||||
}
|
||||
|
||||
#define VISITED (1u<<21)
|
||||
#define HIGH_BITS (1u<<22)
|
||||
|
||||
static int has_entries_with_high_bit(struct repository *r, struct tree *t)
|
||||
{
|
||||
if (parse_tree(t))
|
||||
return 1;
|
||||
|
||||
if (!(t->object.flags & VISITED)) {
|
||||
struct tree_desc desc;
|
||||
struct name_entry entry;
|
||||
|
||||
init_tree_desc(&desc, &t->object.oid, t->buffer, t->size);
|
||||
while (tree_entry(&desc, &entry)) {
|
||||
size_t i;
|
||||
for (i = 0; i < entry.pathlen; i++) {
|
||||
if (entry.path[i] & 0x80) {
|
||||
t->object.flags |= HIGH_BITS;
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
if (S_ISDIR(entry.mode)) {
|
||||
struct tree *sub = lookup_tree(r, &entry.oid);
|
||||
if (sub && has_entries_with_high_bit(r, sub)) {
|
||||
t->object.flags |= HIGH_BITS;
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
done:
|
||||
t->object.flags |= VISITED;
|
||||
}
|
||||
|
||||
return !!(t->object.flags & HIGH_BITS);
|
||||
}
|
||||
|
||||
static int commit_tree_has_high_bit_paths(struct repository *r,
|
||||
struct commit *c)
|
||||
{
|
||||
struct tree *t;
|
||||
if (repo_parse_commit(r, c))
|
||||
return 1;
|
||||
t = repo_get_commit_tree(r, c);
|
||||
if (!t)
|
||||
return 1;
|
||||
return has_entries_with_high_bit(r, t);
|
||||
}
|
||||
|
||||
static struct bloom_filter *upgrade_filter(struct repository *r, struct commit *c,
|
||||
struct bloom_filter *filter,
|
||||
int hash_version)
|
||||
{
|
||||
struct commit_list *p = c->parents;
|
||||
if (commit_tree_has_high_bit_paths(r, c))
|
||||
return NULL;
|
||||
|
||||
if (p && commit_tree_has_high_bit_paths(r, p->item))
|
||||
return NULL;
|
||||
|
||||
filter->version = hash_version;
|
||||
|
||||
return filter;
|
||||
}
|
||||
|
||||
struct bloom_filter *get_bloom_filter(struct repository *r, struct commit *c)
|
||||
{
|
||||
struct bloom_filter *filter;
|
||||
int hash_version;
|
||||
|
||||
filter = get_or_compute_bloom_filter(r, c, 0, NULL, NULL);
|
||||
if (!filter)
|
||||
return NULL;
|
||||
|
||||
prepare_repo_settings(r);
|
||||
hash_version = r->settings.commit_graph_changed_paths_version;
|
||||
|
||||
if (!(hash_version == -1 || hash_version == filter->version))
|
||||
return NULL; /* unusable filter */
|
||||
return filter;
|
||||
}
|
||||
|
||||
struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
|
||||
|
@ -243,8 +409,23 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
|
|||
filter, graph_pos);
|
||||
}
|
||||
|
||||
if (filter->data && filter->len)
|
||||
if (filter->data && filter->len) {
|
||||
struct bloom_filter *upgrade;
|
||||
if (!settings || settings->hash_version == filter->version)
|
||||
return filter;
|
||||
|
||||
/* version mismatch, see if we can upgrade */
|
||||
if (compute_if_not_present &&
|
||||
git_env_bool("GIT_TEST_UPGRADE_BLOOM_FILTERS", 1)) {
|
||||
upgrade = upgrade_filter(r, c, filter,
|
||||
settings->hash_version);
|
||||
if (upgrade) {
|
||||
if (computed)
|
||||
*computed |= BLOOM_UPGRADED;
|
||||
return upgrade;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!compute_if_not_present)
|
||||
return NULL;
|
||||
|
||||
|
@ -300,19 +481,22 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
|
|||
}
|
||||
|
||||
if (hashmap_get_size(&pathmap) > settings->max_changed_paths) {
|
||||
init_truncated_large_filter(filter);
|
||||
init_truncated_large_filter(filter,
|
||||
settings->hash_version);
|
||||
if (computed)
|
||||
*computed |= BLOOM_TRUNC_LARGE;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
filter->len = (hashmap_get_size(&pathmap) * settings->bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD;
|
||||
filter->version = settings->hash_version;
|
||||
if (!filter->len) {
|
||||
if (computed)
|
||||
*computed |= BLOOM_TRUNC_EMPTY;
|
||||
filter->len = 1;
|
||||
}
|
||||
CALLOC_ARRAY(filter->data, filter->len);
|
||||
filter->to_free = filter->data;
|
||||
|
||||
hashmap_for_each_entry(&pathmap, &iter, e, entry) {
|
||||
struct bloom_key key;
|
||||
|
@ -326,7 +510,7 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
|
|||
} else {
|
||||
for (i = 0; i < diff_queued_diff.nr; i++)
|
||||
diff_free_filepair(diff_queued_diff.queue[i]);
|
||||
init_truncated_large_filter(filter);
|
||||
init_truncated_large_filter(filter, settings->hash_version);
|
||||
|
||||
if (computed)
|
||||
*computed |= BLOOM_TRUNC_LARGE;
|
||||
|
|
38
bloom.h
38
bloom.h
|
@ -3,13 +3,16 @@
|
|||
|
||||
struct commit;
|
||||
struct repository;
|
||||
struct commit_graph;
|
||||
|
||||
struct bloom_filter_settings {
|
||||
/*
|
||||
* The version of the hashing technique being used.
|
||||
* We currently only support version = 1 which is
|
||||
* The newest version is 2, which is
|
||||
* the seeded murmur3 hashing technique implemented
|
||||
* in bloom.c.
|
||||
* in bloom.c. Bloom filters of version 1 were created
|
||||
* with prior versions of Git, which had a bug in the
|
||||
* implementation of the hash function.
|
||||
*/
|
||||
uint32_t hash_version;
|
||||
|
||||
|
@ -52,6 +55,9 @@ struct bloom_filter_settings {
|
|||
struct bloom_filter {
|
||||
unsigned char *data;
|
||||
size_t len;
|
||||
int version;
|
||||
|
||||
void *to_free;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -68,6 +74,10 @@ struct bloom_key {
|
|||
uint32_t *hashes;
|
||||
};
|
||||
|
||||
int load_bloom_filter_from_graph(struct commit_graph *g,
|
||||
struct bloom_filter *filter,
|
||||
uint32_t graph_pos);
|
||||
|
||||
/*
|
||||
* Calculate the murmur3 32-bit hash value for the given data
|
||||
* using the given seed.
|
||||
|
@ -75,7 +85,7 @@ struct bloom_key {
|
|||
* Not considered to be cryptographically secure.
|
||||
* Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
|
||||
*/
|
||||
uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len);
|
||||
uint32_t murmur3_seeded_v2(uint32_t seed, const char *data, size_t len);
|
||||
|
||||
void fill_bloom_key(const char *data,
|
||||
size_t len,
|
||||
|
@ -88,12 +98,14 @@ void add_key_to_filter(const struct bloom_key *key,
|
|||
const struct bloom_filter_settings *settings);
|
||||
|
||||
void init_bloom_filters(void);
|
||||
void deinit_bloom_filters(void);
|
||||
|
||||
enum bloom_filter_computed {
|
||||
BLOOM_NOT_COMPUTED = (1 << 0),
|
||||
BLOOM_COMPUTED = (1 << 1),
|
||||
BLOOM_TRUNC_LARGE = (1 << 2),
|
||||
BLOOM_TRUNC_EMPTY = (1 << 3),
|
||||
BLOOM_UPGRADED = (1 << 4),
|
||||
};
|
||||
|
||||
struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
|
||||
|
@ -102,8 +114,24 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
|
|||
const struct bloom_filter_settings *settings,
|
||||
enum bloom_filter_computed *computed);
|
||||
|
||||
#define get_bloom_filter(r, c) get_or_compute_bloom_filter( \
|
||||
(r), (c), 0, NULL, NULL)
|
||||
/*
|
||||
* Find the Bloom filter associated with the given commit "c".
|
||||
*
|
||||
* If any of the following are true
|
||||
*
|
||||
* - the repository does not have a commit-graph, or
|
||||
* - the repository disables reading from the commit-graph, or
|
||||
* - the given commit does not have a Bloom filter computed, or
|
||||
* - there is a Bloom filter for commit "c", but it cannot be read
|
||||
* because the filter uses an incompatible version of murmur3
|
||||
*
|
||||
* , then `get_bloom_filter()` will return NULL. Otherwise, the corresponding
|
||||
* Bloom filter will be returned.
|
||||
*
|
||||
* For callers who wish to inspect Bloom filters with incompatible hash
|
||||
* versions, use get_or_compute_bloom_filter().
|
||||
*/
|
||||
struct bloom_filter *get_bloom_filter(struct repository *r, struct commit *c);
|
||||
|
||||
int bloom_filter_contains(const struct bloom_filter *filter,
|
||||
const struct bloom_key *key,
|
||||
|
|
|
@ -346,7 +346,6 @@ static int graph_read_bloom_data(const unsigned char *chunk_start,
|
|||
size_t chunk_size, void *data)
|
||||
{
|
||||
struct commit_graph *g = data;
|
||||
uint32_t hash_version;
|
||||
|
||||
if (chunk_size < BLOOMDATA_CHUNK_HEADER_SIZE) {
|
||||
warning(_("ignoring too-small changed-path chunk"
|
||||
|
@ -358,13 +357,9 @@ static int graph_read_bloom_data(const unsigned char *chunk_start,
|
|||
|
||||
g->chunk_bloom_data = chunk_start;
|
||||
g->chunk_bloom_data_size = chunk_size;
|
||||
hash_version = get_be32(chunk_start);
|
||||
|
||||
if (hash_version != 1)
|
||||
return 0;
|
||||
|
||||
g->bloom_filter_settings = xmalloc(sizeof(struct bloom_filter_settings));
|
||||
g->bloom_filter_settings->hash_version = hash_version;
|
||||
g->bloom_filter_settings->hash_version = get_be32(chunk_start);
|
||||
g->bloom_filter_settings->num_hashes = get_be32(chunk_start + 4);
|
||||
g->bloom_filter_settings->bits_per_entry = get_be32(chunk_start + 8);
|
||||
g->bloom_filter_settings->max_changed_paths = DEFAULT_BLOOM_MAX_CHANGES;
|
||||
|
@ -461,7 +456,7 @@ struct commit_graph *parse_commit_graph(struct repo_settings *s,
|
|||
graph->read_generation_data = 1;
|
||||
}
|
||||
|
||||
if (s->commit_graph_read_changed_paths) {
|
||||
if (s->commit_graph_changed_paths_version) {
|
||||
read_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES,
|
||||
graph_read_bloom_index, graph);
|
||||
read_chunk(cf, GRAPH_CHUNKID_BLOOMDATA,
|
||||
|
@ -546,6 +541,31 @@ static int validate_mixed_generation_chain(struct commit_graph *g)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void validate_mixed_bloom_settings(struct commit_graph *g)
|
||||
{
|
||||
struct bloom_filter_settings *settings = NULL;
|
||||
for (; g; g = g->base_graph) {
|
||||
if (!g->bloom_filter_settings)
|
||||
continue;
|
||||
if (!settings) {
|
||||
settings = g->bloom_filter_settings;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (g->bloom_filter_settings->bits_per_entry != settings->bits_per_entry ||
|
||||
g->bloom_filter_settings->num_hashes != settings->num_hashes ||
|
||||
g->bloom_filter_settings->hash_version != settings->hash_version) {
|
||||
g->chunk_bloom_indexes = NULL;
|
||||
g->chunk_bloom_data = NULL;
|
||||
FREE_AND_NULL(g->bloom_filter_settings);
|
||||
|
||||
warning(_("disabling Bloom filters for commit-graph "
|
||||
"layer '%s' due to incompatible settings"),
|
||||
oid_to_hex(&g->oid));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int add_graph_to_chain(struct commit_graph *g,
|
||||
struct commit_graph *chain,
|
||||
struct object_id *oids,
|
||||
|
@ -670,6 +690,7 @@ struct commit_graph *load_commit_graph_chain_fd_st(struct repository *r,
|
|||
}
|
||||
|
||||
validate_mixed_generation_chain(graph_chain);
|
||||
validate_mixed_bloom_settings(graph_chain);
|
||||
|
||||
free(oids);
|
||||
fclose(fp);
|
||||
|
@ -814,6 +835,7 @@ void close_commit_graph(struct raw_object_store *o)
|
|||
return;
|
||||
|
||||
clear_commit_graph_data_slab(&commit_graph_data_slab);
|
||||
deinit_bloom_filters();
|
||||
free_commit_graph(o->commit_graph);
|
||||
o->commit_graph = NULL;
|
||||
}
|
||||
|
@ -1152,6 +1174,7 @@ struct write_commit_graph_context {
|
|||
int count_bloom_filter_not_computed;
|
||||
int count_bloom_filter_trunc_empty;
|
||||
int count_bloom_filter_trunc_large;
|
||||
int count_bloom_filter_upgraded;
|
||||
};
|
||||
|
||||
static int write_graph_chunk_fanout(struct hashfile *f,
|
||||
|
@ -1759,6 +1782,8 @@ static void trace2_bloom_filter_write_statistics(struct write_commit_graph_conte
|
|||
ctx->count_bloom_filter_trunc_empty);
|
||||
trace2_data_intmax("commit-graph", ctx->r, "filter-trunc-large",
|
||||
ctx->count_bloom_filter_trunc_large);
|
||||
trace2_data_intmax("commit-graph", ctx->r, "filter-upgraded",
|
||||
ctx->count_bloom_filter_upgraded);
|
||||
}
|
||||
|
||||
static void compute_bloom_filters(struct write_commit_graph_context *ctx)
|
||||
|
@ -1800,6 +1825,8 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
|
|||
ctx->count_bloom_filter_trunc_empty++;
|
||||
if (computed & BLOOM_TRUNC_LARGE)
|
||||
ctx->count_bloom_filter_trunc_large++;
|
||||
} else if (computed & BLOOM_UPGRADED) {
|
||||
ctx->count_bloom_filter_upgraded++;
|
||||
} else if (computed & BLOOM_NOT_COMPUTED)
|
||||
ctx->count_bloom_filter_not_computed++;
|
||||
ctx->total_bloom_filter_data_size += filter
|
||||
|
@ -2481,6 +2508,13 @@ int write_commit_graph(struct object_directory *odb,
|
|||
}
|
||||
if (!commit_graph_compatible(r))
|
||||
return 0;
|
||||
if (r->settings.commit_graph_changed_paths_version < -1
|
||||
|| r->settings.commit_graph_changed_paths_version > 2) {
|
||||
warning(_("attempting to write a commit-graph, but "
|
||||
"'commitGraph.changedPathsVersion' (%d) is not supported"),
|
||||
r->settings.commit_graph_changed_paths_version);
|
||||
return 0;
|
||||
}
|
||||
|
||||
CALLOC_ARRAY(ctx, 1);
|
||||
ctx->r = r;
|
||||
|
@ -2493,6 +2527,7 @@ int write_commit_graph(struct object_directory *odb,
|
|||
ctx->write_generation_data = (get_configured_generation_version(r) == 2);
|
||||
ctx->num_generation_data_overflows = 0;
|
||||
|
||||
bloom_settings.hash_version = r->settings.commit_graph_changed_paths_version;
|
||||
bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY",
|
||||
bloom_settings.bits_per_entry);
|
||||
bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES",
|
||||
|
@ -2522,12 +2557,20 @@ int write_commit_graph(struct object_directory *odb,
|
|||
g = ctx->r->objects->commit_graph;
|
||||
|
||||
/* We have changed-paths already. Keep them in the next graph */
|
||||
if (g && g->chunk_bloom_data) {
|
||||
if (g && g->bloom_filter_settings) {
|
||||
ctx->changed_paths = 1;
|
||||
ctx->bloom_settings = g->bloom_filter_settings;
|
||||
|
||||
/* don't propagate the hash_version unless unspecified */
|
||||
if (bloom_settings.hash_version == -1)
|
||||
bloom_settings.hash_version = g->bloom_filter_settings->hash_version;
|
||||
bloom_settings.bits_per_entry = g->bloom_filter_settings->bits_per_entry;
|
||||
bloom_settings.num_hashes = g->bloom_filter_settings->num_hashes;
|
||||
bloom_settings.max_changed_paths = g->bloom_filter_settings->max_changed_paths;
|
||||
}
|
||||
}
|
||||
|
||||
bloom_settings.hash_version = bloom_settings.hash_version == 2 ? 2 : 1;
|
||||
|
||||
if (ctx->split) {
|
||||
struct commit_graph *g = ctx->r->objects->commit_graph;
|
||||
|
||||
|
@ -2611,6 +2654,9 @@ int write_commit_graph(struct object_directory *odb,
|
|||
|
||||
res = write_commit_graph_file(ctx);
|
||||
|
||||
if (ctx->changed_paths)
|
||||
deinit_bloom_filters();
|
||||
|
||||
if (ctx->split)
|
||||
mark_commit_graphs(ctx);
|
||||
|
||||
|
|
1
object.h
1
object.h
|
@ -75,6 +75,7 @@ void object_array_init(struct object_array *array);
|
|||
* commit-reach.c: 16-----19
|
||||
* sha1-name.c: 20
|
||||
* list-objects-filter.c: 21
|
||||
* bloom.c: 2122
|
||||
* builtin/fsck.c: 0--3
|
||||
* builtin/gc.c: 0
|
||||
* builtin/index-pack.c: 2021
|
||||
|
|
|
@ -23,7 +23,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
|
|||
*/
|
||||
repo_set_hash_algo(the_repository, GIT_HASH_SHA1);
|
||||
the_repository->settings.commit_graph_generation_version = 2;
|
||||
the_repository->settings.commit_graph_read_changed_paths = 1;
|
||||
the_repository->settings.commit_graph_changed_paths_version = 1;
|
||||
g = parse_commit_graph(&the_repository->settings, (void *)data, size);
|
||||
repo_clear(the_repository);
|
||||
free_commit_graph(g);
|
||||
|
|
|
@ -23,6 +23,7 @@ void prepare_repo_settings(struct repository *r)
|
|||
int value;
|
||||
const char *strval;
|
||||
int manyfiles;
|
||||
int read_changed_paths;
|
||||
|
||||
if (!r->gitdir)
|
||||
BUG("Cannot add settings for uninitialized repository");
|
||||
|
@ -54,7 +55,10 @@ void prepare_repo_settings(struct repository *r)
|
|||
/* Commit graph config or default, does not cascade (simple) */
|
||||
repo_cfg_bool(r, "core.commitgraph", &r->settings.core_commit_graph, 1);
|
||||
repo_cfg_int(r, "commitgraph.generationversion", &r->settings.commit_graph_generation_version, 2);
|
||||
repo_cfg_bool(r, "commitgraph.readchangedpaths", &r->settings.commit_graph_read_changed_paths, 1);
|
||||
repo_cfg_bool(r, "commitgraph.readchangedpaths", &read_changed_paths, 1);
|
||||
repo_cfg_int(r, "commitgraph.changedpathsversion",
|
||||
&r->settings.commit_graph_changed_paths_version,
|
||||
read_changed_paths ? -1 : 0);
|
||||
repo_cfg_bool(r, "gc.writecommitgraph", &r->settings.gc_write_commit_graph, 1);
|
||||
repo_cfg_bool(r, "fetch.writecommitgraph", &r->settings.fetch_write_commit_graph, 0);
|
||||
|
||||
|
|
|
@ -37,7 +37,7 @@ struct repo_settings {
|
|||
|
||||
int core_commit_graph;
|
||||
int commit_graph_generation_version;
|
||||
int commit_graph_read_changed_paths;
|
||||
int commit_graph_changed_paths_version;
|
||||
int gc_write_commit_graph;
|
||||
int fetch_write_commit_graph;
|
||||
int command_requires_full_index;
|
||||
|
|
26
revision.c
26
revision.c
|
@ -847,17 +847,28 @@ static int rev_compare_tree(struct rev_info *revs,
|
|||
return tree_difference;
|
||||
}
|
||||
|
||||
static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit)
|
||||
static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit,
|
||||
int nth_parent)
|
||||
{
|
||||
struct tree *t1 = repo_get_commit_tree(the_repository, commit);
|
||||
int bloom_ret = -1;
|
||||
|
||||
if (!t1)
|
||||
return 0;
|
||||
|
||||
if (!nth_parent && revs->bloom_keys_nr) {
|
||||
bloom_ret = check_maybe_different_in_bloom_filter(revs, commit);
|
||||
if (!bloom_ret)
|
||||
return 1;
|
||||
}
|
||||
|
||||
tree_difference = REV_TREE_SAME;
|
||||
revs->pruning.flags.has_changes = 0;
|
||||
diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning);
|
||||
|
||||
if (bloom_ret == 1 && tree_difference == REV_TREE_SAME)
|
||||
count_bloom_filter_false_positive++;
|
||||
|
||||
return tree_difference == REV_TREE_SAME;
|
||||
}
|
||||
|
||||
|
@ -895,7 +906,7 @@ static int compact_treesame(struct rev_info *revs, struct commit *commit, unsign
|
|||
if (nth_parent != 0)
|
||||
die("compact_treesame %u", nth_parent);
|
||||
old_same = !!(commit->object.flags & TREESAME);
|
||||
if (rev_same_tree_as_empty(revs, commit))
|
||||
if (rev_same_tree_as_empty(revs, commit, nth_parent))
|
||||
commit->object.flags |= TREESAME;
|
||||
else
|
||||
commit->object.flags &= ~TREESAME;
|
||||
|
@ -991,7 +1002,14 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
|
|||
return;
|
||||
|
||||
if (!commit->parents) {
|
||||
if (rev_same_tree_as_empty(revs, commit))
|
||||
/*
|
||||
* Pretend as if we are comparing ourselves to the
|
||||
* (non-existent) first parent of this commit object. Even
|
||||
* though no such parent exists, its changed-path Bloom filter
|
||||
* (if one exists) is relative to the empty tree, so using Bloom
|
||||
* filters is allowed here.
|
||||
*/
|
||||
if (rev_same_tree_as_empty(revs, commit, 0))
|
||||
commit->object.flags |= TREESAME;
|
||||
return;
|
||||
}
|
||||
|
@ -1072,7 +1090,7 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
|
|||
|
||||
case REV_TREE_NEW:
|
||||
if (revs->remove_empty_trees &&
|
||||
rev_same_tree_as_empty(revs, p)) {
|
||||
rev_same_tree_as_empty(revs, p, nth_parent)) {
|
||||
/* We are adding all the specified
|
||||
* paths from this parent, so the
|
||||
* history beyond this parent is not
|
||||
|
|
|
@ -51,6 +51,7 @@ static void get_bloom_filter_for_commit(const struct object_id *commit_oid)
|
|||
|
||||
static const char *bloom_usage = "\n"
|
||||
" test-tool bloom get_murmur3 <string>\n"
|
||||
" test-tool bloom get_murmur3_seven_highbit\n"
|
||||
" test-tool bloom generate_filter <string> [<string>...]\n"
|
||||
" test-tool bloom get_filter_for_commit <commit-hex>\n";
|
||||
|
||||
|
@ -65,7 +66,13 @@ int cmd__bloom(int argc, const char **argv)
|
|||
uint32_t hashed;
|
||||
if (argc < 3)
|
||||
usage(bloom_usage);
|
||||
hashed = murmur3_seeded(0, argv[2], strlen(argv[2]));
|
||||
hashed = murmur3_seeded_v2(0, argv[2], strlen(argv[2]));
|
||||
printf("Murmur3 Hash with seed=0:0x%08x\n", hashed);
|
||||
}
|
||||
|
||||
if (!strcmp(argv[1], "get_murmur3_seven_highbit")) {
|
||||
uint32_t hashed;
|
||||
hashed = murmur3_seeded_v2(0, "\x99\xaa\xbb\xcc\xdd\xee\xff", 7);
|
||||
printf("Murmur3 Hash with seed=0:0x%08x\n", hashed);
|
||||
}
|
||||
|
||||
|
|
|
@ -7,20 +7,8 @@
|
|||
#include "bloom.h"
|
||||
#include "setup.h"
|
||||
|
||||
int cmd__read_graph(int argc UNUSED, const char **argv UNUSED)
|
||||
static void dump_graph_info(struct commit_graph *graph)
|
||||
{
|
||||
struct commit_graph *graph = NULL;
|
||||
struct object_directory *odb;
|
||||
|
||||
setup_git_directory();
|
||||
odb = the_repository->objects->odb;
|
||||
|
||||
prepare_repo_settings(the_repository);
|
||||
|
||||
graph = read_commit_graph_one(the_repository, odb);
|
||||
if (!graph)
|
||||
return 1;
|
||||
|
||||
printf("header: %08x %d %d %d %d\n",
|
||||
ntohl(*(uint32_t*)graph->data),
|
||||
*(unsigned char*)(graph->data + 4),
|
||||
|
@ -59,8 +47,57 @@ int cmd__read_graph(int argc UNUSED, const char **argv UNUSED)
|
|||
if (graph->topo_levels)
|
||||
printf(" topo_levels");
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
static void dump_graph_bloom_filters(struct commit_graph *graph)
|
||||
{
|
||||
uint32_t i;
|
||||
|
||||
for (i = 0; i < graph->num_commits + graph->num_commits_in_base; i++) {
|
||||
struct bloom_filter filter = { 0 };
|
||||
size_t j;
|
||||
|
||||
if (load_bloom_filter_from_graph(graph, &filter, i) < 0) {
|
||||
fprintf(stderr, "missing Bloom filter for graph "
|
||||
"position %"PRIu32"\n", i);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (j = 0; j < filter.len; j++)
|
||||
printf("%02x", filter.data[j]);
|
||||
if (filter.len)
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
int cmd__read_graph(int argc, const char **argv)
|
||||
{
|
||||
struct commit_graph *graph = NULL;
|
||||
struct object_directory *odb;
|
||||
int ret = 0;
|
||||
|
||||
setup_git_directory();
|
||||
odb = the_repository->objects->odb;
|
||||
|
||||
prepare_repo_settings(the_repository);
|
||||
|
||||
graph = read_commit_graph_one(the_repository, odb);
|
||||
if (!graph) {
|
||||
ret = 1;
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (argc <= 1)
|
||||
dump_graph_info(graph);
|
||||
else if (!strcmp(argv[1], "bloom-filters"))
|
||||
dump_graph_bloom_filters(graph);
|
||||
else {
|
||||
fprintf(stderr, "unknown sub-command: '%s'\n", argv[1]);
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
done:
|
||||
UNLEAK(graph);
|
||||
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -29,6 +29,14 @@ test_expect_success 'compute unseeded murmur3 hash for test string 2' '
|
|||
test_cmp expect actual
|
||||
'
|
||||
|
||||
test_expect_success 'compute unseeded murmur3 hash for test string 3' '
|
||||
cat >expect <<-\EOF &&
|
||||
Murmur3 Hash with seed=0:0xa183ccfd
|
||||
EOF
|
||||
test-tool bloom get_murmur3_seven_highbit >actual &&
|
||||
test_cmp expect actual
|
||||
'
|
||||
|
||||
test_expect_success 'compute bloom key for empty string' '
|
||||
cat >expect <<-\EOF &&
|
||||
Hashes:0x5615800c|0x5b966560|0x61174ab4|0x66983008|0x6c19155c|0x7199fab0|0x771ae004|
|
||||
|
|
|
@ -82,7 +82,23 @@ test_bloom_filters_used () {
|
|||
# Usage: test_bloom_filters_not_used <log-args>
#
# Runs setup (defined elsewhere in this script) with the given log
# arguments, then checks that no changed-path Bloom filter was actually
# consulted and that log_wo_bloom and log_w_bloom are identical.
#
# Bloom statistics may be absent from trace.perf entirely.  If they are
# present, "maybe", "definitely_not" and "false_positive" must all be 0,
# while "filter_not_present" may hold any count.
test_bloom_filters_not_used () {
	log_args=$1
	setup "$log_args" &&

	# No unconditional "! grep" for filter_not_present here: it would
	# fail in exactly the case the branch below is meant to accept.
	if grep -q "statistics:{\"filter_not_present\":" "$TRASH_DIRECTORY/trace.perf"
	then
		# if the Bloom filter system is initialized, ensure that no
		# filters were used
		data="statistics:{"
		# unusable filters (e.g., those computed with a
		# different value of commitGraph.changedPathsVersion)
		# are counted in the filter_not_present bucket, so any
		# value is OK there.
		data="$data\"filter_not_present\":[0-9][0-9]*,"
		data="$data\"maybe\":0,"
		data="$data\"definitely_not\":0,"
		data="$data\"false_positive\":0}"

		grep -q "$data" "$TRASH_DIRECTORY/trace.perf"
	fi &&
	test_cmp log_wo_bloom log_w_bloom
}
|
||||
|
||||
|
@ -163,7 +179,7 @@ test_expect_success 'setup - add commit-graph to the chain with Bloom filters' '
|
|||
|
||||
test_bloom_filters_used_when_some_filters_are_missing () {
|
||||
log_args=$1
|
||||
bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"maybe\":6,\"definitely_not\":9"
|
||||
bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"maybe\":6,\"definitely_not\":10"
|
||||
setup "$log_args" &&
|
||||
grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" &&
|
||||
test_cmp log_wo_bloom log_w_bloom
|
||||
|
@ -206,6 +222,10 @@ test_filter_trunc_large () {
|
|||
grep "\"key\":\"filter-trunc-large\",\"value\":\"$1\"" $2
|
||||
}
|
||||
|
||||
# Usage: test_filter_upgraded <count> <trace-file>
#
# Succeeds when <trace-file> contains a "filter-upgraded" key/value
# entry whose value is <count>.
test_filter_upgraded () {
	grep '"key":"filter-upgraded","value":"'"$1"'"' $2
}
|
||||
|
||||
test_expect_success 'correctly report changes over limit' '
|
||||
git init limits &&
|
||||
(
|
||||
|
@ -405,8 +425,307 @@ test_expect_success 'Bloom generation backfills empty commits' '
|
|||
)
|
||||
'
|
||||
|
||||
graph=.git/objects/info/commit-graph
|
||||
graphdir=.git/objects/info/commit-graphs
|
||||
chain=$graphdir/commit-graph-chain
|
||||
|
||||
test_expect_success 'setup for mixed Bloom setting tests' '
|
||||
repo=mixed-bloom-settings &&
|
||||
|
||||
git init $repo &&
|
||||
for i in one two three
|
||||
do
|
||||
test_commit -C $repo $i file || return 1
|
||||
done
|
||||
'
|
||||
|
||||
test_expect_success 'ensure Bloom filters with incompatible settings are ignored' '
|
||||
# Compute Bloom filters with "unusual" settings.
|
||||
git -C $repo rev-parse one >in &&
|
||||
GIT_TEST_BLOOM_SETTINGS_NUM_HASHES=3 git -C $repo commit-graph write \
|
||||
--stdin-commits --changed-paths --split <in &&
|
||||
layer=$(head -n 1 $repo/$chain) &&
|
||||
|
||||
# A commit-graph layer without Bloom filters "hides" the layers
|
||||
# below ...
|
||||
git -C $repo rev-parse two >in &&
|
||||
git -C $repo commit-graph write --stdin-commits --no-changed-paths \
|
||||
--split=no-merge <in &&
|
||||
|
||||
# Another commit-graph layer that has Bloom filters, but with
|
||||
# standard settings, and is thus incompatible with the base
|
||||
# layer written above.
|
||||
git -C $repo rev-parse HEAD >in &&
|
||||
git -C $repo commit-graph write --stdin-commits --changed-paths \
|
||||
--split=no-merge <in &&
|
||||
|
||||
test_line_count = 3 $repo/$chain &&
|
||||
|
||||
# Ensure that incompatible Bloom filters are ignored.
|
||||
git -C $repo -c core.commitGraph=false log --oneline --no-decorate -- file \
|
||||
>expect 2>err &&
|
||||
git -C $repo log --oneline --no-decorate -- file >actual 2>err &&
|
||||
test_cmp expect actual &&
|
||||
grep "disabling Bloom filters for commit-graph layer .$layer." err
|
||||
'
|
||||
|
||||
test_expect_success 'merge graph layers with incompatible Bloom settings' '
|
||||
# Ensure that incompatible Bloom filters are ignored when
|
||||
# merging existing layers.
|
||||
>trace2.txt &&
|
||||
GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
|
||||
git -C $repo commit-graph write --reachable --changed-paths 2>err &&
|
||||
grep "disabling Bloom filters for commit-graph layer .$layer." err &&
|
||||
grep "{\"hash_version\":1,\"num_hashes\":7,\"bits_per_entry\":10,\"max_changed_paths\":512" trace2.txt &&
|
||||
|
||||
test_path_is_file $repo/$graph &&
|
||||
test_dir_is_empty $repo/$graphdir &&
|
||||
|
||||
git -C $repo -c core.commitGraph=false log --oneline --no-decorate -- \
|
||||
file >expect &&
|
||||
trace_out="$(pwd)/trace.perf" &&
|
||||
GIT_TRACE2_PERF="$trace_out" \
|
||||
git -C $repo log --oneline --no-decorate -- file >actual 2>err &&
|
||||
|
||||
test_cmp expect actual &&
|
||||
grep "statistics:{\"filter_not_present\":0," trace.perf &&
|
||||
test_must_be_empty err
|
||||
'
|
||||
|
||||
# chosen to be the same under all Unicode normalization forms
|
||||
CENT=$(printf "\302\242")
|
||||
|
||||
test_expect_success 'ensure Bloom filter with incompatible versions are ignored' '
|
||||
rm "$repo/$graph" &&
|
||||
|
||||
git -C $repo log --oneline --no-decorate -- $CENT >expect &&
|
||||
|
||||
# Compute v1 Bloom filters for commits at the bottom.
|
||||
git -C $repo rev-parse HEAD^ >in &&
|
||||
git -C $repo commit-graph write --stdin-commits --changed-paths \
|
||||
--split <in &&
|
||||
|
||||
# Compute v2 Bloom filters for the rest of the commits at the top.
|
||||
git -C $repo rev-parse HEAD >in &&
|
||||
git -C $repo -c commitGraph.changedPathsVersion=2 commit-graph write \
|
||||
--stdin-commits --changed-paths --split=no-merge <in &&
|
||||
|
||||
test_line_count = 2 $repo/$chain &&
|
||||
|
||||
git -C $repo log --oneline --no-decorate -- $CENT >actual 2>err &&
|
||||
test_cmp expect actual &&
|
||||
|
||||
layer="$(head -n 1 $repo/$chain)" &&
|
||||
cat >expect.err <<-EOF &&
|
||||
warning: disabling Bloom filters for commit-graph layer $SQ$layer$SQ due to incompatible settings
|
||||
EOF
|
||||
test_cmp expect.err err &&
|
||||
|
||||
# Merge the two layers with incompatible bloom filter versions,
|
||||
# ensuring that the v2 filters are used.
|
||||
>trace2.txt &&
|
||||
GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
|
||||
git -C $repo -c commitGraph.changedPathsVersion=2 commit-graph write --reachable --changed-paths 2>err &&
|
||||
grep "disabling Bloom filters for commit-graph layer .$layer." err &&
|
||||
grep "{\"hash_version\":2,\"num_hashes\":7,\"bits_per_entry\":10,\"max_changed_paths\":512" trace2.txt
|
||||
'
|
||||
|
||||
# Write all Bloom filters reported by "test-tool read-graph
# bloom-filters" to filters.dat in the current directory, then print
# the first line of that file.  If test-tool fails, its exit status is
# returned and nothing is printed.
get_first_changed_path_filter () {
	test-tool read-graph bloom-filters >filters.dat || return
	head -n 1 filters.dat
}
|
||||
|
||||
test_expect_success 'set up repo with high bit path, version 1 changed-path' '
|
||||
git init highbit1 &&
|
||||
test_commit -C highbit1 c1 "$CENT" &&
|
||||
git -C highbit1 commit-graph write --reachable --changed-paths
|
||||
'
|
||||
|
||||
test_expect_success 'setup check value of version 1 changed-path' '
|
||||
(
|
||||
cd highbit1 &&
|
||||
echo "52a9" >expect &&
|
||||
get_first_changed_path_filter >actual
|
||||
)
|
||||
'
|
||||
|
||||
# expect will not match actual if char is unsigned by default. Write the test
|
||||
# in this way, so that a user running this test script can still see if the two
|
||||
# files match. (It will appear as an ordinary success if they match, and a skip
|
||||
# if not.)
|
||||
if test_cmp highbit1/expect highbit1/actual
|
||||
then
|
||||
test_set_prereq SIGNED_CHAR_BY_DEFAULT
|
||||
fi
|
||||
test_expect_success SIGNED_CHAR_BY_DEFAULT 'check value of version 1 changed-path' '
|
||||
# Only the prereq matters for this test.
|
||||
true
|
||||
'
|
||||
|
||||
test_expect_success 'setup make another commit' '
|
||||
# "git log" does not use Bloom filters for root commits - see how, in
|
||||
# revision.c, rev_compare_tree() (the only code path that eventually calls
|
||||
# get_bloom_filter()) is only called by try_to_simplify_commit() when the commit
|
||||
# has one parent. Therefore, make another commit so that we perform the tests on
|
||||
# a non-root commit.
|
||||
test_commit -C highbit1 anotherc1 "another$CENT"
|
||||
'
|
||||
|
||||
test_expect_success 'version 1 changed-path used when version 1 requested' '
|
||||
(
|
||||
cd highbit1 &&
|
||||
test_bloom_filters_used "-- another$CENT"
|
||||
)
|
||||
'
|
||||
|
||||
test_expect_success 'version 1 changed-path not used when version 2 requested' '
|
||||
(
|
||||
cd highbit1 &&
|
||||
git config --add commitGraph.changedPathsVersion 2 &&
|
||||
test_bloom_filters_not_used "-- another$CENT"
|
||||
)
|
||||
'
|
||||
|
||||
test_expect_success 'version 1 changed-path used when autodetect requested' '
|
||||
(
|
||||
cd highbit1 &&
|
||||
git config --add commitGraph.changedPathsVersion -1 &&
|
||||
test_bloom_filters_used "-- another$CENT"
|
||||
)
|
||||
'
|
||||
|
||||
test_expect_success 'when writing another commit graph, preserve existing version 1 of changed-path' '
|
||||
test_commit -C highbit1 c1double "$CENT$CENT" &&
|
||||
git -C highbit1 commit-graph write --reachable --changed-paths &&
|
||||
(
|
||||
cd highbit1 &&
|
||||
git config --add commitGraph.changedPathsVersion -1 &&
|
||||
echo "options: bloom(1,10,7) read_generation_data" >expect &&
|
||||
test-tool read-graph >full &&
|
||||
grep options full >actual &&
|
||||
test_cmp expect actual
|
||||
)
|
||||
'
|
||||
|
||||
test_expect_success 'set up repo with high bit path, version 2 changed-path' '
|
||||
git init highbit2 &&
|
||||
git -C highbit2 config --add commitGraph.changedPathsVersion 2 &&
|
||||
test_commit -C highbit2 c2 "$CENT" &&
|
||||
git -C highbit2 commit-graph write --reachable --changed-paths
|
||||
'
|
||||
|
||||
test_expect_success 'check value of version 2 changed-path' '
|
||||
(
|
||||
cd highbit2 &&
|
||||
echo "c01f" >expect &&
|
||||
get_first_changed_path_filter >actual &&
|
||||
test_cmp expect actual
|
||||
)
|
||||
'
|
||||
|
||||
test_expect_success 'setup make another commit' '
|
||||
# "git log" does not use Bloom filters for root commits - see how, in
|
||||
# revision.c, rev_compare_tree() (the only code path that eventually calls
|
||||
# get_bloom_filter()) is only called by try_to_simplify_commit() when the commit
|
||||
# has one parent. Therefore, make another commit so that we perform the tests on
|
||||
# a non-root commit.
|
||||
test_commit -C highbit2 anotherc2 "another$CENT"
|
||||
'
|
||||
|
||||
test_expect_success 'version 2 changed-path used when version 2 requested' '
|
||||
(
|
||||
cd highbit2 &&
|
||||
test_bloom_filters_used "-- another$CENT"
|
||||
)
|
||||
'
|
||||
|
||||
test_expect_success 'version 2 changed-path not used when version 1 requested' '
|
||||
(
|
||||
cd highbit2 &&
|
||||
git config --add commitGraph.changedPathsVersion 1 &&
|
||||
test_bloom_filters_not_used "-- another$CENT"
|
||||
)
|
||||
'
|
||||
|
||||
test_expect_success 'version 2 changed-path used when autodetect requested' '
|
||||
(
|
||||
cd highbit2 &&
|
||||
git config --add commitGraph.changedPathsVersion -1 &&
|
||||
test_bloom_filters_used "-- another$CENT"
|
||||
)
|
||||
'
|
||||
|
||||
test_expect_success 'when writing another commit graph, preserve existing version 2 of changed-path' '
|
||||
test_commit -C highbit2 c2double "$CENT$CENT" &&
|
||||
git -C highbit2 commit-graph write --reachable --changed-paths &&
|
||||
(
|
||||
cd highbit2 &&
|
||||
git config --add commitGraph.changedPathsVersion -1 &&
|
||||
echo "options: bloom(2,10,7) read_generation_data" >expect &&
|
||||
test-tool read-graph >full &&
|
||||
grep options full >actual &&
|
||||
test_cmp expect actual
|
||||
)
|
||||
'
|
||||
|
||||
test_expect_success 'when writing commit graph, do not reuse changed-path of another version' '
|
||||
git init doublewrite &&
|
||||
test_commit -C doublewrite c "$CENT" &&
|
||||
|
||||
git -C doublewrite config --add commitGraph.changedPathsVersion 1 &&
|
||||
>trace2.txt &&
|
||||
GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
|
||||
git -C doublewrite commit-graph write --reachable --changed-paths &&
|
||||
test_filter_computed 1 trace2.txt &&
|
||||
test_filter_upgraded 0 trace2.txt &&
|
||||
|
||||
git -C doublewrite commit-graph write --reachable --changed-paths &&
|
||||
for v in -2 3
|
||||
do
|
||||
git -C doublewrite config --add commitGraph.changedPathsVersion $v &&
|
||||
git -C doublewrite commit-graph write --reachable --changed-paths 2>err &&
|
||||
cat >expect <<-EOF &&
|
||||
warning: attempting to write a commit-graph, but ${SQ}commitGraph.changedPathsVersion${SQ} ($v) is not supported
|
||||
EOF
|
||||
test_cmp expect err || return 1
|
||||
done &&
|
||||
|
||||
git -C doublewrite config --add commitGraph.changedPathsVersion 2 &&
|
||||
>trace2.txt &&
|
||||
GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
|
||||
git -C doublewrite commit-graph write --reachable --changed-paths &&
|
||||
test_filter_computed 1 trace2.txt &&
|
||||
test_filter_upgraded 0 trace2.txt &&
|
||||
|
||||
(
|
||||
cd doublewrite &&
|
||||
echo "c01f" >expect &&
|
||||
get_first_changed_path_filter >actual &&
|
||||
test_cmp expect actual
|
||||
)
|
||||
'
|
||||
|
||||
test_expect_success 'when writing commit graph, reuse changed-path of another version where possible' '
|
||||
git init upgrade &&
|
||||
|
||||
test_commit -C upgrade base no-high-bits &&
|
||||
|
||||
git -C upgrade config --add commitGraph.changedPathsVersion 1 &&
|
||||
>trace2.txt &&
|
||||
GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
|
||||
git -C upgrade commit-graph write --reachable --changed-paths &&
|
||||
test_filter_computed 1 trace2.txt &&
|
||||
test_filter_upgraded 0 trace2.txt &&
|
||||
|
||||
git -C upgrade config --add commitGraph.changedPathsVersion 2 &&
|
||||
>trace2.txt &&
|
||||
GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
|
||||
git -C upgrade commit-graph write --reachable --changed-paths &&
|
||||
test_filter_computed 0 trace2.txt &&
|
||||
test_filter_upgraded 1 trace2.txt
|
||||
'
|
||||
|
||||
corrupt_graph () {
|
||||
graph=.git/objects/info/commit-graph &&
|
||||
test_when_finished "rm -rf $graph" &&
|
||||
git commit-graph write --reachable --changed-paths &&
|
||||
corrupt_chunk_file $graph "$@"
|
||||
|
|
Loading…
Reference in New Issue