Browse Source
Introduce an extension to the commit-graph to make it efficient to check for the paths that were modified at each commit using Bloom filters.

* gs/commit-graph-path-filter:
  bloom: ignore renames when computing changed paths
  commit-graph: add GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS test flag
  t4216: add end to end tests for git log with Bloom filters
  revision.c: add trace2 stats around Bloom filter usage
  revision.c: use Bloom filters to speed up path based revision walks
  commit-graph: add --changed-paths option to write subcommand
  commit-graph: reuse existing Bloom filters during write
  commit-graph: write Bloom filters to commit graph file
  commit-graph: examine commits by generation number
  commit-graph: examine changed-path objects in pack order
  commit-graph: compute Bloom filters for changed paths
  diff: halt tree-diff early after max_changes
  bloom.c: core Bloom filter implementation for changed paths.
  bloom.c: introduce core Bloom filter constructs
  bloom.c: add the murmur3 hash implementation
  commit-graph: define and use MAX_NUM_CHUNKS

maint
Junio C Hamano
5 years ago
22 changed files with 1140 additions and 11 deletions
@ -0,0 +1,276 @@
@@ -0,0 +1,276 @@
|
||||
#include "git-compat-util.h" |
||||
#include "bloom.h" |
||||
#include "diff.h" |
||||
#include "diffcore.h" |
||||
#include "revision.h" |
||||
#include "hashmap.h" |
||||
#include "commit-graph.h" |
||||
#include "commit.h" |
||||
|
||||
define_commit_slab(bloom_filter_slab, struct bloom_filter); |
||||
|
||||
struct bloom_filter_slab bloom_filters; |
||||
|
||||
struct pathmap_hash_entry { |
||||
struct hashmap_entry entry; |
||||
const char path[FLEX_ARRAY]; |
||||
}; |
||||
|
||||
/*
 * Rotate the 32-bit 'value' to the left by 'count' bit positions.
 *
 * Only the low five bits of 'count' are used, so a count of 0 or 32
 * returns 'value' unchanged and neither shift below ever reaches the
 * width of the type.
 */
static uint32_t rotate_left(uint32_t value, int32_t count)
{
	const uint32_t width_mask = 8 * sizeof(uint32_t) - 1;
	const uint32_t up = (uint32_t)count & width_mask;
	const uint32_t down = (0u - up) & width_mask;

	return (value << up) | (value >> down);
}
||||
|
||||
static inline unsigned char get_bitmask(uint32_t pos) |
||||
{ |
||||
return ((unsigned char)1) << (pos & (BITS_PER_WORD - 1)); |
||||
} |
||||
|
||||
static int load_bloom_filter_from_graph(struct commit_graph *g, |
||||
struct bloom_filter *filter, |
||||
struct commit *c) |
||||
{ |
||||
uint32_t lex_pos, start_index, end_index; |
||||
|
||||
while (c->graph_pos < g->num_commits_in_base) |
||||
g = g->base_graph; |
||||
|
||||
/* The commit graph commit 'c' lives in doesn't carry bloom filters. */ |
||||
if (!g->chunk_bloom_indexes) |
||||
return 0; |
||||
|
||||
lex_pos = c->graph_pos - g->num_commits_in_base; |
||||
|
||||
end_index = get_be32(g->chunk_bloom_indexes + 4 * lex_pos); |
||||
|
||||
if (lex_pos > 0) |
||||
start_index = get_be32(g->chunk_bloom_indexes + 4 * (lex_pos - 1)); |
||||
else |
||||
start_index = 0; |
||||
|
||||
filter->len = end_index - start_index; |
||||
filter->data = (unsigned char *)(g->chunk_bloom_data + |
||||
sizeof(unsigned char) * start_index + |
||||
BLOOMDATA_CHUNK_HEADER_SIZE); |
||||
|
||||
return 1; |
||||
} |
||||
|
||||
/*
 * Calculate the murmur3 32-bit hash value for the given data
 * using the given seed.
 * Produces a uniformly distributed hash value.
 * Not considered to be cryptographically secure.
 * Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
 *
 * 'data' points to 'len' bytes (it need not be NUL-terminated); the return
 * value is the 32-bit hash.
 */
uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len)
{
	const uint32_t c1 = 0xcc9e2d51;
	const uint32_t c2 = 0x1b873593;
	const uint32_t r1 = 15;
	const uint32_t r2 = 13;
	const uint32_t m = 5;
	const uint32_t n = 0xe6546b64;
	/*
	 * Read the input as unsigned bytes. Plain 'char' may be signed, in
	 * which case a byte >= 0x80 converted to uint32_t is sign-extended and
	 * smears one-bits over the higher bytes of the block. That would make
	 * the result depend on the platform's char signedness and differ from
	 * murmur3 for non-ASCII paths.
	 */
	const unsigned char *bytes = (const unsigned char *)data;
	/* size_t, not int: 'len' must not be truncated for large inputs. */
	const size_t nr_blocks = len / sizeof(uint32_t);
	const unsigned char *tail = bytes + nr_blocks * sizeof(uint32_t);
	uint32_t k1 = 0;
	size_t i;

	/* Body: consume the input four bytes at a time, little-endian. */
	for (i = 0; i < nr_blocks; i++) {
		const unsigned char *block = bytes + 4 * i;
		uint32_t k = (uint32_t)block[0] |
			     ((uint32_t)block[1] << 8) |
			     ((uint32_t)block[2] << 16) |
			     ((uint32_t)block[3] << 24);

		k *= c1;
		k = rotate_left(k, r1);
		k *= c2;

		seed ^= k;
		seed = rotate_left(seed, r2) * m + n;
	}

	/* Tail: mix in the remaining 0-3 bytes. */
	switch (len & (sizeof(uint32_t) - 1)) {
	case 3:
		k1 ^= ((uint32_t)tail[2]) << 16;
		/*-fallthrough*/
	case 2:
		k1 ^= ((uint32_t)tail[1]) << 8;
		/*-fallthrough*/
	case 1:
		k1 ^= ((uint32_t)tail[0]) << 0;
		k1 *= c1;
		k1 = rotate_left(k1, r1);
		k1 *= c2;
		seed ^= k1;
		break;
	}

	/* Finalization: fold in the length and avalanche the bits. */
	seed ^= (uint32_t)len;
	seed ^= (seed >> 16);
	seed *= 0x85ebca6b;
	seed ^= (seed >> 13);
	seed *= 0xc2b2ae35;
	seed ^= (seed >> 16);

	return seed;
}
||||
|
||||
void fill_bloom_key(const char *data, |
||||
size_t len, |
||||
struct bloom_key *key, |
||||
const struct bloom_filter_settings *settings) |
||||
{ |
||||
int i; |
||||
const uint32_t seed0 = 0x293ae76f; |
||||
const uint32_t seed1 = 0x7e646e2c; |
||||
const uint32_t hash0 = murmur3_seeded(seed0, data, len); |
||||
const uint32_t hash1 = murmur3_seeded(seed1, data, len); |
||||
|
||||
key->hashes = (uint32_t *)xcalloc(settings->num_hashes, sizeof(uint32_t)); |
||||
for (i = 0; i < settings->num_hashes; i++) |
||||
key->hashes[i] = hash0 + i * hash1; |
||||
} |
||||
|
||||
void add_key_to_filter(const struct bloom_key *key, |
||||
struct bloom_filter *filter, |
||||
const struct bloom_filter_settings *settings) |
||||
{ |
||||
int i; |
||||
uint64_t mod = filter->len * BITS_PER_WORD; |
||||
|
||||
for (i = 0; i < settings->num_hashes; i++) { |
||||
uint64_t hash_mod = key->hashes[i] % mod; |
||||
uint64_t block_pos = hash_mod / BITS_PER_WORD; |
||||
|
||||
filter->data[block_pos] |= get_bitmask(hash_mod); |
||||
} |
||||
} |
||||
|
||||
void init_bloom_filters(void) |
||||
{ |
||||
init_bloom_filter_slab(&bloom_filters); |
||||
} |
||||
|
||||
struct bloom_filter *get_bloom_filter(struct repository *r, |
||||
struct commit *c, |
||||
int compute_if_not_present) |
||||
{ |
||||
struct bloom_filter *filter; |
||||
struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS; |
||||
int i; |
||||
struct diff_options diffopt; |
||||
int max_changes = 512; |
||||
|
||||
if (bloom_filters.slab_size == 0) |
||||
return NULL; |
||||
|
||||
filter = bloom_filter_slab_at(&bloom_filters, c); |
||||
|
||||
if (!filter->data) { |
||||
load_commit_graph_info(r, c); |
||||
if (c->graph_pos != COMMIT_NOT_FROM_GRAPH && |
||||
r->objects->commit_graph->chunk_bloom_indexes) { |
||||
if (load_bloom_filter_from_graph(r->objects->commit_graph, filter, c)) |
||||
return filter; |
||||
else |
||||
return NULL; |
||||
} |
||||
} |
||||
|
||||
if (filter->data || !compute_if_not_present) |
||||
return filter; |
||||
|
||||
repo_diff_setup(r, &diffopt); |
||||
diffopt.flags.recursive = 1; |
||||
diffopt.detect_rename = 0; |
||||
diffopt.max_changes = max_changes; |
||||
diff_setup_done(&diffopt); |
||||
|
||||
if (c->parents) |
||||
diff_tree_oid(&c->parents->item->object.oid, &c->object.oid, "", &diffopt); |
||||
else |
||||
diff_tree_oid(NULL, &c->object.oid, "", &diffopt); |
||||
diffcore_std(&diffopt); |
||||
|
||||
if (diff_queued_diff.nr <= max_changes) { |
||||
struct hashmap pathmap; |
||||
struct pathmap_hash_entry *e; |
||||
struct hashmap_iter iter; |
||||
hashmap_init(&pathmap, NULL, NULL, 0); |
||||
|
||||
for (i = 0; i < diff_queued_diff.nr; i++) { |
||||
const char *path = diff_queued_diff.queue[i]->two->path; |
||||
|
||||
/* |
||||
* Add each leading directory of the changed file, i.e. for |
||||
* 'dir/subdir/file' add 'dir' and 'dir/subdir' as well, so |
||||
* the Bloom filter could be used to speed up commands like |
||||
* 'git log dir/subdir', too. |
||||
* |
||||
* Note that directories are added without the trailing '/'. |
||||
*/ |
||||
do { |
||||
char *last_slash = strrchr(path, '/'); |
||||
|
||||
FLEX_ALLOC_STR(e, path, path); |
||||
hashmap_entry_init(&e->entry, strhash(path)); |
||||
hashmap_add(&pathmap, &e->entry); |
||||
|
||||
if (!last_slash) |
||||
last_slash = (char*)path; |
||||
*last_slash = '\0'; |
||||
|
||||
} while (*path); |
||||
|
||||
diff_free_filepair(diff_queued_diff.queue[i]); |
||||
} |
||||
|
||||
filter->len = (hashmap_get_size(&pathmap) * settings.bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD; |
||||
filter->data = xcalloc(filter->len, sizeof(unsigned char)); |
||||
|
||||
hashmap_for_each_entry(&pathmap, &iter, e, entry) { |
||||
struct bloom_key key; |
||||
fill_bloom_key(e->path, strlen(e->path), &key, &settings); |
||||
add_key_to_filter(&key, filter, &settings); |
||||
} |
||||
|
||||
hashmap_free_entries(&pathmap, struct pathmap_hash_entry, entry); |
||||
} else { |
||||
for (i = 0; i < diff_queued_diff.nr; i++) |
||||
diff_free_filepair(diff_queued_diff.queue[i]); |
||||
filter->data = NULL; |
||||
filter->len = 0; |
||||
} |
||||
|
||||
free(diff_queued_diff.queue); |
||||
DIFF_QUEUE_CLEAR(&diff_queued_diff); |
||||
|
||||
return filter; |
||||
} |
||||
|
||||
int bloom_filter_contains(const struct bloom_filter *filter, |
||||
const struct bloom_key *key, |
||||
const struct bloom_filter_settings *settings) |
||||
{ |
||||
int i; |
||||
uint64_t mod = filter->len * BITS_PER_WORD; |
||||
|
||||
if (!mod) |
||||
return -1; |
||||
|
||||
for (i = 0; i < settings->num_hashes; i++) { |
||||
uint64_t hash_mod = key->hashes[i] % mod; |
||||
uint64_t block_pos = hash_mod / BITS_PER_WORD; |
||||
if (!(filter->data[block_pos] & get_bitmask(hash_mod))) |
||||
return 0; |
||||
} |
||||
|
||||
return 1; |
||||
} |
@ -0,0 +1,90 @@
@@ -0,0 +1,90 @@
|
||||
#ifndef BLOOM_H |
||||
#define BLOOM_H |
||||
|
||||
struct commit; |
||||
struct repository; |
||||
|
||||
struct bloom_filter_settings {
	/*
	 * The version of the hashing technique being used.
	 * We currently only support version = 1 which is
	 * the seeded murmur3 hashing technique implemented
	 * in bloom.c.
	 */
	uint32_t hash_version;

	/*
	 * The number of times a path is hashed, i.e. the
	 * number of bit positions that cumulatively
	 * determine whether a path is present in the
	 * Bloom filter.
	 */
	uint32_t num_hashes;

	/*
	 * The minimum number of bits per entry in the Bloom
	 * filter. If the filter contains 'n' entries, then
	 * filter size is the minimum number of 8-bit words
	 * that contain n*b bits.
	 */
	uint32_t bits_per_entry;
};
||||
|
||||
/* Initializer for struct bloom_filter_settings: version 1, 7 hashes, 10 bits per entry. */
#define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10 }
/* Filters are addressed in 8-bit words. */
#define BITS_PER_WORD 8
/*
 * Size in bytes of the header preceding the filter data in the Bloom data
 * chunk. Parenthesized so that the macro expands to a single operand in
 * any expression (e.g. 'x / BLOOMDATA_CHUNK_HEADER_SIZE').
 */
#define BLOOMDATA_CHUNK_HEADER_SIZE (3 * sizeof(uint32_t))
||||
|
||||
/*
 * A bloom_filter struct represents a data segment to
 * use when testing hash values. 'data' is the bit array
 * and 'len' is the number of 8-bit words (bytes) it
 * holds, so the filter has len * BITS_PER_WORD bits.
 */
struct bloom_filter {
	unsigned char *data;
	size_t len;
};
||||
|
||||
/*
 * A bloom_key holds the k hash values of one string.
 * They can be computed once (see fill_bloom_key()) and
 * then re-used to test the same string against many
 * bloom_filters. k is given by the Bloom filter
 * settings and is the same for all Bloom filters and
 * keys interacting with the loaded version of the
 * commit graph file and the Bloom data chunks.
 */
struct bloom_key {
	uint32_t *hashes;
};
||||
|
||||
/*
 * Calculate the murmur3 32-bit hash value for the given data
 * using the given seed.
 * Produces a uniformly distributed hash value.
 * Not considered to be cryptographically secure.
 * Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
 */
uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len);

/*
 * Compute the settings->num_hashes hash values of the 'len' bytes at
 * 'data' and store them in a newly allocated key->hashes.
 */
void fill_bloom_key(const char *data,
		    size_t len,
		    struct bloom_key *key,
		    const struct bloom_filter_settings *settings);

/*
 * Set the bits of 'filter' that correspond to the hash values in 'key'.
 */
void add_key_to_filter(const struct bloom_key *key,
		       struct bloom_filter *filter,
		       const struct bloom_filter_settings *settings);

/*
 * Set up the per-commit filter storage used by get_bloom_filter().
 */
void init_bloom_filters(void);

/*
 * Return the Bloom filter for commit 'c', loading it from the
 * commit-graph if possible and computing it from the commit's diff only
 * when 'compute_if_not_present' is non-zero. May return NULL.
 */
struct bloom_filter *get_bloom_filter(struct repository *r,
				      struct commit *c,
				      int compute_if_not_present);

/*
 * Returns -1 if 'filter' has zero length, 0 if 'key' is definitely not
 * in 'filter', and 1 if it may be.
 */
int bloom_filter_contains(const struct bloom_filter *filter,
			  const struct bloom_key *key,
			  const struct bloom_filter_settings *settings);
||||
|
||||
#endif |
@ -0,0 +1,81 @@
@@ -0,0 +1,81 @@
|
||||
#include "git-compat-util.h" |
||||
#include "bloom.h" |
||||
#include "test-tool.h" |
||||
#include "commit.h" |
||||
|
||||
/*
 * Settings shared by all subcommands of this helper. Kept file-local: the
 * helper is only one of the sources linked into test-tool, and a global
 * with such a generic name should not have external linkage.
 */
static struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS;
||||
|
||||
static void add_string_to_filter(const char *data, struct bloom_filter *filter) { |
||||
struct bloom_key key; |
||||
int i; |
||||
|
||||
fill_bloom_key(data, strlen(data), &key, &settings); |
||||
printf("Hashes:"); |
||||
for (i = 0; i < settings.num_hashes; i++){ |
||||
printf("0x%08x|", key.hashes[i]); |
||||
} |
||||
printf("\n"); |
||||
add_key_to_filter(&key, filter, &settings); |
||||
} |
||||
|
||||
static void print_bloom_filter(struct bloom_filter *filter) { |
||||
int i; |
||||
|
||||
if (!filter) { |
||||
printf("No filter.\n"); |
||||
return; |
||||
} |
||||
printf("Filter_Length:%d\n", (int)filter->len); |
||||
printf("Filter_Data:"); |
||||
for (i = 0; i < filter->len; i++){ |
||||
printf("%02x|", filter->data[i]); |
||||
} |
||||
printf("\n"); |
||||
} |
||||
|
||||
static void get_bloom_filter_for_commit(const struct object_id *commit_oid) |
||||
{ |
||||
struct commit *c; |
||||
struct bloom_filter *filter; |
||||
setup_git_directory(); |
||||
c = lookup_commit(the_repository, commit_oid); |
||||
filter = get_bloom_filter(the_repository, c, 1); |
||||
print_bloom_filter(filter); |
||||
} |
||||
|
||||
int cmd__bloom(int argc, const char **argv) |
||||
{ |
||||
if (!strcmp(argv[1], "get_murmur3")) { |
||||
uint32_t hashed = murmur3_seeded(0, argv[2], strlen(argv[2])); |
||||
printf("Murmur3 Hash with seed=0:0x%08x\n", hashed); |
||||
} |
||||
|
||||
if (!strcmp(argv[1], "generate_filter")) { |
||||
struct bloom_filter filter; |
||||
int i = 2; |
||||
filter.len = (settings.bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD; |
||||
filter.data = xcalloc(filter.len, sizeof(unsigned char)); |
||||
|
||||
if (!argv[2]){ |
||||
die("at least one input string expected"); |
||||
} |
||||
|
||||
while (argv[i]) { |
||||
add_string_to_filter(argv[i], &filter); |
||||
i++; |
||||
} |
||||
|
||||
print_bloom_filter(&filter); |
||||
} |
||||
|
||||
if (!strcmp(argv[1], "get_filter_for_commit")) { |
||||
struct object_id oid; |
||||
const char *end; |
||||
if (parse_oid_hex(argv[2], &oid, &end)) |
||||
die("cannot parse oid '%s'", argv[2]); |
||||
init_bloom_filters(); |
||||
get_bloom_filter_for_commit(&oid); |
||||
} |
||||
|
||||
return 0; |
||||
} |
@ -0,0 +1,117 @@
@@ -0,0 +1,117 @@
|
||||
#!/bin/sh

test_description='Testing the various Bloom filter computations in bloom.c'
. ./test-lib.sh

# murmur3 with seed 0, checked against fixed expected hash values.

test_expect_success 'compute unseeded murmur3 hash for empty string' '
	cat >expect <<-\EOF &&
	Murmur3 Hash with seed=0:0x00000000
	EOF
	test-tool bloom get_murmur3 "" >actual &&
	test_cmp expect actual
'

test_expect_success 'compute unseeded murmur3 hash for test string 1' '
	cat >expect <<-\EOF &&
	Murmur3 Hash with seed=0:0x627b0c2c
	EOF
	test-tool bloom get_murmur3 "Hello world!" >actual &&
	test_cmp expect actual
'

test_expect_success 'compute unseeded murmur3 hash for test string 2' '
	cat >expect <<-\EOF &&
	Murmur3 Hash with seed=0:0x2e4ff723
	EOF
	test-tool bloom get_murmur3 "The quick brown fox jumps over the lazy dog" >actual &&
	test_cmp expect actual
'

# Key generation: the seven hash values of one string and the two-byte
# filter that results from adding just that string.

test_expect_success 'compute bloom key for empty string' '
	cat >expect <<-\EOF &&
	Hashes:0x5615800c|0x5b966560|0x61174ab4|0x66983008|0x6c19155c|0x7199fab0|0x771ae004|
	Filter_Length:2
	Filter_Data:11|11|
	EOF
	test-tool bloom generate_filter "" >actual &&
	test_cmp expect actual
'

test_expect_success 'compute bloom key for whitespace' '
	cat >expect <<-\EOF &&
	Hashes:0xf178874c|0x5f3d6eb6|0xcd025620|0x3ac73d8a|0xa88c24f4|0x16510c5e|0x8415f3c8|
	Filter_Length:2
	Filter_Data:51|55|
	EOF
	test-tool bloom generate_filter " " >actual &&
	test_cmp expect actual
'

test_expect_success 'compute bloom key for test string 1' '
	cat >expect <<-\EOF &&
	Hashes:0xb270de9b|0x1bb6f26e|0x84fd0641|0xee431a14|0x57892de7|0xc0cf41ba|0x2a15558d|
	Filter_Length:2
	Filter_Data:92|6c|
	EOF
	test-tool bloom generate_filter "Hello world!" >actual &&
	test_cmp expect actual
'

test_expect_success 'compute bloom key for test string 2' '
	cat >expect <<-\EOF &&
	Hashes:0x20ab385b|0xf5237fe2|0xc99bc769|0x9e140ef0|0x728c5677|0x47049dfe|0x1b7ce585|
	Filter_Length:2
	Filter_Data:a5|4a|
	EOF
	test-tool bloom generate_filter "file.txt" >actual &&
	test_cmp expect actual
'

# Filters computed from real commits.

test_expect_success 'get bloom filters for commit with no changes' '
	git init &&
	git commit --allow-empty -m "c0" &&
	cat >expect <<-\EOF &&
	Filter_Length:0
	Filter_Data:
	EOF
	test-tool bloom get_filter_for_commit "$(git rev-parse HEAD)" >actual &&
	test_cmp expect actual
'

# The "|| return 1" in the loops below makes a failing iteration fail the
# test; without it only the status of the last iteration would count.

test_expect_success 'get bloom filter for commit with 10 changes' '
	rm actual &&
	rm expect &&
	mkdir smallDir &&
	for i in $(test_seq 0 9)
	do
		echo $i >smallDir/$i || return 1
	done &&
	git add smallDir &&
	git commit -m "commit with 10 changes" &&
	cat >expect <<-\EOF &&
	Filter_Length:25
	Filter_Data:82|a0|65|47|0c|92|90|c0|a1|40|02|a0|e2|40|e0|04|0a|9a|66|cf|80|19|85|42|23|
	EOF
	test-tool bloom get_filter_for_commit "$(git rev-parse HEAD)" >actual &&
	test_cmp expect actual
'

test_expect_success EXPENSIVE 'get bloom filter for commit with 513 changes' '
	rm actual &&
	rm expect &&
	mkdir bigDir &&
	for i in $(test_seq 0 512)
	do
		echo $i >bigDir/$i || return 1
	done &&
	git add bigDir &&
	git commit -m "commit with 513 changes" &&
	cat >expect <<-\EOF &&
	Filter_Length:0
	Filter_Data:
	EOF
	test-tool bloom get_filter_for_commit "$(git rev-parse HEAD)" >actual &&
	test_cmp expect actual
'

test_done
@ -0,0 +1,155 @@
@@ -0,0 +1,155 @@
|
||||
#!/bin/sh

test_description='git log for a path with Bloom filters'
. ./test-lib.sh

# Set both commit-graph test knobs to 0 for this script; the graphs used
# below are written explicitly with "git commit-graph write".
GIT_TEST_COMMIT_GRAPH=0
GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0

# History: ten commits on master touching files at three directory depths,
# a side branch merged back, then a rename and a file removal. Finally a
# commit-graph with changed-path filters is written.
test_expect_success 'setup test - repo, commits, commit graph, log outputs' '
	git init &&
	mkdir A A/B A/B/C &&
	test_commit c1 A/file1 &&
	test_commit c2 A/B/file2 &&
	test_commit c3 A/B/C/file3 &&
	test_commit c4 A/file1 &&
	test_commit c5 A/B/file2 &&
	test_commit c6 A/B/C/file3 &&
	test_commit c7 A/file1 &&
	test_commit c8 A/B/file2 &&
	test_commit c9 A/B/C/file3 &&
	test_commit c10 file_to_be_deleted &&
	git checkout -b side HEAD~4 &&
	test_commit side-1 file4 &&
	git checkout master &&
	git merge side &&
	test_commit c11 file5 &&
	mv file5 file5_renamed &&
	git add file5_renamed &&
	git commit -m "rename" &&
	rm file_to_be_deleted &&
	git add . &&
	git commit -m "file removed" &&
	git commit-graph write --reachable --changed-paths
'
||||
# Check that "test-tool read-graph" reports a graph with $1 commits and
# the five chunks listed below, including both Bloom chunks.
graph_read_expect () {
	NUM_CHUNKS=5 &&
	cat >expect <<- EOF &&
	header: 43475048 1 1 $NUM_CHUNKS 0
	num_commits: $1
	chunks: oid_fanout oid_lookup commit_metadata bloom_indexes bloom_data
	EOF
	test-tool read-graph >actual &&
	test_cmp expect actual
}
||||
|
||||
test_expect_success 'commit-graph write wrote out the bloom chunks' '
	graph_read_expect 15
'

# The helpers below inspect the trace2 perf output they request themselves,
# so drop any trace2 settings inherited from the environment.
sane_unset GIT_TRACE2 GIT_TRACE2_PERF GIT_TRACE2_EVENT
sane_unset GIT_TRACE2_PERF_BRIEF
sane_unset GIT_TRACE2_CONFIG_PARAMS
||||
|
||||
# Run "git log" with the arguments in $1 twice: once with the commit-graph
# disabled (output in log_wo_bloom) and once with it enabled while writing
# trace2 perf data to trace.perf (output in log_w_bloom).
#
# $1 is deliberately left unquoted so that it is split into separate
# arguments.
setup () {
	# The trace file does not exist on the first call, hence "-f".
	rm -f "$TRASH_DIRECTORY/trace.perf" &&
	git -c core.commitGraph=false log --pretty="format:%s" $1 >log_wo_bloom &&
	GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace.perf" git -c core.commitGraph=true log --pretty="format:%s" $1 >log_w_bloom
}
||||
|
||||
# Succeeds if "git log $1" gives the same output with and without the
# commit-graph, and the trace shows Bloom filter statistics in which no
# filter was missing and none had zero length.
test_bloom_filters_used () {
	log_args=$1
	bloom_trace_prefix="statistics:{\"filter_not_present\":0,\"zero_length_filter\":0,\"maybe\""
	setup "$log_args" &&
	grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" &&
	test_cmp log_wo_bloom log_w_bloom &&
	test_path_is_file "$TRASH_DIRECTORY/trace.perf"
}
||||
|
||||
# Succeeds if "git log $1" gives the same output with and without the
# commit-graph, and the trace contains no Bloom filter statistics at all.
test_bloom_filters_not_used () {
	log_args=$1
	setup "$log_args" &&
	! grep -q "statistics:{\"filter_not_present\":" "$TRASH_DIRECTORY/trace.perf" &&
	test_cmp log_wo_bloom log_w_bloom
}
||||
|
||||
# Every path in the history, combined with every supported log option,
# must use the Bloom filters and must not change the log output.
for path in A A/B A/B/C A/file1 A/B/file2 A/B/C/file3 file4 file5 file5_renamed file_to_be_deleted
do
	for option in "" \
		      "--all" \
		      "--full-history" \
		      "--full-history --simplify-merges" \
		      "--simplify-merges" \
		      "--simplify-by-decoration" \
		      "--follow" \
		      "--first-parent" \
		      "--topo-order" \
		      "--date-order" \
		      "--author-date-order" \
		      "--ancestry-path side..master"
	do
		test_expect_success "git log option: $option for path: $path" '
			test_bloom_filters_used "$option -- $path"
		'
	done
done

test_expect_success 'git log -- folder works with and without the trailing slash' '
	test_bloom_filters_used "-- A" &&
	test_bloom_filters_used "-- A/"
'

test_expect_success 'git log for path that does not exist. ' '
	test_bloom_filters_used "-- path_does_not_exist"
'

# Cases in which the filters must be bypassed.

test_expect_success 'git log with --walk-reflogs does not use Bloom filters' '
	test_bloom_filters_not_used "--walk-reflogs -- A"
'

test_expect_success 'git log -- multiple path specs does not use Bloom filters' '
	test_bloom_filters_not_used "-- file4 A/file1"
'

test_expect_success 'git log with wildcard that resolves to a single path uses Bloom filters' '
	test_bloom_filters_used "-- *4" &&
	test_bloom_filters_used "-- *renamed"
'

test_expect_success 'git log with wildcard that resolves to a multiple paths does not uses Bloom filters' '
	test_bloom_filters_not_used "-- *" &&
	test_bloom_filters_not_used "-- file*"
'

# Split commit-graph chains: first add a layer without filters, then one
# with filters on top of it.

test_expect_success 'setup - add commit-graph to the chain without Bloom filters' '
	test_commit c14 A/anotherFile2 &&
	test_commit c15 A/B/anotherFile2 &&
	test_commit c16 A/B/C/anotherFile2 &&
	GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0 git commit-graph write --reachable --split &&
	test_line_count = 2 .git/objects/info/commit-graphs/commit-graph-chain
'

test_expect_success 'Do not use Bloom filters if the latest graph does not have Bloom filters.' '
	test_bloom_filters_not_used "-- A/B"
'

test_expect_success 'setup - add commit-graph to the chain with Bloom filters' '
	test_commit c17 A/anotherFile3 &&
	git commit-graph write --reachable --changed-paths --split &&
	test_line_count = 3 .git/objects/info/commit-graphs/commit-graph-chain
'
||||
|
||||
# Like test_bloom_filters_used, but expects the exact statistics of the
# three-layer chain set up above: 3 commits without a filter, 8 "maybe"
# answers and 6 "definitely not" answers.
test_bloom_filters_used_when_some_filters_are_missing () {
	log_args=$1
	bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"zero_length_filter\":0,\"maybe\":8,\"definitely_not\":6"
	setup "$log_args" &&
	grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" &&
	test_cmp log_wo_bloom log_w_bloom
}
||||
|
||||
# With filters in the top layer the walk uses them again, even though the
# middle layer of the chain has none.
test_expect_success 'Use Bloom filters if they exist in the latest but not all commit graphs in the chain.' '
	test_bloom_filters_used_when_some_filters_are_missing "-- A/B"
'

test_done
Loading…
Reference in new issue