last-modified: use Bloom filters when available
Our 'git last-modified' command performs a revision walk, and computes a diff at each point in the walk to figure out whether a given revision changed any of the paths it considers interesting.

When changed-path Bloom filters are available, we can avoid computing many such diffs. Before computing a diff, we first check whether any of the remaining paths of interest were possibly changed at a given commit by consulting its Bloom filter. If any of those queries returns "maybe", we have no choice but to compute the diff.

If none of those queries returned "maybe", we know that the given commit doesn't contain any changed paths which are interesting to us, so we can skip computing the diff in that case.

Comparing the perf test results on git.git:

    Test                                        HEAD~             HEAD
    ------------------------------------------------------------------------------------
    8020.1: top-level last-modified             4.49(4.34+0.11)   2.22(2.05+0.09) -50.6%
    8020.2: top-level recursive last-modified   5.64(5.45+0.11)   5.62(5.30+0.11) -0.4%
    8020.3: subdir last-modified                0.11(0.06+0.04)   0.07(0.03+0.04) -36.4%

Based-on-patch-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Toon Claes <toon@iotcl.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

main
parent
97d5301c54
commit
8d9a7cdfda
|
@ -1,5 +1,7 @@
|
||||||
#include "git-compat-util.h"
|
#include "git-compat-util.h"
|
||||||
|
#include "bloom.h"
|
||||||
#include "builtin.h"
|
#include "builtin.h"
|
||||||
|
#include "commit-graph.h"
|
||||||
#include "commit.h"
|
#include "commit.h"
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
#include "diff.h"
|
#include "diff.h"
|
||||||
|
@ -18,6 +20,7 @@
|
||||||
struct last_modified_entry {
|
struct last_modified_entry {
|
||||||
struct hashmap_entry hashent;
|
struct hashmap_entry hashent;
|
||||||
struct object_id oid;
|
struct object_id oid;
|
||||||
|
struct bloom_key key;
|
||||||
const char path[FLEX_ARRAY];
|
const char path[FLEX_ARRAY];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -42,6 +45,12 @@ struct last_modified {
|
||||||
|
|
||||||
static void last_modified_release(struct last_modified *lm)
|
static void last_modified_release(struct last_modified *lm)
|
||||||
{
|
{
|
||||||
|
struct hashmap_iter iter;
|
||||||
|
struct last_modified_entry *ent;
|
||||||
|
|
||||||
|
hashmap_for_each_entry(&lm->paths, &iter, ent, hashent)
|
||||||
|
bloom_key_clear(&ent->key);
|
||||||
|
|
||||||
hashmap_clear_and_free(&lm->paths, struct last_modified_entry, hashent);
|
hashmap_clear_and_free(&lm->paths, struct last_modified_entry, hashent);
|
||||||
release_revisions(&lm->rev);
|
release_revisions(&lm->rev);
|
||||||
}
|
}
|
||||||
|
@ -63,6 +72,9 @@ static void add_path_from_diff(struct diff_queue_struct *q,
|
||||||
|
|
||||||
FLEX_ALLOC_STR(ent, path, path);
|
FLEX_ALLOC_STR(ent, path, path);
|
||||||
oidcpy(&ent->oid, &p->two->oid);
|
oidcpy(&ent->oid, &p->two->oid);
|
||||||
|
if (lm->rev.bloom_filter_settings)
|
||||||
|
bloom_key_fill(&ent->key, path, strlen(path),
|
||||||
|
lm->rev.bloom_filter_settings);
|
||||||
hashmap_entry_init(&ent->hashent, strhash(ent->path));
|
hashmap_entry_init(&ent->hashent, strhash(ent->path));
|
||||||
hashmap_add(&lm->paths, &ent->hashent);
|
hashmap_add(&lm->paths, &ent->hashent);
|
||||||
}
|
}
|
||||||
|
@ -139,6 +151,7 @@ static void mark_path(const char *path, const struct object_id *oid,
|
||||||
last_modified_emit(data->lm, path, data->commit);
|
last_modified_emit(data->lm, path, data->commit);
|
||||||
|
|
||||||
hashmap_remove(&data->lm->paths, &ent->hashent, path);
|
hashmap_remove(&data->lm->paths, &ent->hashent, path);
|
||||||
|
bloom_key_clear(&ent->key);
|
||||||
free(ent);
|
free(ent);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -182,6 +195,30 @@ static void last_modified_diff(struct diff_queue_struct *q,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Ask the changed-path Bloom filter of `origin` whether that commit might
 * have touched any path still present in lm->paths.
 *
 * Returns true ("maybe") when:
 *   - lm->rev.bloom_filter_settings is not set,
 *   - the commit's generation number is GENERATION_NUMBER_INFINITY
 *     (presumably: the commit is not covered by the commit-graph; confirm
 *     against commit-graph.h),
 *   - no Bloom filter could be obtained for the commit, or
 *   - the filter reports a possible match for at least one remaining path.
 *
 * Returns false only when the filter was consulted for every entry in
 * lm->paths and none of them matched.
 */
static bool maybe_changed_path(struct last_modified *lm, struct commit *origin)
|
||||||
|
{
|
||||||
|
struct bloom_filter *filter;
|
||||||
|
struct last_modified_entry *ent;
|
||||||
|
struct hashmap_iter iter;
|
||||||
|
|
||||||
|
/* Without Bloom filter settings there is nothing to query. */
if (!lm->rev.bloom_filter_settings)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
if (commit_graph_generation(origin) == GENERATION_NUMBER_INFINITY)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
filter = get_bloom_filter(lm->rev.repo, origin);
|
||||||
|
if (!filter)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
/* One positive answer is enough: stop at the first path that may match. */
hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) {
|
||||||
|
if (bloom_filter_contains(filter, &ent->key,
|
||||||
|
lm->rev.bloom_filter_settings))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
static int last_modified_run(struct last_modified *lm)
|
static int last_modified_run(struct last_modified *lm)
|
||||||
{
|
{
|
||||||
struct last_modified_callback_data data = { .lm = lm };
|
struct last_modified_callback_data data = { .lm = lm };
|
||||||
|
@ -202,9 +239,14 @@ static int last_modified_run(struct last_modified *lm)
|
||||||
&data.commit->object.oid, "",
|
&data.commit->object.oid, "",
|
||||||
&lm->rev.diffopt);
|
&lm->rev.diffopt);
|
||||||
diff_flush(&lm->rev.diffopt);
|
diff_flush(&lm->rev.diffopt);
|
||||||
} else {
|
|
||||||
log_tree_commit(&lm->rev, data.commit);
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!maybe_changed_path(lm, data.commit))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
log_tree_commit(&lm->rev, data.commit);
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -231,6 +273,8 @@ static int last_modified_init(struct last_modified *lm, struct repository *r,
|
||||||
return argc;
|
return argc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
lm->rev.bloom_filter_settings = get_bloom_filter_settings(lm->rev.repo);
|
||||||
|
|
||||||
if (populate_paths_from_revs(lm) < 0)
|
if (populate_paths_from_revs(lm) < 0)
|
||||||
return error(_("unable to setup last-modified"));
|
return error(_("unable to setup last-modified"));
|
||||||
|
|
||||||
|
|
|
@ -820,7 +820,12 @@ int corrected_commit_dates_enabled(struct repository *r)
|
||||||
|
|
||||||
struct bloom_filter_settings *get_bloom_filter_settings(struct repository *r)
|
struct bloom_filter_settings *get_bloom_filter_settings(struct repository *r)
|
||||||
{
|
{
|
||||||
struct commit_graph *g = r->objects->commit_graph;
|
struct commit_graph *g;
|
||||||
|
|
||||||
|
if (!prepare_commit_graph(r))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
g = r->objects->commit_graph;
|
||||||
while (g) {
|
while (g) {
|
||||||
if (g->bloom_filter_settings)
|
if (g->bloom_filter_settings)
|
||||||
return g->bloom_filter_settings;
|
return g->bloom_filter_settings;
|
||||||
|
|
Loading…
Reference in New Issue