Merge branch 'tc/last-modified-active-paths-optimization'
"git last-modified" was optimized by narrowing the set of paths to follow as it dug deeper in the history.
* tc/last-modified-active-paths-optimization:
last-modified: implement faster algorithm
main
commit
99bd5a5c9f
|
|
@ -2,26 +2,32 @@
|
||||||
#include "bloom.h"
|
#include "bloom.h"
|
||||||
#include "builtin.h"
|
#include "builtin.h"
|
||||||
#include "commit-graph.h"
|
#include "commit-graph.h"
|
||||||
|
#include "commit-slab.h"
|
||||||
#include "commit.h"
|
#include "commit.h"
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
#include "environment.h"
|
|
||||||
#include "diff.h"
|
#include "diff.h"
|
||||||
#include "diffcore.h"
|
#include "diffcore.h"
|
||||||
#include "environment.h"
|
#include "environment.h"
|
||||||
|
#include "ewah/ewok.h"
|
||||||
#include "hashmap.h"
|
#include "hashmap.h"
|
||||||
#include "hex.h"
|
#include "hex.h"
|
||||||
#include "log-tree.h"
|
|
||||||
#include "object-name.h"
|
#include "object-name.h"
|
||||||
#include "object.h"
|
#include "object.h"
|
||||||
#include "parse-options.h"
|
#include "parse-options.h"
|
||||||
|
#include "prio-queue.h"
|
||||||
#include "quote.h"
|
#include "quote.h"
|
||||||
#include "repository.h"
|
#include "repository.h"
|
||||||
#include "revision.h"
|
#include "revision.h"
|
||||||
|
|
||||||
|
/* Remember to update object flag allocation in object.h */
|
||||||
|
#define PARENT1 (1u<<16) /* used instead of SEEN */
|
||||||
|
#define PARENT2 (1u<<17) /* used instead of BOTTOM, BOUNDARY */
|
||||||
|
|
||||||
struct last_modified_entry {
|
struct last_modified_entry {
|
||||||
struct hashmap_entry hashent;
|
struct hashmap_entry hashent;
|
||||||
struct object_id oid;
|
struct object_id oid;
|
||||||
struct bloom_key key;
|
struct bloom_key key;
|
||||||
|
size_t diff_idx;
|
||||||
const char path[FLEX_ARRAY];
|
const char path[FLEX_ARRAY];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -37,13 +43,45 @@ static int last_modified_entry_hashcmp(const void *unused UNUSED,
|
||||||
return strcmp(ent1->path, path ? path : ent2->path);
|
return strcmp(ent1->path, path ? path : ent2->path);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Hold a bitmap for each commit we're working with. In the bitmap, each bit
|
||||||
|
* represents a path in `lm->all_paths`. An active bit indicates the path still
|
||||||
|
* needs to be associated to a commit.
|
||||||
|
*/
|
||||||
|
define_commit_slab(active_paths_for_commit, struct bitmap *);
|
||||||
|
|
||||||
struct last_modified {
|
struct last_modified {
|
||||||
struct hashmap paths;
|
struct hashmap paths;
|
||||||
struct rev_info rev;
|
struct rev_info rev;
|
||||||
bool recursive;
|
bool recursive;
|
||||||
bool show_trees;
|
bool show_trees;
|
||||||
|
|
||||||
|
const char **all_paths;
|
||||||
|
size_t all_paths_nr;
|
||||||
|
struct active_paths_for_commit active_paths;
|
||||||
|
|
||||||
|
/* 'scratch' to avoid allocating a bitmap every process_parent() */
|
||||||
|
struct bitmap *scratch;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static struct bitmap *active_paths_for(struct last_modified *lm, struct commit *c)
|
||||||
|
{
|
||||||
|
struct bitmap **bitmap = active_paths_for_commit_at(&lm->active_paths, c);
|
||||||
|
if (!*bitmap)
|
||||||
|
*bitmap = bitmap_word_alloc(lm->all_paths_nr / BITS_IN_EWORD + 1);
|
||||||
|
|
||||||
|
return *bitmap;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void active_paths_free(struct last_modified *lm, struct commit *c)
|
||||||
|
{
|
||||||
|
struct bitmap **bitmap = active_paths_for_commit_at(&lm->active_paths, c);
|
||||||
|
if (*bitmap) {
|
||||||
|
bitmap_free(*bitmap);
|
||||||
|
*bitmap = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void last_modified_release(struct last_modified *lm)
|
static void last_modified_release(struct last_modified *lm)
|
||||||
{
|
{
|
||||||
struct hashmap_iter iter;
|
struct hashmap_iter iter;
|
||||||
|
|
@ -54,6 +92,8 @@ static void last_modified_release(struct last_modified *lm)
|
||||||
|
|
||||||
hashmap_clear_and_free(&lm->paths, struct last_modified_entry, hashent);
|
hashmap_clear_and_free(&lm->paths, struct last_modified_entry, hashent);
|
||||||
release_revisions(&lm->rev);
|
release_revisions(&lm->rev);
|
||||||
|
|
||||||
|
free(lm->all_paths);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct last_modified_callback_data {
|
struct last_modified_callback_data {
|
||||||
|
|
@ -146,7 +186,7 @@ static void mark_path(const char *path, const struct object_id *oid,
|
||||||
* Is it arriving at a version of interest, or is it from a side branch
|
* Is it arriving at a version of interest, or is it from a side branch
|
||||||
* which did not contribute to the final state?
|
* which did not contribute to the final state?
|
||||||
*/
|
*/
|
||||||
if (!oideq(oid, &ent->oid))
|
if (oid && !oideq(oid, &ent->oid))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
last_modified_emit(data->lm, path, data->commit);
|
last_modified_emit(data->lm, path, data->commit);
|
||||||
|
|
@ -196,7 +236,17 @@ static void last_modified_diff(struct diff_queue_struct *q,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool maybe_changed_path(struct last_modified *lm, struct commit *origin)
|
static void pass_to_parent(struct bitmap *c,
|
||||||
|
struct bitmap *p,
|
||||||
|
size_t pos)
|
||||||
|
{
|
||||||
|
bitmap_unset(c, pos);
|
||||||
|
bitmap_set(p, pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool maybe_changed_path(struct last_modified *lm,
|
||||||
|
struct commit *origin,
|
||||||
|
struct bitmap *active)
|
||||||
{
|
{
|
||||||
struct bloom_filter *filter;
|
struct bloom_filter *filter;
|
||||||
struct last_modified_entry *ent;
|
struct last_modified_entry *ent;
|
||||||
|
|
@ -213,6 +263,9 @@ static bool maybe_changed_path(struct last_modified *lm, struct commit *origin)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) {
|
hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) {
|
||||||
|
if (active && !bitmap_get(active, ent->diff_idx))
|
||||||
|
continue;
|
||||||
|
|
||||||
if (bloom_filter_contains(filter, &ent->key,
|
if (bloom_filter_contains(filter, &ent->key,
|
||||||
lm->rev.bloom_filter_settings))
|
lm->rev.bloom_filter_settings))
|
||||||
return true;
|
return true;
|
||||||
|
|
@ -220,42 +273,202 @@ static bool maybe_changed_path(struct last_modified *lm, struct commit *origin)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void process_parent(struct last_modified *lm,
|
||||||
|
struct prio_queue *queue,
|
||||||
|
struct commit *c, struct bitmap *active_c,
|
||||||
|
struct commit *parent, int parent_i)
|
||||||
|
{
|
||||||
|
struct bitmap *active_p;
|
||||||
|
|
||||||
|
repo_parse_commit(lm->rev.repo, parent);
|
||||||
|
active_p = active_paths_for(lm, parent);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The first time entering this function for this commit (i.e. first parent)
|
||||||
|
* see if Bloom filters will tell us it's worth to do the diff.
|
||||||
|
*/
|
||||||
|
if (parent_i || maybe_changed_path(lm, c, active_c)) {
|
||||||
|
diff_tree_oid(&parent->object.oid,
|
||||||
|
&c->object.oid, "", &lm->rev.diffopt);
|
||||||
|
diffcore_std(&lm->rev.diffopt);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test each path for TREESAME-ness against the parent. If a path is
|
||||||
|
* TREESAME, pass it on to this parent.
|
||||||
|
*
|
||||||
|
* First, collect all paths that are *not* TREESAME in 'scratch'.
|
||||||
|
* Then, pass paths that *are* TREESAME and active to the parent.
|
||||||
|
*/
|
||||||
|
for (int i = 0; i < diff_queued_diff.nr; i++) {
|
||||||
|
struct diff_filepair *fp = diff_queued_diff.queue[i];
|
||||||
|
const char *path = fp->two->path;
|
||||||
|
struct last_modified_entry *ent =
|
||||||
|
hashmap_get_entry_from_hash(&lm->paths, strhash(path), path,
|
||||||
|
struct last_modified_entry, hashent);
|
||||||
|
if (ent) {
|
||||||
|
size_t k = ent->diff_idx;
|
||||||
|
if (bitmap_get(active_c, k))
|
||||||
|
bitmap_set(lm->scratch, k);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (size_t i = 0; i < lm->all_paths_nr; i++) {
|
||||||
|
if (bitmap_get(active_c, i) && !bitmap_get(lm->scratch, i))
|
||||||
|
pass_to_parent(active_c, active_p, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If parent has any active paths, put it on the queue (if not already).
|
||||||
|
*/
|
||||||
|
if (!bitmap_is_empty(active_p) && !(parent->object.flags & PARENT1)) {
|
||||||
|
parent->object.flags |= PARENT1;
|
||||||
|
prio_queue_put(queue, parent);
|
||||||
|
}
|
||||||
|
if (!(parent->object.flags & PARENT1))
|
||||||
|
active_paths_free(lm, parent);
|
||||||
|
|
||||||
|
memset(lm->scratch->words, 0x0, lm->scratch->word_alloc);
|
||||||
|
diff_queue_clear(&diff_queued_diff);
|
||||||
|
}
|
||||||
|
|
||||||
static int last_modified_run(struct last_modified *lm)
|
static int last_modified_run(struct last_modified *lm)
|
||||||
{
|
{
|
||||||
|
int max_count, queue_popped = 0;
|
||||||
|
struct prio_queue queue = { compare_commits_by_gen_then_commit_date };
|
||||||
|
struct prio_queue not_queue = { compare_commits_by_gen_then_commit_date };
|
||||||
|
struct commit_list *list;
|
||||||
struct last_modified_callback_data data = { .lm = lm };
|
struct last_modified_callback_data data = { .lm = lm };
|
||||||
|
|
||||||
lm->rev.diffopt.output_format = DIFF_FORMAT_CALLBACK;
|
lm->rev.diffopt.output_format = DIFF_FORMAT_CALLBACK;
|
||||||
lm->rev.diffopt.format_callback = last_modified_diff;
|
lm->rev.diffopt.format_callback = last_modified_diff;
|
||||||
lm->rev.diffopt.format_callback_data = &data;
|
lm->rev.diffopt.format_callback_data = &data;
|
||||||
|
lm->rev.no_walk = 1;
|
||||||
|
|
||||||
prepare_revision_walk(&lm->rev);
|
prepare_revision_walk(&lm->rev);
|
||||||
|
|
||||||
while (hashmap_get_size(&lm->paths)) {
|
max_count = lm->rev.max_count;
|
||||||
data.commit = get_revision(&lm->rev);
|
|
||||||
if (!data.commit)
|
|
||||||
BUG("paths remaining beyond boundary in last-modified");
|
|
||||||
|
|
||||||
if (data.commit->object.flags & BOUNDARY) {
|
init_active_paths_for_commit(&lm->active_paths);
|
||||||
|
lm->scratch = bitmap_word_alloc(lm->all_paths_nr);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* lm->rev.commits holds the set of boundary commits for our walk.
|
||||||
|
*
|
||||||
|
* Loop through each such commit, and place it in the appropriate queue.
|
||||||
|
*/
|
||||||
|
for (list = lm->rev.commits; list; list = list->next) {
|
||||||
|
struct commit *c = list->item;
|
||||||
|
|
||||||
|
if (c->object.flags & BOTTOM) {
|
||||||
|
prio_queue_put(&not_queue, c);
|
||||||
|
c->object.flags |= PARENT2;
|
||||||
|
} else if (!(c->object.flags & PARENT1)) {
|
||||||
|
/*
|
||||||
|
* If the commit is a starting point (and hasn't been
|
||||||
|
* seen yet), then initialize the set of interesting
|
||||||
|
* paths, too.
|
||||||
|
*/
|
||||||
|
struct bitmap *active;
|
||||||
|
|
||||||
|
prio_queue_put(&queue, c);
|
||||||
|
c->object.flags |= PARENT1;
|
||||||
|
|
||||||
|
active = active_paths_for(lm, c);
|
||||||
|
for (size_t i = 0; i < lm->all_paths_nr; i++)
|
||||||
|
bitmap_set(active, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
while (queue.nr) {
|
||||||
|
int parent_i;
|
||||||
|
struct commit_list *p;
|
||||||
|
struct commit *c = prio_queue_get(&queue);
|
||||||
|
struct bitmap *active_c = active_paths_for(lm, c);
|
||||||
|
|
||||||
|
if ((0 <= max_count && max_count < ++queue_popped) ||
|
||||||
|
(c->object.flags & PARENT2)) {
|
||||||
|
/*
|
||||||
|
* Either a boundary commit, or we have already seen too
|
||||||
|
* many others. Either way, stop here.
|
||||||
|
*/
|
||||||
|
c->object.flags |= PARENT2 | BOUNDARY;
|
||||||
|
data.commit = c;
|
||||||
diff_tree_oid(lm->rev.repo->hash_algo->empty_tree,
|
diff_tree_oid(lm->rev.repo->hash_algo->empty_tree,
|
||||||
&data.commit->object.oid, "",
|
&c->object.oid,
|
||||||
&lm->rev.diffopt);
|
"", &lm->rev.diffopt);
|
||||||
diff_flush(&lm->rev.diffopt);
|
diff_flush(&lm->rev.diffopt);
|
||||||
|
goto cleanup;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!maybe_changed_path(lm, data.commit))
|
/*
|
||||||
continue;
|
* Otherwise, make sure that 'c' isn't reachable from anything
|
||||||
|
* in the '--not' queue.
|
||||||
|
*/
|
||||||
|
repo_parse_commit(lm->rev.repo, c);
|
||||||
|
|
||||||
log_tree_commit(&lm->rev, data.commit);
|
while (not_queue.nr) {
|
||||||
|
struct commit_list *np;
|
||||||
|
struct commit *n = prio_queue_get(&not_queue);
|
||||||
|
|
||||||
|
repo_parse_commit(lm->rev.repo, n);
|
||||||
|
|
||||||
|
for (np = n->parents; np; np = np->next) {
|
||||||
|
if (!(np->item->object.flags & PARENT2)) {
|
||||||
|
prio_queue_put(&not_queue, np->item);
|
||||||
|
np->item->object.flags |= PARENT2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (commit_graph_generation(n) < commit_graph_generation(c))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Look at each parent and pass on each path that's TREESAME
|
||||||
|
* with that parent. Stop early when no active paths remain.
|
||||||
|
*/
|
||||||
|
for (p = c->parents, parent_i = 0; p; p = p->next, parent_i++) {
|
||||||
|
process_parent(lm, &queue,
|
||||||
|
c, active_c,
|
||||||
|
p->item, parent_i);
|
||||||
|
|
||||||
|
if (bitmap_is_empty(active_c))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Paths that remain active, or not TREESAME with any parent,
|
||||||
|
* were changed by 'c'.
|
||||||
|
*/
|
||||||
|
if (!bitmap_is_empty(active_c)) {
|
||||||
|
data.commit = c;
|
||||||
|
for (size_t i = 0; i < lm->all_paths_nr; i++) {
|
||||||
|
if (bitmap_get(active_c, i))
|
||||||
|
mark_path(lm->all_paths[i], NULL, &data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cleanup:
|
||||||
|
active_paths_free(lm, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (hashmap_get_size(&lm->paths))
|
||||||
|
BUG("paths remaining beyond boundary in last-modified");
|
||||||
|
|
||||||
|
clear_prio_queue(&not_queue);
|
||||||
|
clear_prio_queue(&queue);
|
||||||
|
clear_active_paths_for_commit(&lm->active_paths);
|
||||||
|
bitmap_free(lm->scratch);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int last_modified_init(struct last_modified *lm, struct repository *r,
|
static int last_modified_init(struct last_modified *lm, struct repository *r,
|
||||||
const char *prefix, int argc, const char **argv)
|
const char *prefix, int argc, const char **argv)
|
||||||
{
|
{
|
||||||
|
struct hashmap_iter iter;
|
||||||
|
struct last_modified_entry *ent;
|
||||||
|
|
||||||
hashmap_init(&lm->paths, last_modified_entry_hashcmp, NULL, 0);
|
hashmap_init(&lm->paths, last_modified_entry_hashcmp, NULL, 0);
|
||||||
|
|
||||||
repo_init_revisions(r, &lm->rev, prefix);
|
repo_init_revisions(r, &lm->rev, prefix);
|
||||||
|
|
@ -280,6 +493,13 @@ static int last_modified_init(struct last_modified *lm, struct repository *r,
|
||||||
if (populate_paths_from_revs(lm) < 0)
|
if (populate_paths_from_revs(lm) < 0)
|
||||||
return error(_("unable to setup last-modified"));
|
return error(_("unable to setup last-modified"));
|
||||||
|
|
||||||
|
CALLOC_ARRAY(lm->all_paths, hashmap_get_size(&lm->paths));
|
||||||
|
lm->all_paths_nr = 0;
|
||||||
|
hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) {
|
||||||
|
ent->diff_idx = lm->all_paths_nr++;
|
||||||
|
lm->all_paths[ent->diff_idx] = ent->path;
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
1
object.h
1
object.h
|
|
@ -75,6 +75,7 @@ void object_array_init(struct object_array *array);
|
||||||
* http-push.c: 11-----14
|
* http-push.c: 11-----14
|
||||||
* commit-graph.c: 15
|
* commit-graph.c: 15
|
||||||
* commit-reach.c: 16-----19
|
* commit-reach.c: 16-----19
|
||||||
|
* builtin/last-modified.c: 1617
|
||||||
* sha1-name.c: 20
|
* sha1-name.c: 20
|
||||||
* list-objects-filter.c: 21
|
* list-objects-filter.c: 21
|
||||||
* bloom.c: 2122
|
* bloom.c: 2122
|
||||||
|
|
|
||||||
|
|
@ -57,9 +57,9 @@ test_expect_success 'last-modified recursive' '
|
||||||
|
|
||||||
test_expect_success 'last-modified recursive with show-trees' '
|
test_expect_success 'last-modified recursive with show-trees' '
|
||||||
check_last_modified -r -t <<-\EOF
|
check_last_modified -r -t <<-\EOF
|
||||||
3 a
|
|
||||||
3 a/b
|
3 a/b
|
||||||
3 a/b/file
|
3 a/b/file
|
||||||
|
3 a
|
||||||
2 a/file
|
2 a/file
|
||||||
1 file
|
1 file
|
||||||
EOF
|
EOF
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue