diff --git a/builtin/last-modified.c b/builtin/last-modified.c index ae8b36a2c3..b0ecbdc540 100644 --- a/builtin/last-modified.c +++ b/builtin/last-modified.c @@ -2,26 +2,32 @@ #include "bloom.h" #include "builtin.h" #include "commit-graph.h" +#include "commit-slab.h" #include "commit.h" #include "config.h" -#include "environment.h" #include "diff.h" #include "diffcore.h" #include "environment.h" +#include "ewah/ewok.h" #include "hashmap.h" #include "hex.h" -#include "log-tree.h" #include "object-name.h" #include "object.h" #include "parse-options.h" +#include "prio-queue.h" #include "quote.h" #include "repository.h" #include "revision.h" +/* Remember to update object flag allocation in object.h */ +#define PARENT1 (1u<<16) /* used instead of SEEN */ +#define PARENT2 (1u<<17) /* used instead of BOTTOM, BOUNDARY */ + struct last_modified_entry { struct hashmap_entry hashent; struct object_id oid; struct bloom_key key; + size_t diff_idx; const char path[FLEX_ARRAY]; }; @@ -37,13 +43,45 @@ static int last_modified_entry_hashcmp(const void *unused UNUSED, return strcmp(ent1->path, path ? path : ent2->path); } +/* + * Hold a bitmap for each commit we're working with. In the bitmap, each bit + * represents a path in `lm->all_paths`. An active bit indicates the path still + * needs to be associated to a commit. + */ +define_commit_slab(active_paths_for_commit, struct bitmap *); + struct last_modified { struct hashmap paths; struct rev_info rev; bool recursive; bool show_trees; + + const char **all_paths; + size_t all_paths_nr; + struct active_paths_for_commit active_paths; + + /* 'scratch' to avoid allocating a bitmap every process_parent() */ + struct bitmap *scratch; }; +static struct bitmap *active_paths_for(struct last_modified *lm, struct commit *c) +{ + struct bitmap **bitmap = active_paths_for_commit_at(&lm->active_paths, c); + if (!*bitmap) + *bitmap = bitmap_word_alloc(lm->all_paths_nr / BITS_IN_EWORD + 1); + + return *bitmap; +} + +static void active_paths_free(struct last_modified *lm, struct commit *c) +{ + struct bitmap **bitmap = active_paths_for_commit_at(&lm->active_paths, c); + if (*bitmap) { + bitmap_free(*bitmap); + *bitmap = NULL; + } +} + static void last_modified_release(struct last_modified *lm) { struct hashmap_iter iter; @@ -54,6 +92,8 @@ static void last_modified_release(struct last_modified *lm) hashmap_clear_and_free(&lm->paths, struct last_modified_entry, hashent); release_revisions(&lm->rev); + + free(lm->all_paths); } struct last_modified_callback_data { @@ -146,7 +186,7 @@ static void mark_path(const char *path, const struct object_id *oid, * Is it arriving at a version of interest, or is it from a side branch * which did not contribute to the final state? */ - if (!oideq(oid, &ent->oid)) + if (oid && !oideq(oid, &ent->oid)) return; last_modified_emit(data->lm, path, data->commit); @@ -196,7 +236,17 @@ static void last_modified_diff(struct diff_queue_struct *q, } } -static bool maybe_changed_path(struct last_modified *lm, struct commit *origin) +static void pass_to_parent(struct bitmap *c, + struct bitmap *p, + size_t pos) +{ + bitmap_unset(c, pos); + bitmap_set(p, pos); +} + +static bool maybe_changed_path(struct last_modified *lm, + struct commit *origin, + struct bitmap *active) { struct bloom_filter *filter; struct last_modified_entry *ent; @@ -213,6 +263,9 @@ static bool maybe_changed_path(struct last_modified *lm, struct commit *origin) return true; hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) { + if (active && !bitmap_get(active, ent->diff_idx)) + continue; + if (bloom_filter_contains(filter, &ent->key, lm->rev.bloom_filter_settings)) return true; @@ -220,42 +273,202 @@ static bool maybe_changed_path(struct last_modified *lm, struct commit *origin) return false; } +static void process_parent(struct last_modified *lm, + struct prio_queue *queue, + struct commit *c, struct bitmap *active_c, + struct commit *parent, int parent_i) +{ + struct bitmap *active_p; + + repo_parse_commit(lm->rev.repo, parent); + active_p = active_paths_for(lm, parent); + + /* + * The first time entering this function for this commit (i.e. first parent) + * see if Bloom filters will tell us it's worth to do the diff. + */ + if (parent_i || maybe_changed_path(lm, c, active_c)) { + diff_tree_oid(&parent->object.oid, + &c->object.oid, "", &lm->rev.diffopt); + diffcore_std(&lm->rev.diffopt); + } + + /* + * Test each path for TREESAME-ness against the parent. If a path is + * TREESAME, pass it on to this parent. + * + * First, collect all paths that are *not* TREESAME in 'scratch'. + * Then, pass paths that *are* TREESAME and active to the parent. + */ + for (int i = 0; i < diff_queued_diff.nr; i++) { + struct diff_filepair *fp = diff_queued_diff.queue[i]; + const char *path = fp->two->path; + struct last_modified_entry *ent = + hashmap_get_entry_from_hash(&lm->paths, strhash(path), path, + struct last_modified_entry, hashent); + if (ent) { + size_t k = ent->diff_idx; + if (bitmap_get(active_c, k)) + bitmap_set(lm->scratch, k); + } + } + for (size_t i = 0; i < lm->all_paths_nr; i++) { + if (bitmap_get(active_c, i) && !bitmap_get(lm->scratch, i)) + pass_to_parent(active_c, active_p, i); + } + + /* + * If parent has any active paths, put it on the queue (if not already). + */ + if (!bitmap_is_empty(active_p) && !(parent->object.flags & PARENT1)) { + parent->object.flags |= PARENT1; + prio_queue_put(queue, parent); + } + if (!(parent->object.flags & PARENT1)) + active_paths_free(lm, parent); + + memset(lm->scratch->words, 0x0, lm->scratch->word_alloc); + diff_queue_clear(&diff_queued_diff); +} + static int last_modified_run(struct last_modified *lm) { + int max_count, queue_popped = 0; + struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; + struct prio_queue not_queue = { compare_commits_by_gen_then_commit_date }; + struct commit_list *list; struct last_modified_callback_data data = { .lm = lm }; lm->rev.diffopt.output_format = DIFF_FORMAT_CALLBACK; lm->rev.diffopt.format_callback = last_modified_diff; lm->rev.diffopt.format_callback_data = &data; + lm->rev.no_walk = 1; prepare_revision_walk(&lm->rev); - while (hashmap_get_size(&lm->paths)) { - data.commit = get_revision(&lm->rev); - if (!data.commit) - BUG("paths remaining beyond boundary in last-modified"); + max_count = lm->rev.max_count; - if (data.commit->object.flags & BOUNDARY) { + init_active_paths_for_commit(&lm->active_paths); + lm->scratch = bitmap_word_alloc(lm->all_paths_nr); + + /* + * lm->rev.commits holds the set of boundary commits for our walk. + * + * Loop through each such commit, and place it in the appropriate queue. + */ + for (list = lm->rev.commits; list; list = list->next) { + struct commit *c = list->item; + + if (c->object.flags & BOTTOM) { + prio_queue_put(¬_queue, c); + c->object.flags |= PARENT2; + } else if (!(c->object.flags & PARENT1)) { + /* + * If the commit is a starting point (and hasn't been + * seen yet), then initialize the set of interesting + * paths, too. + */ + struct bitmap *active; + + prio_queue_put(&queue, c); + c->object.flags |= PARENT1; + + active = active_paths_for(lm, c); + for (size_t i = 0; i < lm->all_paths_nr; i++) + bitmap_set(active, i); + } + } + + while (queue.nr) { + int parent_i; + struct commit_list *p; + struct commit *c = prio_queue_get(&queue); + struct bitmap *active_c = active_paths_for(lm, c); + + if ((0 <= max_count && max_count < ++queue_popped) || + (c->object.flags & PARENT2)) { + /* + * Either a boundary commit, or we have already seen too + * many others. Either way, stop here. + */ + c->object.flags |= PARENT2 | BOUNDARY; + data.commit = c; diff_tree_oid(lm->rev.repo->hash_algo->empty_tree, - &data.commit->object.oid, "", - &lm->rev.diffopt); + &c->object.oid, + "", &lm->rev.diffopt); diff_flush(&lm->rev.diffopt); - - break; + goto cleanup; } - if (!maybe_changed_path(lm, data.commit)) - continue; + /* + * Otherwise, make sure that 'c' isn't reachable from anything + * in the '--not' queue. + */ + repo_parse_commit(lm->rev.repo, c); - log_tree_commit(&lm->rev, data.commit); + while (not_queue.nr) { + struct commit_list *np; + struct commit *n = prio_queue_get(¬_queue); + + repo_parse_commit(lm->rev.repo, n); + + for (np = n->parents; np; np = np->next) { + if (!(np->item->object.flags & PARENT2)) { + prio_queue_put(¬_queue, np->item); + np->item->object.flags |= PARENT2; + } + } + + if (commit_graph_generation(n) < commit_graph_generation(c)) + break; + } + + /* + * Look at each parent and pass on each path that's TREESAME + * with that parent. Stop early when no active paths remain. + */ + for (p = c->parents, parent_i = 0; p; p = p->next, parent_i++) { + process_parent(lm, &queue, + c, active_c, + p->item, parent_i); + + if (bitmap_is_empty(active_c)) + break; + } + + /* + * Paths that remain active, or not TREESAME with any parent, + * were changed by 'c'. + */ + if (!bitmap_is_empty(active_c)) { + data.commit = c; + for (size_t i = 0; i < lm->all_paths_nr; i++) { + if (bitmap_get(active_c, i)) + mark_path(lm->all_paths[i], NULL, &data); + } + } + +cleanup: + active_paths_free(lm, c); } + if (hashmap_get_size(&lm->paths)) + BUG("paths remaining beyond boundary in last-modified"); + + clear_prio_queue(¬_queue); + clear_prio_queue(&queue); + clear_active_paths_for_commit(&lm->active_paths); + bitmap_free(lm->scratch); + return 0; } static int last_modified_init(struct last_modified *lm, struct repository *r, const char *prefix, int argc, const char **argv) { + struct hashmap_iter iter; + struct last_modified_entry *ent; + hashmap_init(&lm->paths, last_modified_entry_hashcmp, NULL, 0); repo_init_revisions(r, &lm->rev, prefix); @@ -280,6 +493,13 @@ static int last_modified_init(struct last_modified *lm, struct repository *r, if (populate_paths_from_revs(lm) < 0) return error(_("unable to setup last-modified")); + CALLOC_ARRAY(lm->all_paths, hashmap_get_size(&lm->paths)); + lm->all_paths_nr = 0; + hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) { + ent->diff_idx = lm->all_paths_nr++; + lm->all_paths[ent->diff_idx] = ent->path; + } + return 0; } diff --git a/object.h b/object.h index 8c3c1c46e1..fa504a09c0 100644 --- a/object.h +++ b/object.h @@ -75,6 +75,7 @@ void object_array_init(struct object_array *array); * http-push.c: 11-----14 * commit-graph.c: 15 * commit-reach.c: 16-----19 + * builtin/last-modified.c: 1617 * sha1-name.c: 20 * list-objects-filter.c: 21 * bloom.c: 2122 diff --git a/t/t8020-last-modified.sh b/t/t8020-last-modified.sh index 61f00bc15c..a4c1114ee2 100755 --- a/t/t8020-last-modified.sh +++ b/t/t8020-last-modified.sh @@ -57,9 +57,9 @@ test_expect_success 'last-modified recursive' ' test_expect_success 'last-modified recursive with show-trees' ' check_last_modified -r -t <<-\EOF - 3 a 3 a/b 3 a/b/file + 3 a 2 a/file 1 file EOF