revision: use priority queue in limit_list()
limit_list() maintains a date-sorted work queue of commits using a
linked list with commit_list_insert_by_date() for insertion. Each
insertion walks the list to find the right position — O(n) per insert.
In repositories with merge-heavy histories, the symmetric difference
can contain thousands of commits, making this O(n) insertion the
dominant cost.
Replace the sorted linked list with a prio_queue (binary heap). This
gives O(log n) insertion and O(log n) extraction instead of O(n)
insertion and O(1) extraction, which is a net win when the queue is
large.
The still_interesting() and everybody_uninteresting() helpers are
updated to take a prio_queue instead of a commit_list:
still_interesting() peeks at the head of the queue to compare dates,
and everybody_uninteresting() scans the queue's contiguous array
instead of walking a linked list. process_parents() already accepts
both a commit_list and a prio_queue parameter, so the change in
limit_list() simply switches which one is passed.
Benchmark: git rev-list --left-right --count HEAD~N...HEAD
Repository: 2.3M commits, merge-heavy DAG (monorepo)
Best of 5 runs, times in seconds:
    commits in
    symmetric diff    baseline    patched    speedup
    --------------    --------    -------    -------
                10        0.01       0.01       1.0x
                50        0.01       0.01       1.0x
              3751       21.23       8.49       2.5x
              4524       21.70       8.29       2.6x
             10130       20.10       6.65       3.0x
No change for small traversals; 2.5-3.0x faster when the queue grows
to thousands of commits.
Signed-off-by: Kristofer Karlsson <krka@spotify.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
main
parent
94f057755b
commit
ef8d51a8a3
38
revision.c
38
revision.c
|
|
@ -473,10 +473,10 @@ static struct commit *handle_commit(struct rev_info *revs,
|
|||
die("%s is unknown object", name);
|
||||
}
|
||||
|
||||
static int everybody_uninteresting(struct commit_list *orig,
|
||||
static int everybody_uninteresting(struct prio_queue *orig,
|
||||
struct commit **interesting_cache)
|
||||
{
|
||||
struct commit_list *list = orig;
|
||||
size_t i;
|
||||
|
||||
if (*interesting_cache) {
|
||||
struct commit *commit = *interesting_cache;
|
||||
|
|
@ -484,9 +484,8 @@ static int everybody_uninteresting(struct commit_list *orig,
|
|||
return 0;
|
||||
}
|
||||
|
||||
while (list) {
|
||||
struct commit *commit = list->item;
|
||||
list = list->next;
|
||||
for (i = 0; i < orig->nr; i++) {
|
||||
struct commit *commit = orig->array[i].data;
|
||||
if (commit->object.flags & UNINTERESTING)
|
||||
continue;
|
||||
|
||||
|
|
@ -1300,20 +1299,17 @@ static void cherry_pick_list(struct commit_list *list, struct rev_info *revs)
|
|||
/* How many extra uninteresting commits we want to see.. */
|
||||
#define SLOP 5
|
||||
|
||||
static int still_interesting(struct commit_list *src, timestamp_t date, int slop,
|
||||
static int still_interesting(struct prio_queue *src, timestamp_t date, int slop,
|
||||
struct commit **interesting_cache)
|
||||
{
|
||||
/*
|
||||
* No source list at all? We're definitely done..
|
||||
* Since src is sorted by date, it is enough to peek at the
|
||||
* first entry to compare dates. No entry at all means done.
|
||||
*/
|
||||
if (!src)
|
||||
struct commit *commit = prio_queue_peek(src);
|
||||
if (!commit)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Does the destination list contain entries with a date
|
||||
* before the source list? Definitely _not_ done.
|
||||
*/
|
||||
if (date <= src->item->date)
|
||||
if (date <= commit->date)
|
||||
return SLOP;
|
||||
|
||||
/*
|
||||
|
|
@ -1451,6 +1447,7 @@ static int limit_list(struct rev_info *revs)
|
|||
struct commit_list *newlist = NULL;
|
||||
struct commit_list **p = &newlist;
|
||||
struct commit *interesting_cache = NULL;
|
||||
struct prio_queue queue = { .compare = compare_commits_by_commit_date };
|
||||
|
||||
if (revs->ancestry_path_implicit_bottoms) {
|
||||
collect_bottom_commits(original_list,
|
||||
|
|
@ -1461,6 +1458,11 @@ static int limit_list(struct rev_info *revs)
|
|||
|
||||
while (original_list) {
|
||||
struct commit *commit = pop_commit(&original_list);
|
||||
prio_queue_put(&queue, commit);
|
||||
}
|
||||
|
||||
while (queue.nr) {
|
||||
struct commit *commit = prio_queue_get(&queue);
|
||||
struct object *obj = &commit->object;
|
||||
|
||||
if (commit == interesting_cache)
|
||||
|
|
@ -1468,11 +1470,13 @@ static int limit_list(struct rev_info *revs)
|
|||
|
||||
if (revs->max_age != -1 && (commit->date < revs->max_age))
|
||||
obj->flags |= UNINTERESTING;
|
||||
if (process_parents(revs, commit, &original_list, NULL) < 0)
|
||||
if (process_parents(revs, commit, NULL, &queue) < 0) {
|
||||
clear_prio_queue(&queue);
|
||||
return -1;
|
||||
}
|
||||
if (obj->flags & UNINTERESTING) {
|
||||
mark_parents_uninteresting(revs, commit);
|
||||
slop = still_interesting(original_list, date, slop, &interesting_cache);
|
||||
slop = still_interesting(&queue, date, slop, &interesting_cache);
|
||||
if (slop)
|
||||
continue;
|
||||
break;
|
||||
|
|
@ -1509,7 +1513,7 @@ static int limit_list(struct rev_info *revs)
|
|||
}
|
||||
}
|
||||
|
||||
commit_list_free(original_list);
|
||||
clear_prio_queue(&queue);
|
||||
revs->commits = newlist;
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue