Merge branch 'rs/pop-recent-commit-with-prio-queue'

The pop_most_recent_commit() function can have quite expensive
worst case performance characteristics, which has been optimized by
using prio-queue data structure.

* rs/pop-recent-commit-with-prio-queue:
  commit: use prio_queue_replace() in pop_most_recent_commit()
  prio-queue: add prio_queue_replace()
  commit: convert pop_most_recent_commit() to prio_queue
maint
Junio C Hamano 2025-07-28 12:02:34 -07:00
commit 0f6e5037d4
10 changed files with 170 additions and 34 deletions

View File

@ -31,6 +31,7 @@
#include "parse.h" #include "parse.h"
#include "object-file.h" #include "object-file.h"
#include "object-file-convert.h" #include "object-file-convert.h"
#include "prio-queue.h"


static struct commit_extra_header *read_commit_extra_header_lines(const char *buf, size_t len, const char **); static struct commit_extra_header *read_commit_extra_header_lines(const char *buf, size_t len, const char **);


@ -739,20 +740,27 @@ void commit_list_sort_by_date(struct commit_list **list)
commit_list_sort(list, commit_list_compare_by_date); commit_list_sort(list, commit_list_compare_by_date);
} }


struct commit *pop_most_recent_commit(struct commit_list **list, struct commit *pop_most_recent_commit(struct prio_queue *queue,
unsigned int mark) unsigned int mark)
{ {
struct commit *ret = pop_commit(list); struct commit *ret = prio_queue_peek(queue);
int get_pending = 1;
struct commit_list *parents = ret->parents; struct commit_list *parents = ret->parents;


while (parents) { while (parents) {
struct commit *commit = parents->item; struct commit *commit = parents->item;
if (!repo_parse_commit(the_repository, commit) && !(commit->object.flags & mark)) { if (!repo_parse_commit(the_repository, commit) && !(commit->object.flags & mark)) {
commit->object.flags |= mark; commit->object.flags |= mark;
commit_list_insert_by_date(commit, list); if (get_pending)
prio_queue_replace(queue, commit);
else
prio_queue_put(queue, commit);
get_pending = 0;
} }
parents = parents->next; parents = parents->next;
} }
if (get_pending)
prio_queue_get(queue);
return ret; return ret;
} }



View File

@ -201,10 +201,10 @@ const char *repo_logmsg_reencode(struct repository *r,


const char *skip_blank_lines(const char *msg); const char *skip_blank_lines(const char *msg);


/** Removes the first commit from a list sorted by date, and adds all struct prio_queue;
* of its parents.
**/ /* Removes the first commit from a prio_queue and adds its parents. */
struct commit *pop_most_recent_commit(struct commit_list **list, struct commit *pop_most_recent_commit(struct prio_queue *queue,
unsigned int mark); unsigned int mark);


struct commit *pop_commit(struct commit_list **stack); struct commit *pop_commit(struct commit_list **stack);

View File

@ -34,6 +34,7 @@
#include "commit-graph.h" #include "commit-graph.h"
#include "sigchain.h" #include "sigchain.h"
#include "mergesort.h" #include "mergesort.h"
#include "prio-queue.h"


static int transfer_unpack_limit = -1; static int transfer_unpack_limit = -1;
static int fetch_unpack_limit = -1; static int fetch_unpack_limit = -1;
@ -601,7 +602,7 @@ done:
return count ? retval : 0; return count ? retval : 0;
} }


static struct commit_list *complete; static struct prio_queue complete = { compare_commits_by_commit_date };


static int mark_complete(const struct object_id *oid) static int mark_complete(const struct object_id *oid)
{ {
@ -609,7 +610,7 @@ static int mark_complete(const struct object_id *oid)


if (commit && !(commit->object.flags & COMPLETE)) { if (commit && !(commit->object.flags & COMPLETE)) {
commit->object.flags |= COMPLETE; commit->object.flags |= COMPLETE;
commit_list_insert(commit, &complete); prio_queue_put(&complete, commit);
} }
return 0; return 0;
} }
@ -626,9 +627,12 @@ static int mark_complete_oid(const char *refname UNUSED,
static void mark_recent_complete_commits(struct fetch_pack_args *args, static void mark_recent_complete_commits(struct fetch_pack_args *args,
timestamp_t cutoff) timestamp_t cutoff)
{ {
while (complete && cutoff <= complete->item->date) { while (complete.nr) {
struct commit *item = prio_queue_peek(&complete);
if (item->date < cutoff)
break;
print_verbose(args, _("Marking %s as complete"), print_verbose(args, _("Marking %s as complete"),
oid_to_hex(&complete->item->object.oid)); oid_to_hex(&item->object.oid));
pop_most_recent_commit(&complete, COMPLETE); pop_most_recent_commit(&complete, COMPLETE);
} }
} }
@ -798,7 +802,6 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator,
refs_for_each_rawref(get_main_ref_store(the_repository), refs_for_each_rawref(get_main_ref_store(the_repository),
mark_complete_oid, NULL); mark_complete_oid, NULL);
for_each_cached_alternate(NULL, mark_alternate_complete); for_each_cached_alternate(NULL, mark_alternate_complete);
commit_list_sort_by_date(&complete);
if (cutoff) if (cutoff)
mark_recent_complete_commits(args, cutoff); mark_recent_complete_commits(args, cutoff);
} }

View File

@ -28,6 +28,7 @@
#include "commit-reach.h" #include "commit-reach.h"
#include "date.h" #include "date.h"
#include "object-file-convert.h" #include "object-file-convert.h"
#include "prio-queue.h"


static int get_oid_oneline(struct repository *r, const char *, struct object_id *, static int get_oid_oneline(struct repository *r, const char *, struct object_id *,
const struct commit_list *); const struct commit_list *);
@ -1461,7 +1462,7 @@ static int get_oid_oneline(struct repository *r,
const char *prefix, struct object_id *oid, const char *prefix, struct object_id *oid,
const struct commit_list *list) const struct commit_list *list)
{ {
struct commit_list *copy = NULL, **copy_tail = &copy; struct prio_queue copy = { compare_commits_by_commit_date };
const struct commit_list *l; const struct commit_list *l;
int found = 0; int found = 0;
int negative = 0; int negative = 0;
@ -1483,9 +1484,9 @@ static int get_oid_oneline(struct repository *r,


for (l = list; l; l = l->next) { for (l = list; l; l = l->next) {
l->item->object.flags |= ONELINE_SEEN; l->item->object.flags |= ONELINE_SEEN;
copy_tail = &commit_list_insert(l->item, copy_tail)->next; prio_queue_put(&copy, l->item);
} }
while (copy) { while (copy.nr) {
const char *p, *buf; const char *p, *buf;
struct commit *commit; struct commit *commit;
int matches; int matches;
@ -1507,7 +1508,7 @@ static int get_oid_oneline(struct repository *r,
regfree(&regex); regfree(&regex);
for (l = list; l; l = l->next) for (l = list; l; l = l->next)
clear_commit_marks(l->item, ONELINE_SEEN); clear_commit_marks(l->item, ONELINE_SEEN);
free_commit_list(copy); clear_prio_queue(&copy);
return found ? 0 : -1; return found ? 0 : -1;
} }


@ -2061,7 +2062,6 @@ static enum get_oid_result get_oid_with_context_1(struct repository *repo,
cb.list = &list; cb.list = &list;
refs_for_each_ref(get_main_ref_store(repo), handle_one_ref, &cb); refs_for_each_ref(get_main_ref_store(repo), handle_one_ref, &cb);
refs_head_ref(get_main_ref_store(repo), handle_one_ref, &cb); refs_head_ref(get_main_ref_store(repo), handle_one_ref, &cb);
commit_list_sort_by_date(&list);
ret = get_oid_oneline(repo, name + 2, oid, list); ret = get_oid_oneline(repo, name + 2, oid, list);


free_commit_list(list); free_commit_list(list);

View File

@ -58,22 +58,10 @@ void prio_queue_put(struct prio_queue *queue, void *thing)
} }
} }


void *prio_queue_get(struct prio_queue *queue) static void sift_down_root(struct prio_queue *queue)
{ {
void *result;
size_t ix, child; size_t ix, child;


if (!queue->nr)
return NULL;
if (!queue->compare)
return queue->array[--queue->nr].data; /* LIFO */

result = queue->array[0].data;
if (!--queue->nr)
return result;

queue->array[0] = queue->array[queue->nr];

/* Push down the one at the root */ /* Push down the one at the root */
for (ix = 0; ix * 2 + 1 < queue->nr; ix = child) { for (ix = 0; ix * 2 + 1 < queue->nr; ix = child) {
child = ix * 2 + 1; /* left */ child = ix * 2 + 1; /* left */
@ -86,6 +74,23 @@ void *prio_queue_get(struct prio_queue *queue)


swap(queue, child, ix); swap(queue, child, ix);
} }
}

void *prio_queue_get(struct prio_queue *queue)
{
void *result;

if (!queue->nr)
return NULL;
if (!queue->compare)
return queue->array[--queue->nr].data; /* LIFO */

result = queue->array[0].data;
if (!--queue->nr)
return result;

queue->array[0] = queue->array[queue->nr];
sift_down_root(queue);
return result; return result;
} }


@ -97,3 +102,17 @@ void *prio_queue_peek(struct prio_queue *queue)
return queue->array[queue->nr - 1].data; return queue->array[queue->nr - 1].data;
return queue->array[0].data; return queue->array[0].data;
} }

void prio_queue_replace(struct prio_queue *queue, void *thing)
{
if (!queue->nr) {
prio_queue_put(queue, thing);
} else if (!queue->compare) {
queue->array[queue->nr - 1].ctr = queue->insertion_ctr++;
queue->array[queue->nr - 1].data = thing;
} else {
queue->array[0].ctr = queue->insertion_ctr++;
queue->array[0].data = thing;
sift_down_root(queue);
}
}

View File

@ -52,6 +52,14 @@ void *prio_queue_get(struct prio_queue *);
*/ */
void *prio_queue_peek(struct prio_queue *); void *prio_queue_peek(struct prio_queue *);


/*
* Replace the "thing" that compares the smallest with a new "thing",
* like prio_queue_get()+prio_queue_put() would do, but in a more
* efficient way. Does the same as prio_queue_put() if the queue is
* empty.
*/
void prio_queue_replace(struct prio_queue *queue, void *thing);

void clear_prio_queue(struct prio_queue *); void clear_prio_queue(struct prio_queue *);


/* Reverse the LIFO elements */ /* Reverse the LIFO elements */

View File

@ -1116,6 +1116,7 @@ benchmarks = [
'perf/p1450-fsck.sh', 'perf/p1450-fsck.sh',
'perf/p1451-fsck-skip-list.sh', 'perf/p1451-fsck-skip-list.sh',
'perf/p1500-graph-walks.sh', 'perf/p1500-graph-walks.sh',
'perf/p1501-rev-parse-oneline.sh',
'perf/p2000-sparse-operations.sh', 'perf/p2000-sparse-operations.sh',
'perf/p3400-rebase.sh', 'perf/p3400-rebase.sh',
'perf/p3404-rebase-interactive.sh', 'perf/p3404-rebase-interactive.sh',

View File

@ -0,0 +1,71 @@
#!/bin/sh

test_description='Test :/ object name notation'

. ./perf-lib.sh

test_perf_fresh_repo

#
# Creates lots of merges to make history traversal costly. In
# particular it creates 2^($max_level-1)-1 2-way merges on top of
# 2^($max_level-1) root commits. E.g., the commit history looks like
# this for a $max_level of 3:
#
# _1_
# / \
# 2 3
# / \ / \
# 4 5 6 7
#
# The numbers are the fast-import marks, which also are the commit
# messages. 1 is the HEAD commit and a merge, 2 and 3 are also merges,
# 4-7 are the root commits.
#
build_history () {
local max_level="$1" &&
local level="${2:-1}" &&
local mark="${3:-1}" &&
if test $level -eq $max_level
then
echo "reset refs/heads/master" &&
echo "from $ZERO_OID" &&
echo "commit refs/heads/master" &&
echo "mark :$mark" &&
echo "committer C <c@example.com> 1234567890 +0000" &&
echo "data <<EOF" &&
echo "$mark" &&
echo "EOF"
else
local level1=$((level+1)) &&
local mark1=$((2*mark)) &&
local mark2=$((2*mark+1)) &&
build_history $max_level $level1 $mark1 &&
build_history $max_level $level1 $mark2 &&
echo "commit refs/heads/master" &&
echo "mark :$mark" &&
echo "committer C <c@example.com> 1234567890 +0000" &&
echo "data <<EOF" &&
echo "$mark" &&
echo "EOF" &&
echo "from :$mark1" &&
echo "merge :$mark2"
fi
}

test_expect_success 'setup' '
build_history 16 | git fast-import &&
git log --format="%H %s" --reverse >commits &&
sed -n -e "s/ .*$//p" -e "q" <commits >expect &&
sed -n -e "s/^.* //p" -e "q" <commits >needle
'

test_perf "rev-parse :/$(cat needle)" '
git rev-parse :/$(cat needle) >actual
'

test_expect_success 'verify result' '
test_cmp expect actual
'

test_done

View File

@ -13,6 +13,7 @@ static int intcmp(const void *va, const void *vb, void *data UNUSED)
#define STACK -3 #define STACK -3
#define GET -4 #define GET -4
#define REVERSE -5 #define REVERSE -5
#define REPLACE -6


static int show(int *v) static int show(int *v)
{ {
@ -51,6 +52,15 @@ static void test_prio_queue(int *input, size_t input_size,
case REVERSE: case REVERSE:
prio_queue_reverse(&pq); prio_queue_reverse(&pq);
break; break;
case REPLACE:
peek = prio_queue_peek(&pq);
cl_assert(i + 1 < input_size);
cl_assert(input[i + 1] >= 0);
cl_assert(j < result_size);
cl_assert_equal_i(result[j], show(peek));
j++;
prio_queue_replace(&pq, &input[++i]);
break;
default: default:
prio_queue_put(&pq, &input[i]); prio_queue_put(&pq, &input[i]);
break; break;
@ -81,6 +91,13 @@ void test_prio_queue__empty(void)
((int []){ 1, 2, MISSING, 1, 2, MISSING })); ((int []){ 1, 2, MISSING, 1, 2, MISSING }));
} }


void test_prio_queue__replace(void)
{
TEST_INPUT(((int []){ REPLACE, 6, 2, 4, REPLACE, 5, 7, GET,
REPLACE, 1, DUMP }),
((int []){ MISSING, 2, 4, 5, 1, 6, 7 }));
}

void test_prio_queue__stack(void) void test_prio_queue__stack(void)
{ {
TEST_INPUT(((int []){ STACK, 8, 1, 5, 4, 6, 2, 3, DUMP }), TEST_INPUT(((int []){ STACK, 8, 1, 5, 4, 6, 2, 3, DUMP }),
@ -92,3 +109,9 @@ void test_prio_queue__reverse_stack(void)
TEST_INPUT(((int []){ STACK, 1, 2, 3, 4, 5, 6, REVERSE, DUMP }), TEST_INPUT(((int []){ STACK, 1, 2, 3, 4, 5, 6, REVERSE, DUMP }),
((int []){ 1, 2, 3, 4, 5, 6 })); ((int []){ 1, 2, 3, 4, 5, 6 }));
} }

void test_prio_queue__replace_stack(void)
{
TEST_INPUT(((int []){ STACK, 8, 1, 5, REPLACE, 4, 6, 2, 3, DUMP }),
((int []){ 5, 3, 2, 6, 4, 1, 8 }));
}

View File

@ -14,6 +14,7 @@
#include "blob.h" #include "blob.h"
#include "refs.h" #include "refs.h"
#include "progress.h" #include "progress.h"
#include "prio-queue.h"


static struct object_id current_commit_oid; static struct object_id current_commit_oid;


@ -78,7 +79,7 @@ static int process_tree(struct walker *walker, struct tree *tree)
#define SEEN (1U << 1) #define SEEN (1U << 1)
#define TO_SCAN (1U << 2) #define TO_SCAN (1U << 2)


static struct commit_list *complete = NULL; static struct prio_queue complete = { compare_commits_by_commit_date };


static int process_commit(struct walker *walker, struct commit *commit) static int process_commit(struct walker *walker, struct commit *commit)
{ {
@ -87,7 +88,10 @@ static int process_commit(struct walker *walker, struct commit *commit)
if (repo_parse_commit(the_repository, commit)) if (repo_parse_commit(the_repository, commit))
return -1; return -1;


while (complete && complete->item->date >= commit->date) { while (complete.nr) {
struct commit *item = prio_queue_peek(&complete);
if (item->date < commit->date)
break;
pop_most_recent_commit(&complete, COMPLETE); pop_most_recent_commit(&complete, COMPLETE);
} }


@ -233,7 +237,7 @@ static int mark_complete(const char *path UNUSED,


if (commit) { if (commit) {
commit->object.flags |= COMPLETE; commit->object.flags |= COMPLETE;
commit_list_insert(commit, &complete); prio_queue_put(&complete, commit);
} }
return 0; return 0;
} }
@ -302,7 +306,6 @@ int walker_fetch(struct walker *walker, int targets, char **target,
if (!walker->get_recover) { if (!walker->get_recover) {
refs_for_each_ref(get_main_ref_store(the_repository), refs_for_each_ref(get_main_ref_store(the_repository),
mark_complete, NULL); mark_complete, NULL);
commit_list_sort_by_date(&complete);
} }


for (i = 0; i < targets; i++) { for (i = 0; i < targets; i++) {