commit-reach: avoid commit_list_insert_by_date()

Building a list using commit_list_insert_by_date() has quadratic worst
case complexity.  Avoid it by just appending in the loop and sorting at
the end.

The number of merge bases is usually small, so don't expect speedups in
normal repositories.  It has no limit, though.  The added perf test
shows a nice improvement when dealing with 16384 merge bases:

Test                     v2.51.1           HEAD
-----------------------------------------------------------------
6010.2: git merge-base   0.55(0.54+0.00)   0.03(0.02+0.00) -94.5%

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
main
René Scharfe 2025-10-24 18:47:10 +02:00 committed by Junio C Hamano
parent 81f86aacc4
commit 134ec330d2
2 changed files with 110 additions and 5 deletions

View File

@ -60,6 +60,7 @@ static int paint_down_to_common(struct repository *r,
struct prio_queue queue = { compare_commits_by_gen_then_commit_date };
int i;
timestamp_t last_gen = GENERATION_NUMBER_INFINITY;
struct commit_list **tail = result;

if (!min_generation && !corrected_commit_dates_enabled(r))
queue.compare = compare_commits_by_commit_date;
@ -95,7 +96,7 @@ static int paint_down_to_common(struct repository *r,
if (flags == (PARENT1 | PARENT2)) {
if (!(commit->object.flags & RESULT)) {
commit->object.flags |= RESULT;
commit_list_insert_by_date(commit, result);
tail = commit_list_append(commit, tail);
}
/* Mark parents of a found merge stale */
flags |= STALE;
@ -128,6 +129,7 @@ static int paint_down_to_common(struct repository *r,
}

clear_prio_queue(&queue);
commit_list_sort_by_date(result);
return 0;
}

@ -136,7 +138,7 @@ static int merge_bases_many(struct repository *r,
struct commit **twos,
struct commit_list **result)
{
struct commit_list *list = NULL;
struct commit_list *list = NULL, **tail = result;
int i;

for (i = 0; i < n; i++) {
@ -171,8 +173,9 @@ static int merge_bases_many(struct repository *r,
while (list) {
struct commit *commit = pop_commit(&list);
if (!(commit->object.flags & STALE))
commit_list_insert_by_date(commit, result);
tail = commit_list_append(commit, tail);
}
commit_list_sort_by_date(result);
return 0;
}

@ -425,7 +428,7 @@ static int get_merge_bases_many_0(struct repository *r,
int cleanup,
struct commit_list **result)
{
struct commit_list *list;
struct commit_list *list, **tail = result;
struct commit **rslt;
size_t cnt, i;
int ret;
@ -461,7 +464,8 @@ static int get_merge_bases_many_0(struct repository *r,
return -1;
}
for (i = 0; i < cnt; i++)
commit_list_insert_by_date(rslt[i], result);
tail = commit_list_append(rslt[i], tail);
commit_list_sort_by_date(result);
free(rslt);
return 0;
}

101
t/perf/p6010-merge-base.sh Executable file
View File

@ -0,0 +1,101 @@
#!/bin/sh
#
# Perf test for "git merge-base --all" on two commits that share a
# very large number of merge bases.
#

test_description='Test git merge-base'

# Pull in the perf harness; test_perf and the other test_* helpers used
# below are not defined in this file, so presumably they come from here.
. ./perf-lib.sh

# NOTE(review): presumably starts from an empty repository instead of a
# copy of an existing one (the setup below imports all history itself)
# -- confirm against perf-lib.sh.
test_perf_fresh_repo

#
# Creates lots of merges to make history traversal costly. In
# particular it creates 2^($max_level-1)-1 2-way merges on top of
# 2^($max_level-1) root commits. E.g., the commit history looks like
# this for a $max_level of 3:
#
#         _1_
#        /   \
#       2     3
#      / \   / \
#     4   5 6   7
#
# The numbers are the fast-import marks, which also are the commit
# messages. 1 is the HEAD commit and a merge, 2 and 3 are also merges,
# 4-7 are the root commits.
#
build_history () {
	# $1: depth of the tree, $2: depth of this node (default 1),
	# $3: fast-import mark of this node (default 1).
	# Writes a fast-import stream for the subtree rooted at this node
	# to stdout, children first so their marks exist when referenced.
	local depth_limit="$1" &&
	local depth="${2:-1}" &&
	local id="${3:-1}" &&
	if test $depth -eq $depth_limit
	then
		# Leaf: a parentless commit.  Resetting the branch to the
		# zero OID first keeps it from inheriting the previous tip.
		printf "%s\n" \
			"reset refs/heads/master" \
			"from $ZERO_OID" \
			"commit refs/heads/master" \
			"mark :$id" \
			"committer C <c@example.com> 1234567890 +0000" \
			"data <<EOF" \
			"$id" \
			"EOF"
	else
		# Inner node: a 2-way merge of the children 2*id and 2*id+1.
		local child_depth=$((depth + 1)) &&
		local left=$((2 * id)) &&
		local right=$((2 * id + 1)) &&
		build_history "$depth_limit" "$child_depth" "$left" &&
		build_history "$depth_limit" "$child_depth" "$right" &&
		printf "%s\n" \
			"commit refs/heads/master" \
			"mark :$id" \
			"committer C <c@example.com> 1234567890 +0000" \
			"data <<EOF" \
			"$id" \
			"EOF" \
			"from :$left" \
			"merge :$right"
	fi
}

#
# Creates a new merge history in the same shape as build_history does,
# while reusing the same root commits. This way the two top commits
# have 2^($max_level-1) merge bases between them.
#
build_history2 () {
	# Same arguments as build_history: $1 is the tree depth, $2 the
	# depth of this node (default 1), $3 its mark (default 1).
	# Only merge commits are written; nodes at the deepest level emit
	# nothing, so their marks must already be known to fast-import.
	local depth_limit="$1" &&
	local depth="${2:-1}" &&
	local id="${3:-1}" &&
	if ! test $depth -lt $depth_limit
	then
		return 0
	fi &&
	local child_depth=$((depth + 1)) &&
	local left=$((2 * id)) &&
	local right=$((2 * id + 1)) &&
	build_history2 "$depth_limit" "$child_depth" "$left" &&
	build_history2 "$depth_limit" "$child_depth" "$right" &&
	# The " II" suffix makes these commits differ from the merges of
	# the first history even when they combine the same parents.
	printf "%s\n" \
		"commit refs/heads/master" \
		"mark :$id" \
		"committer C <c@example.com> 1234567890 +0000" \
		"data <<EOF" \
		"$id II" \
		"EOF" \
		"from :$left" \
		"merge :$right"
}

# Import two merge trees of depth 15 that share the same 2^14 = 16384
# root commits and tag their tips "one" and "two".  The marks file
# carries the marks of the first import over to the second, which is
# how build_history2 can refer to the roots without re-creating them.
# NOTE(review): --force is presumably needed because the second import
# moves master to commits that do not descend from its previous tip --
# confirm against the git-fast-import documentation.
# "expect" receives the hashes of all non-merge commits as listed by
# git log; per the build_history2 description these roots are the merge
# bases of the two tips.
test_expect_success 'setup' '
max_level=15 &&
build_history $max_level | git fast-import --export-marks=marks &&
git tag one &&
build_history2 $max_level | git fast-import --import-marks=marks --force &&
git tag two &&
git gc &&
git log --format=%H --no-merges >expect
'

# The timed command: list every merge base of the two tagged tips.
# The output is written to "actual" so it can be compared with "expect".
test_perf 'git merge-base' '
git merge-base --all one two >actual
'

# Check that the timed command reported exactly the commits recorded in
# "expect" during setup.
# NOTE(review): this presumably also depends on git log and
# "git merge-base --all" listing the commits in the same order, since
# all commits carry the same committer timestamp -- confirm.
test_expect_success 'verify result' '
test_cmp expect actual
'

test_done