Merge branch 'jk/fewer-pack-rescan'
Internally we use 0{40} as a placeholder object name to signal a codepath that there is no such object (e.g. the fast-forward check while "git fetch" stores a new remote-tracking ref says "we know there is no 'old' thing pointed at by the ref, as we are creating it anew" by passing 0{40} for the 'old' side), and expect a codepath that locates an in-core object to return NULL as a sign that the object does not exist. A look-up for an object that does not exist, however, is quite costly in a repository with a large number of packfiles. This access pattern has been optimized.

* jk/fewer-pack-rescan:
  sha1_file: fast-path null sha1 as a missing object
  everything_local: use "quick" object existence check
  p5551: add a script to test fetch pack-dir rescans
  t/perf/lib-pack: use fast-import checkpoint to create packs
  p5550: factor out nonsense-pack creation
commit 79bafd23a8
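As an aside on the 0{40} placeholder described above (not part of this series), the same all-zeros name is what the update-ref plumbing accepts as the expected 'old' value to mean "this ref must not exist yet". A minimal illustration, where $new_commit stands in for whatever object is being stored:

    # Create a ref only if it does not already exist: passing 40 zeros
    # as <oldvalue> tells git the ref must currently be absent.
    git update-ref refs/remotes/origin/topic "$new_commit" \
        0000000000000000000000000000000000000000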
fetch-pack.c
@@ -716,7 +716,8 @@ static int everything_local(struct fetch_pack_args *args,
 	for (ref = *refs; ref; ref = ref->next) {
 		struct object *o;
 
-		if (!has_object_file(&ref->old_oid))
+		if (!has_object_file_with_flags(&ref->old_oid,
+						OBJECT_INFO_QUICK))
 			continue;
 
 		o = parse_object(&ref->old_oid);

sha1_file.c
@@ -1164,6 +1164,9 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
 		lookup_replace_object(sha1) :
 		sha1;
 
+	if (is_null_sha1(real))
+		return -1;
+
 	if (!oi)
 		oi = &blank_oi;
 

t/perf/lib-pack.sh (new file)
@@ -0,0 +1,25 @@
+# Helpers for dealing with large numbers of packs.
+
+# create $1 nonsense packs, each with a single blob
+create_packs () {
+	perl -le '
+		my ($n) = @ARGV;
+		for (1..$n) {
+			print "blob";
+			print "data <<EOF";
+			print "$_";
+			print "EOF";
+			print "checkpoint"
+		}
+	' "$@" |
+	git fast-import
+}
+
+# create a large number of packs, disabling any gc which might
+# cause us to repack them
+setup_many_packs () {
+	git config gc.auto 0 &&
+	git config gc.autopacklimit 0 &&
+	git config fastimport.unpacklimit 0 &&
+	create_packs 500
+}
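For reference (not part of the patch itself), the stream that create_packs feeds to fast-import for, say, two packs looks like the following; each "checkpoint" command makes fast-import close the packfile it has written so far, which is how the helper ends up with one tiny pack per blob:

    blob
    data <<EOF
    1
    EOF
    checkpoint
    blob
    data <<EOF
    2
    EOF
    checkpoint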
t/perf/p5550-fetch-tags.sh
@@ -20,6 +20,7 @@ start to show a noticeable performance problem on my machine, but without
 taking too long to set up and run the tests.
 '
 . ./perf-lib.sh
+. "$TEST_DIRECTORY/perf/lib-pack.sh"
 
 # make a long nonsense history on branch $1, consisting of $2 commits, each
 # with a unique file pointing to the blob at $2.

t/perf/p5550-fetch-tags.sh
@@ -44,26 +45,6 @@ create_tags () {
 	git update-ref --stdin
 }
 
-# create $1 nonsense packs, each with a single blob
-create_packs () {
-	perl -le '
-		my ($n) = @ARGV;
-		for (1..$n) {
-			print "blob";
-			print "data <<EOF";
-			print "$_";
-			print "EOF";
-		}
-	' "$@" |
-	git fast-import &&
-
-	git cat-file --batch-all-objects --batch-check='%(objectname)' |
-	while read sha1
-	do
-		echo $sha1 | git pack-objects .git/objects/pack/pack
-	done
-}
-
 test_expect_success 'create parent and child' '
 	git init parent &&
 	git -C parent commit --allow-empty -m base &&

t/perf/p5550-fetch-tags.sh
@@ -84,9 +65,7 @@ test_expect_success 'populate parent tags' '
 test_expect_success 'create child packs' '
 	(
 		cd child &&
-		git config gc.auto 0 &&
-		git config gc.autopacklimit 0 &&
-		create_packs 500
+		setup_many_packs
 	)
 '
 
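If you want to verify that the setup step really produced the intended pack explosion, a quick check along these lines (my own sanity check, not part of the patch) should report on the order of 500 packs in the child repository:

    # "packs:" in `git count-objects -v` output counts the packfiles in
    # .git/objects/pack; expect roughly 500 after setup_many_packs.
    git -C child count-objects -v | grep "^packs:"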
t/perf/p5551-fetch-rescan.sh (new file)
@@ -0,0 +1,55 @@
+#!/bin/sh
+
+test_description='fetch performance with many packs
+
+It is common for fetch to consider objects that we might not have, and it is an
+easy mistake for the code to use a function like `parse_object` that might
+give the correct _answer_ on such an object, but do so slowly (due to
+re-scanning the pack directory for lookup failures).
+
+The resulting performance drop can be hard to notice in a real repository, but
+becomes quite large in a repository with a large number of packs. So this
+test creates a more pathological case, since any mistakes would produce a more
+noticeable slowdown.
+'
+. ./perf-lib.sh
+. "$TEST_DIRECTORY"/perf/lib-pack.sh
+
+test_expect_success 'create parent and child' '
+	git init parent &&
+	git clone parent child
+'
+
+
+test_expect_success 'create refs in the parent' '
+	(
+		cd parent &&
+		git commit --allow-empty -m foo &&
+		head=$(git rev-parse HEAD) &&
+		test_seq 1000 |
+		sed "s,.*,update refs/heads/& $head," |
+		$MODERN_GIT update-ref --stdin
+	)
+'
+
+test_expect_success 'create many packs in the child' '
+	(
+		cd child &&
+		setup_many_packs
+	)
+'
+
+test_perf 'fetch' '
+	# start at the same state for each iteration
+	obj=$($MODERN_GIT -C parent rev-parse HEAD) &&
+	(
+		cd child &&
+		$MODERN_GIT for-each-ref --format="delete %(refname)" refs/remotes |
+		$MODERN_GIT update-ref --stdin &&
+		rm -vf .git/objects/$(echo $obj | sed "s|^..|&/|") &&
+
+		git fetch
+	)
+'
+
+test_done
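Assuming the usual t/perf workflow, a run that compares a pre-series build against this merge might look like the following (revision names are only examples):

    # From a git checkout; ./run builds the named revisions and runs the
    # given perf script against each, printing a comparison table.
    cd t/perf &&
    ./run v2.14.2 HEAD p5551-fetch-rescan.sh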