From 7a57fb1a597eb2fa281c00cac94863ad0d7d7f6c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 5 Sep 2025 08:18:01 +0200 Subject: [PATCH 1/2] t5530: modernize tests Refactor tests to follow modern best practices: - Merge together tests that set up and verify a single use case. - Drop empty newlines at the beginning and end of test bodies. - Don't change directories in the main test body. - Remove an unused `D` variable. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- t/t5530-upload-pack-error.sh | 33 +++++---------------------------- 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/t/t5530-upload-pack-error.sh b/t/t5530-upload-pack-error.sh index 558eedf25a..8e505786f1 100755 --- a/t/t5530-upload-pack-error.sh +++ b/t/t5530-upload-pack-error.sh @@ -4,8 +4,6 @@ test_description='errors in upload-pack' . ./test-lib.sh -D=$(pwd) - corrupt_repo () { object_sha1=$(git rev-parse "$1") && ob=$(expr "$object_sha1" : "\(..\)") && @@ -21,11 +19,7 @@ test_expect_success 'setup and corrupt repository' ' test_tick && echo changed >file && git commit -a -m changed && - corrupt_repo HEAD:file - -' - -test_expect_success 'fsck fails' ' + corrupt_repo HEAD:file && test_must_fail git fsck ' @@ -40,17 +34,12 @@ test_expect_success 'upload-pack fails due to error in pack-objects packing' ' ' test_expect_success 'corrupt repo differently' ' - git hash-object -w file && - corrupt_repo HEAD^^{tree} - -' - -test_expect_success 'fsck fails' ' + corrupt_repo HEAD^^{tree} && test_must_fail git fsck ' -test_expect_success 'upload-pack fails due to error in rev-list' ' +test_expect_success 'upload-pack fails due to error in rev-list' ' printf "%04xwant %s\n%04xshallow %s00000009done\n0000" \ $(($hexsz + 10)) $(git rev-parse HEAD) \ $(($hexsz + 12)) $(git rev-parse HEAD^) >input && @@ -59,7 +48,6 @@ test_expect_success 'upload-pack fails due to error in rev-list' ' ' test_expect_success 'upload-pack fails due to bad want (no object)' ' - printf "%04xwant %s multi_ack_detailed\n00000009done\n0000" \ $(($hexsz + 29)) $(test_oid deadbeef) >input && test_must_fail git upload-pack . output 2>output.err && @@ -69,7 +57,6 @@ test_expect_success 'upload-pack fails due to bad want (no object)' ' ' test_expect_success 'upload-pack fails due to bad want (not tip)' ' - oid=$(echo an object we have | git hash-object -w --stdin) && printf "%04xwant %s multi_ack_detailed\n00000009done\n0000" \ $(($hexsz + 29)) "$oid" >input && @@ -80,7 +67,6 @@ test_expect_success 'upload-pack fails due to bad want (not tip)' ' ' test_expect_success 'upload-pack fails due to error in pack-objects enumeration' ' - printf "%04xwant %s\n00000009done\n0000" \ $((hexsz + 10)) $(git rev-parse HEAD) >input && test_must_fail git upload-pack . /dev/null 2>output.err && @@ -105,18 +91,9 @@ test_expect_success 'upload-pack tolerates EOF just after stateless client wants test_cmp expect actual ' -test_expect_success 'create empty repository' ' - - mkdir foo && - cd foo && - git init - -' - test_expect_success 'fetch fails' ' - - test_must_fail git fetch .. main - + git init foo && + test_must_fail git -C foo fetch .. main ' test_done From 88a2dc68c8c9a2cf04c6d6d52e4ac3b26788e273 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 5 Sep 2025 08:18:02 +0200 Subject: [PATCH 2/2] upload-pack: don't ACK non-commits repeatedly in protocol v2 When a client performs a fetch or clone they can optionally send "have" lines to tell the server which objects they already have available locally. These object IDs are stored by the server in an object array so that it can remember any objects it doesn't have to include in the pack sent to the client. While there isn't any reason to do so, clients are free to send the same "have" line repeatedly. git-upload-pack(1) already knows to handle this well: every commit it has seen via a "have" line gets marked with the `THEY_HAVE` flag, and if such a commit is seen repeatedly we know to not process it another time. This also has the effect that we only store the object ID once, only, in the `have_obj` array. There is an edge case though: if the client sends an object ID that does not refer to a commit we neither store nor check the `THEY_HAVE` flag. This means that we repeatedly store the same object ID in our `have_obj` array, with two consequences: - In protocol v2 we deduplicate ACKs for commits, but not for any other objects as we send ACKs for every object ID in the `have_obj` array. - The `have_obj` array can grow in size indefinitely with both protocols. The potentially-more-serious issue is the second one, as we basically have a way for an adversary to allocate arbitrarily large buffers now. Ultimately, this doesn't seem to be all that serious though: on my machine, the growth of that array is at around 4MB/s, and after roughly five minutes I was only at 1GB RSS. So this is concerning, but only mildly so. Fix this bug by storing the `THEY_HAVE` flag independent of the object type so that we don't store duplicate object IDs in `have_obj` anymore. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- t/t5530-upload-pack-error.sh | 39 ++++++++++++++++++++++++++++++++++++ upload-pack.c | 19 +++++++++--------- 2 files changed, 48 insertions(+), 10 deletions(-) diff --git a/t/t5530-upload-pack-error.sh b/t/t5530-upload-pack-error.sh index 8e505786f1..d40292cfb7 100755 --- a/t/t5530-upload-pack-error.sh +++ b/t/t5530-upload-pack-error.sh @@ -96,4 +96,43 @@ test_expect_success 'fetch fails' ' test_must_fail git -C foo fetch .. main ' +test_expect_success 'upload-pack ACKs repeated non-commit objects repeatedly (protocol v0)' ' + commit_id=$(git rev-parse HEAD) && + tree_id=$(git rev-parse HEAD^{tree}) && + test-tool pkt-line pack >request <<-EOF && + want $commit_id + 0000 + have $tree_id + have $tree_id + 0000 + EOF + git upload-pack --stateless-rpc . actual && + depacketize actual.raw && + grep ^ACK actual.raw >actual.acks && + cat >expect <<-EOF && + ACK $tree_id + ACK $tree_id + EOF + test_cmp expect actual.acks +' + +test_expect_success 'upload-pack ACKs repeated non-commit objects once only (protocol v2)' ' + commit_id=$(git rev-parse HEAD) && + tree_id=$(git rev-parse HEAD^{tree}) && + test-tool pkt-line pack >request <<-EOF && + command=fetch + object-format=$(test_oid algo) + 0001 + want $commit_id + have $tree_id + have $tree_id + 0000 + EOF + GIT_PROTOCOL=version=2 git upload-pack . actual && + depacketize actual.raw && + grep ^ACK actual.raw >actual.acks && + echo "ACK $tree_id" >expect && + test_cmp expect actual.acks +' + test_done diff --git a/upload-pack.c b/upload-pack.c index 26f29b85b5..037e10e8fa 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -476,20 +476,17 @@ static void create_pack_file(struct upload_pack_data *pack_data, static int do_got_oid(struct upload_pack_data *data, const struct object_id *oid) { - int we_knew_they_have = 0; struct object *o = parse_object_with_flags(the_repository, oid, PARSE_OBJECT_SKIP_HASH_CHECK | PARSE_OBJECT_DISCARD_TREE); if (!o) die("oops (%s)", oid_to_hex(oid)); + if (o->type == OBJ_COMMIT) { struct commit_list *parents; struct commit *commit = (struct commit *)o; - if (o->flags & THEY_HAVE) - we_knew_they_have = 1; - else - o->flags |= THEY_HAVE; + if (!data->oldest_have || (commit->date < data->oldest_have)) data->oldest_have = commit->date; for (parents = commit->parents; @@ -497,11 +494,13 @@ static int do_got_oid(struct upload_pack_data *data, const struct object_id *oid parents = parents->next) parents->item->object.flags |= THEY_HAVE; } - if (!we_knew_they_have) { - add_object_array(o, NULL, &data->have_obj); - return 1; - } - return 0; + + if (o->flags & THEY_HAVE) + return 0; + o->flags |= THEY_HAVE; + + add_object_array(o, NULL, &data->have_obj); + return 1; } static int got_oid(struct upload_pack_data *data,