From a0c9016abd566e9a8f988dcd387663cd0b2be078 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Fri, 6 Jul 2018 12:34:09 -0700 Subject: [PATCH 1/2] upload-pack: send refs' objects despite "filter" A filter line in a request to upload-pack filters out objects regardless of whether they are directly referenced by a "want" line or not. This means that cloning with "--filter=blob:none" (or another filter that excludes blobs) from a repository with at least one ref pointing to a blob (for example, the Git repository itself) results in output like the following: error: missing object referenced by 'refs/tags/junio-gpg-pub' and if that particular blob is not referenced by a fetched tree, the resulting clone fails fsck because there is no object from the remote to vouch that the missing object is a promisor object. Update both the protocol and the upload-pack implementation to include all explicitly specified "want" objects in the packfile regardless of the filter specification. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- Documentation/technical/pack-protocol.txt | 4 +++- list-objects.c | 6 +++--- object.h | 2 +- revision.c | 1 + revision.h | 3 ++- t/t5317-pack-objects-filter-objects.sh | 16 ++++++++++++++++ t/t5616-partial-clone.sh | 16 ++++++++++++++++ 7 files changed, 42 insertions(+), 6 deletions(-) diff --git a/Documentation/technical/pack-protocol.txt b/Documentation/technical/pack-protocol.txt index 7fee6b780a..508a344cf1 100644 --- a/Documentation/technical/pack-protocol.txt +++ b/Documentation/technical/pack-protocol.txt @@ -284,7 +284,9 @@ information is sent back to the client in the next step. The client can optionally request that pack-objects omit various objects from the packfile using one of several filtering techniques. These are intended for use with partial clone and partial fetch -operations. See `rev-list` for possible "filter-spec" values. +operations. An object that does not meet a filter-spec value is +omitted unless explicitly requested in a 'want' line. See `rev-list` +for possible filter-spec values. Once all the 'want's and 'shallow's (and optional 'deepen') are transferred, clients MUST send a flush-pkt, to tell the server side diff --git a/list-objects.c b/list-objects.c index 3eec510357..be4118889a 100644 --- a/list-objects.c +++ b/list-objects.c @@ -47,7 +47,7 @@ static void process_blob(struct rev_info *revs, pathlen = path->len; strbuf_addstr(path, name); - if (filter_fn) + if (!(obj->flags & USER_GIVEN) && filter_fn) r = filter_fn(LOFS_BLOB, obj, path->buf, &path->buf[pathlen], filter_data); @@ -132,7 +132,7 @@ static void process_tree(struct rev_info *revs, } strbuf_addstr(base, name); - if (filter_fn) + if (!(obj->flags & USER_GIVEN) && filter_fn) r = filter_fn(LOFS_BEGIN_TREE, obj, base->buf, &base->buf[baselen], filter_data); @@ -171,7 +171,7 @@ static void process_tree(struct rev_info *revs, cb_data, filter_fn, filter_data); } - if (filter_fn) { + if (!(obj->flags & USER_GIVEN) && filter_fn) { r = filter_fn(LOFS_END_TREE, obj, base->buf, &base->buf[baselen], filter_data); diff --git a/object.h b/object.h index 5c13955000..cf8da6ba83 100644 --- a/object.h +++ b/object.h @@ -27,7 +27,7 @@ struct object_array { /* * object flag allocation: - * revision.h: 0---------10 26 + * revision.h: 0---------10 2526 * fetch-pack.c: 0----5 * walker.c: 0-2 * upload-pack.c: 4 11----------------19 diff --git a/revision.c b/revision.c index 40fd91ff2b..1b37da988d 100644 --- a/revision.c +++ b/revision.c @@ -172,6 +172,7 @@ static void add_pending_object_with_path(struct rev_info *revs, strbuf_release(&buf); return; /* do not add the commit itself */ } + obj->flags |= USER_GIVEN; add_object_array_with_path(obj, name, &revs->pending, mode, path); } diff --git a/revision.h b/revision.h index b8c47b98e2..dc8f96366e 100644 --- a/revision.h +++ b/revision.h @@ -19,8 +19,9 @@ #define SYMMETRIC_LEFT (1u<<8) #define PATCHSAME (1u<<9) #define BOTTOM (1u<<10) +#define USER_GIVEN (1u<<25) /* given directly by the user */ #define TRACK_LINEAR (1u<<26) -#define ALL_REV_FLAGS (((1u<<11)-1) | TRACK_LINEAR) +#define ALL_REV_FLAGS (((1u<<11)-1) | USER_GIVEN | TRACK_LINEAR) #define DECORATE_SHORT_REFS 1 #define DECORATE_FULL_REFS 2 diff --git a/t/t5317-pack-objects-filter-objects.sh b/t/t5317-pack-objects-filter-objects.sh index 1b0acc383b..6710c8bc8c 100755 --- a/t/t5317-pack-objects-filter-objects.sh +++ b/t/t5317-pack-objects-filter-objects.sh @@ -160,6 +160,22 @@ test_expect_success 'verify blob:limit=1k' ' test_cmp observed expected ' +test_expect_success 'verify explicitly specifying oversized blob in input' ' + git -C r2 ls-files -s large.1000 large.10000 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r2 pack-objects --rev --stdout --filter=blob:limit=1k >filter.pack <<-EOF && + HEAD + $(git -C r2 rev-parse HEAD:large.10000) + EOF + git -C r2 index-pack ../filter.pack && + git -C r2 verify-pack -v ../filter.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + test_expect_success 'verify blob:limit=1m' ' git -C r2 ls-files -s large.1000 large.10000 \ | awk -f print_2.awk \ diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index cee5565367..8a2bf86491 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -154,4 +154,20 @@ test_expect_success 'partial clone with transfer.fsckobjects=1 uses index-pack - grep "git index-pack.*--fsck-objects" trace ' +test_expect_success 'partial clone fetches blobs pointed to by refs even if normally filtered out' ' + rm -rf src dst && + git init src && + test_commit -C src x && + test_config -C src uploadpack.allowfilter 1 && + test_config -C src uploadpack.allowanysha1inwant 1 && + + # Create a tag pointing to a blob. + BLOB=$(echo blob-contents | git -C src hash-object --stdin -w) && + git -C src tag myblob "$BLOB" && + + git clone --filter="blob:none" "file://$(pwd)/src" dst 2>err && + ! grep "does not point to a valid object" err && + git -C dst fsck +' + test_done From a7e67c11b8b983b4a9f56f0b8990a550946ed6b0 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Fri, 6 Jul 2018 12:34:10 -0700 Subject: [PATCH 2/2] clone: check connectivity even if clone is partial The commit that introduced the partial clone feature - 548719fbdc ("clone: partial clone", 2017-12-08) - excluded connectivity checks for partial clones, but this also meant that it is possible for a clone to succeed, yet not have all objects either present or promised. Specifically, if cloning with --filter=blob:none from a repository that has a tag pointing to a blob, and the blob is not sent in the packfile, the clone will pass, even if the blob is not referenced by any tree in the packfile. Turn on connectivity checks for partial clone. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- builtin/clone.c | 2 +- t/t5616-partial-clone.sh | 48 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/builtin/clone.c b/builtin/clone.c index 8f86d99c51..fa53550758 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1201,7 +1201,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) update_remote_refs(refs, mapped_refs, remote_head_points_at, branch_top.buf, reflog_msg.buf, transport, - !is_local && !filter_options.choice); + !is_local); update_head(our_head_points_at, remote_head, reflog_msg.buf); diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index 8a2bf86491..44d8e80171 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -170,4 +170,52 @@ test_expect_success 'partial clone fetches blobs pointed to by refs even if norm git -C dst fsck ' +. "$TEST_DIRECTORY"/lib-httpd.sh +start_httpd + +# Converts bytes into a form suitable for inclusion in a sed command. For +# example, "printf 'ab\r\n' | hex_unpack" results in '\x61\x62\x0d\x0a'. +sed_escape () { + perl -e '$/ = undef; $input = <>; print unpack("H2" x length($input), $input)' | + sed 's/\(..\)/\\x\1/g' +} + +test_expect_success 'upon cloning, check that all refs point to objects' ' + SERVER="$HTTPD_DOCUMENT_ROOT_PATH/server" && + rm -rf "$SERVER" repo && + test_create_repo "$SERVER" && + test_commit -C "$SERVER" foo && + test_config -C "$SERVER" uploadpack.allowfilter 1 && + test_config -C "$SERVER" uploadpack.allowanysha1inwant 1 && + + # Create a tag pointing to a blob. + BLOB=$(echo blob-contents | git -C "$SERVER" hash-object --stdin -w) && + git -C "$SERVER" tag myblob "$BLOB" && + + # Craft a packfile not including that blob. + git -C "$SERVER" rev-parse HEAD | + git -C "$SERVER" pack-objects --stdout >incomplete.pack && + + # Replace the existing packfile with the crafted one. The protocol + # requires that the packfile be sent in sideband 1, hence the extra + # \x01 byte at the beginning. + printf "1,/packfile/!c %04x\\\\x01%s0000" \ + "$(($(wc -c "$HTTPD_ROOT_PATH/one-time-sed" && + + # Use protocol v2 because the sed command looks for the "packfile" + # section header. + test_config -C "$SERVER" protocol.version 2 && + test_must_fail git -c protocol.version=2 clone \ + --filter=blob:none $HTTPD_URL/one_time_sed/server repo 2>err && + + grep "did not send all necessary objects" err && + + # Ensure that the one-time-sed script was used. + ! test -e "$HTTPD_ROOT_PATH/one-time-sed" +' + +stop_httpd + test_done