From 2557645357bb38ebf32f60945ec5a423e5e57cea Mon Sep 17 00:00:00 2001 From: Pablo Sabater Date: Wed, 1 Jul 2026 14:18:35 +0200 Subject: [PATCH 01/13] transport-helper: fix memory leak of helper on disconnect `disconnect_helper()` only frees data inside of the `if(data->helper)` block [1]. When the transport is disconnected without the helper being fully started, `data->name` allocated in `transport_helper_init()` is never freed. Move `FREE_AND_NULL(data->name)` outside the conditional block so it's always freed on disconnect. [1]: https://lore.kernel.org/git/05fbadbae2184479c87c37675dde7bd79b3e32ab.1716465556.git.ps@pks.im/ Mentored-by: Karthik Nayak Mentored-by: Chandra Pratap Signed-off-by: Pablo Sabater Signed-off-by: Junio C Hamano --- transport-helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/transport-helper.c b/transport-helper.c index 0fa0eb2d72..8a71354d50 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -266,9 +266,9 @@ static int disconnect_helper(struct transport *transport) close(data->helper->out); fclose(data->out); res = finish_command(data->helper); - FREE_AND_NULL(data->name); FREE_AND_NULL(data->helper); } + FREE_AND_NULL(data->name); return res; } From 41009e654566d7005533a9f56b7a18eb9f1b2cab Mon Sep 17 00:00:00 2001 From: Eric Ju Date: Wed, 1 Jul 2026 14:18:36 +0200 Subject: [PATCH 02/13] git-compat-util: add `strtoumax_szt()` with error handling We already have `strtoul_ui()` and similar functions that provide proper error handling using `strtoul` from the standard library. However, there isn't currently a variant that returns a `size_t`. Using `strtoul` is unreliable because `size_t` is platform-dependent: `unsigned long` could be too big to fit into a `size_t` or too small to hold a `size_t`. Use `strtoumax` which returns a `uintmax_t` guaranteed to be at least as large as `size_t`, and add a range check against `SIZE_MAX` to prevent `size_t` overflow. 
This variant is needed in a subsequent commit to enable returning a `size_t` with proper error handling. Mentored-by: Karthik Nayak Mentored-by: Chandra Pratap Signed-off-by: Pablo Sabater Signed-off-by: Junio C Hamano --- git-compat-util.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/git-compat-util.h b/git-compat-util.h index 8809776407..5ecce5bbd2 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -975,6 +975,26 @@ static inline int strtoul_ui(char const *s, int base, unsigned int *result) return 0; } +/* + * Convert a string to a size_t using the standard library's strtoumax, with + * additional error handling to ensure robustness. + */ +static inline int strtoumax_szt(char const *s, int base, size_t *result) +{ + uintmax_t uim; + char *p; + + errno = 0; + /* negative values would be accepted by strtoul */ + if (strchr(s, '-')) + return -1; + uim = strtoumax(s, &p, base); + if ((errno || *p || p == s) || uim > SIZE_MAX) + return -1; + *result = uim; + return 0; +} + static inline int strtol_i(char const *s, int base, int *result) { long ul; From 70aea7c64cbe92c600c5e2b162efcf6a89e3e24d Mon Sep 17 00:00:00 2001 From: Eric Ju Date: Wed, 1 Jul 2026 14:18:37 +0200 Subject: [PATCH 03/13] cat-file: declare loop counter inside for() Some code used in this series declares variable i and only uses it in a for loop, not in any other logic outside the loop. Change the declaration of i to be inside the for loop for readability. While at it, we also change its type from `int` to `size_t` where the latter makes more sense. 
Helped-by: Christian Couder Signed-off-by: Eric Ju Signed-off-by: Pablo Sabater Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 13 ++++--------- fetch-pack.c | 3 +-- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 446d649904..fab55c11de 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -723,14 +723,12 @@ static void dispatch_calls(struct batch_options *opt, struct strbuf *output, struct expand_data *data, struct queued_cmd *cmd, - int nr) + size_t nr) { - int i; - if (!opt->buffer_output) die(_("flush is only for --buffer mode")); - for (i = 0; i < nr; i++) + for (size_t i = 0; i < nr; i++) cmd[i].fn(opt, cmd[i].line, output, data); fflush(stdout); @@ -738,9 +736,7 @@ static void dispatch_calls(struct batch_options *opt, static void free_cmds(struct queued_cmd *cmd, size_t *nr) { - size_t i; - - for (i = 0; i < *nr; i++) + for (size_t i = 0; i < *nr; i++) FREE_AND_NULL(cmd[i].line); *nr = 0; @@ -767,7 +763,6 @@ static void batch_objects_command(struct batch_options *opt, size_t alloc = 0, nr = 0; while (strbuf_getdelim_strip_crlf(&input, stdin, opt->input_delim) != EOF) { - int i; const struct parse_cmd *cmd = NULL; const char *p = NULL, *cmd_end; struct queued_cmd call = {0}; @@ -777,7 +772,7 @@ static void batch_objects_command(struct batch_options *opt, if (isspace(*input.buf)) die(_("whitespace before command: '%s'"), input.buf); - for (i = 0; i < ARRAY_SIZE(commands); i++) { + for (size_t i = 0; i < ARRAY_SIZE(commands); i++) { if (!skip_prefix(input.buf, commands[i].name, &cmd_end)) continue; diff --git a/fetch-pack.c b/fetch-pack.c index 120e01f3cf..f13951d154 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1388,9 +1388,8 @@ static void write_fetch_command_and_capabilities(struct strbuf *req_buf, if (advertise_sid && server_supports_v2("session-id")) packet_buf_write(req_buf, "session-id=%s", trace2_session_id()); if (server_options && server_options->nr) { - int i; 
ensure_server_supports_v2("server-option"); - for (i = 0; i < server_options->nr; i++) + for (size_t i = 0; i < server_options->nr; i++) packet_buf_write(req_buf, "server-option=%s", server_options->items[i].string); } From 36542fe2afe7401e85e0355af5289203ca80794b Mon Sep 17 00:00:00 2001 From: Eric Ju Date: Wed, 1 Jul 2026 14:18:38 +0200 Subject: [PATCH 04/13] t1006: split test utility functions into new 'lib-cat-file.sh' This refactor extracts utility functions from the cat-file's test script 't1006-cat-file.sh' into a new 'lib-cat-file.sh' dedicated library file. A subsequent commit will need these functions; the goal is to improve code reuse and readability, enabling future tests to leverage these utilities without duplicating code. Signed-off-by: Pablo Sabater Signed-off-by: Junio C Hamano --- t/lib-cat-file.sh | 16 ++++++++++++++++ t/t1006-cat-file.sh | 13 +------------ 2 files changed, 17 insertions(+), 12 deletions(-) create mode 100644 t/lib-cat-file.sh diff --git a/t/lib-cat-file.sh b/t/lib-cat-file.sh new file mode 100644 index 0000000000..44af232d74 --- /dev/null +++ b/t/lib-cat-file.sh @@ -0,0 +1,16 @@ +# Library of git-cat-file related test functions. + +# Print a string without a trailing newline. +echo_without_newline () { + printf '%s' "$*" +} + +# Print a string without newlines and replace them with a NULL character (\0). +echo_without_newline_nul () { + echo_without_newline "$@" | tr '\n' '\0' +} + +# Calculate the length of a string. +strlen () { + echo_without_newline "$1" | wc -c | sed -e 's/^ *//' +} diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh index 8e2c52652c..8360f3bbd9 100755 --- a/t/t1006-cat-file.sh +++ b/t/t1006-cat-file.sh @@ -4,6 +4,7 @@ test_description='git cat-file' . ./test-lib.sh . "$TEST_DIRECTORY/lib-loose.sh" +. 
"$TEST_DIRECTORY"/lib-cat-file.sh test_cmdmode_usage () { test_expect_code 129 "$@" 2>err && @@ -99,18 +100,6 @@ do ' done -echo_without_newline () { - printf '%s' "$*" -} - -echo_without_newline_nul () { - echo_without_newline "$@" | tr '\n' '\0' -} - -strlen () { - echo_without_newline "$1" | wc -c | sed -e 's/^ *//' -} - run_tests () { type=$1 object_name="$2" From 662db3529574afb9707cab867aa8a6798a4a4845 Mon Sep 17 00:00:00 2001 From: Pablo Sabater Date: Wed, 1 Jul 2026 14:18:39 +0200 Subject: [PATCH 05/13] fetch-pack: drop static `advertise_sid` variable `write_fetch_command_and_capabilities()` is moved to `connect.c` in a subsequent commit. To prepare for that, drop the static variable usage of `advertise_sid`. Currently `advertise_sid` is used in two places: 1. In function `do_fetch_pack()`: if (!server_supports("session_id")) advertise_sid = 0; 2. In function `fetch_pack_config()`: repo_config_get_bool("transfer.advertisesid", &advertise_sid); Since `do_fetch_pack()` is only relevant for protocol v1, it can be ignored because `write_fetch_command_and_capabilities()` is only used in protocol v2. About 2, call `repo_config_get_bool()` directly inside of the function. While at it, change `hash_algo`'s type to match `hash_algo_by_name()`'s actual return type (`unsigned int`) and make it `const`. 
Helped-by: Jonathan Tan Helped-by: Christian Couder Signed-off-by: Calvin Wan Signed-off-by: Eric Ju Signed-off-by: Pablo Sabater Signed-off-by: Junio C Hamano --- fetch-pack.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fetch-pack.c b/fetch-pack.c index f13951d154..ad07603755 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1380,6 +1380,9 @@ static void write_fetch_command_and_capabilities(struct strbuf *req_buf, const struct string_list *server_options) { const char *hash_name; + int advertise_sid; + + repo_config_get_bool(the_repository, "transfer.advertisesid", &advertise_sid); ensure_server_supports_v2("fetch"); packet_buf_write(req_buf, "command=fetch"); @@ -1395,7 +1398,7 @@ static void write_fetch_command_and_capabilities(struct strbuf *req_buf, } if (server_feature_v2("object-format", &hash_name)) { - int hash_algo = hash_algo_by_name(hash_name); + const unsigned int hash_algo = hash_algo_by_name(hash_name); if (hash_algo_by_ptr(the_hash_algo) != hash_algo) die(_("mismatched algorithms: client %s; server %s"), the_hash_algo->name, hash_name); From a50f4a20107ed5b882793e1bc47dddd25ec6eaa5 Mon Sep 17 00:00:00 2001 From: Pablo Sabater Date: Wed, 1 Jul 2026 14:18:40 +0200 Subject: [PATCH 06/13] fetch-pack: move `write_fetch_command_and_capabilities()` to connect.c `write_fetch_command_and_capabilities()` is refactored in a subsequent commit where it becomes a more general-purpose function, making it more accessible to additional commands in the future. Move `write_fetch_command_and_capabilities()` to `connect.c`, where there are similar purpose functions. Because `string_list` is only used as a pointer, use a forward declaration [1]. 
[1]: https://lore.kernel.org/git/Z0RIqUAoEob8lGfM@pks.im/ Helped-by: Jonathan Tan Helped-by: Christian Couder Signed-off-by: Calvin Wan Signed-off-by: Eric Ju Signed-off-by: Pablo Sabater Signed-off-by: Junio C Hamano --- connect.c | 34 ++++++++++++++++++++++++++++++++++ connect.h | 4 ++++ fetch-pack.c | 34 ---------------------------------- 3 files changed, 38 insertions(+), 34 deletions(-) diff --git a/connect.c b/connect.c index 47e39d2a73..1dced8e632 100644 --- a/connect.c +++ b/connect.c @@ -700,6 +700,40 @@ int server_supports(const char *feature) return !!server_feature_value(feature, NULL); } +void write_fetch_command_and_capabilities(struct strbuf *req_buf, + const struct string_list *server_options) +{ + const char *hash_name; + int advertise_sid; + + repo_config_get_bool(the_repository, "transfer.advertisesid", &advertise_sid); + + ensure_server_supports_v2("fetch"); + packet_buf_write(req_buf, "command=fetch"); + if (server_supports_v2("agent")) + packet_buf_write(req_buf, "agent=%s", git_user_agent_sanitized()); + if (advertise_sid && server_supports_v2("session-id")) + packet_buf_write(req_buf, "session-id=%s", trace2_session_id()); + if (server_options && server_options->nr) { + ensure_server_supports_v2("server-option"); + for (size_t i = 0; i < server_options->nr; i++) + packet_buf_write(req_buf, "server-option=%s", + server_options->items[i].string); + } + + if (server_feature_v2("object-format", &hash_name)) { + const unsigned int hash_algo = hash_algo_by_name(hash_name); + if (hash_algo_by_ptr(the_hash_algo) != hash_algo) + die(_("mismatched algorithms: client %s; server %s"), + the_hash_algo->name, hash_name); + packet_buf_write(req_buf, "object-format=%s", the_hash_algo->name); + } else if (hash_algo_by_ptr(the_hash_algo) != GIT_HASH_SHA1_LEGACY) { + die(_("the server does not support algorithm '%s'"), + the_hash_algo->name); + } + packet_buf_delim(req_buf); +} + static const char *url_scheme_name(enum url_scheme scheme) { switch (scheme) { 
diff --git a/connect.h b/connect.h index aa482a37fb..c4f6ea4b0a 100644 --- a/connect.h +++ b/connect.h @@ -34,4 +34,8 @@ void check_stateless_delimiter(int stateless_rpc, struct packet_reader *reader, const char *error); +struct string_list; +void write_fetch_command_and_capabilities(struct strbuf *req_buf, + const struct string_list *server_options); + #endif diff --git a/fetch-pack.c b/fetch-pack.c index ad07603755..4a8a70b5f3 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1376,40 +1376,6 @@ static int add_haves(struct fetch_negotiator *negotiator, return haves_added; } -static void write_fetch_command_and_capabilities(struct strbuf *req_buf, - const struct string_list *server_options) -{ - const char *hash_name; - int advertise_sid; - - repo_config_get_bool(the_repository, "transfer.advertisesid", &advertise_sid); - - ensure_server_supports_v2("fetch"); - packet_buf_write(req_buf, "command=fetch"); - if (server_supports_v2("agent")) - packet_buf_write(req_buf, "agent=%s", git_user_agent_sanitized()); - if (advertise_sid && server_supports_v2("session-id")) - packet_buf_write(req_buf, "session-id=%s", trace2_session_id()); - if (server_options && server_options->nr) { - ensure_server_supports_v2("server-option"); - for (size_t i = 0; i < server_options->nr; i++) - packet_buf_write(req_buf, "server-option=%s", - server_options->items[i].string); - } - - if (server_feature_v2("object-format", &hash_name)) { - const unsigned int hash_algo = hash_algo_by_name(hash_name); - if (hash_algo_by_ptr(the_hash_algo) != hash_algo) - die(_("mismatched algorithms: client %s; server %s"), - the_hash_algo->name, hash_name); - packet_buf_write(req_buf, "object-format=%s", the_hash_algo->name); - } else if (hash_algo_by_ptr(the_hash_algo) != GIT_HASH_SHA1_LEGACY) { - die(_("the server does not support algorithm '%s'"), - the_hash_algo->name); - } - packet_buf_delim(req_buf); -} - static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, struct fetch_pack_args 
*args, const struct ref *wants, struct oidset *common, From 9314e59a1c2baa97fdccb98ee1848d6af26914a3 Mon Sep 17 00:00:00 2001 From: Pablo Sabater Date: Wed, 1 Jul 2026 14:18:41 +0200 Subject: [PATCH 07/13] connect: make `write_fetch_command_and_capabilities()` more generic Refactor `write_fetch_command_and_capabilities()`, enabling it to serve both fetch and additional commands. In this context, "command" refers to the "operations" supported by Git's wire protocol https://git-scm.com/docs/protocol-v2, such as a Git subcommand (e.g., git-fetch(1)) or a server-side operation like "object-info" as implemented in commit a2ba162 (object-info: support for retrieving object info, 2021-04-20). Refactor the function signature to accept a command instead of the hardcoded "fetch". Helped-by: Jonathan Tan Helped-by: Christian Couder Signed-off-by: Calvin Wan Signed-off-by: Eric Ju Signed-off-by: Pablo Sabater Signed-off-by: Junio C Hamano --- connect.c | 8 ++++---- connect.h | 8 ++++++-- fetch-pack.c | 4 ++-- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/connect.c b/connect.c index 1dced8e632..7b472f8e5f 100644 --- a/connect.c +++ b/connect.c @@ -700,16 +700,16 @@ int server_supports(const char *feature) return !!server_feature_value(feature, NULL); } -void write_fetch_command_and_capabilities(struct strbuf *req_buf, - const struct string_list *server_options) +void write_command_and_capabilities(struct strbuf *req_buf, const char *command, + const struct string_list *server_options) { const char *hash_name; int advertise_sid; repo_config_get_bool(the_repository, "transfer.advertisesid", &advertise_sid); - ensure_server_supports_v2("fetch"); - packet_buf_write(req_buf, "command=fetch"); + ensure_server_supports_v2(command); + packet_buf_write(req_buf, "command=%s", command); if (server_supports_v2("agent")) packet_buf_write(req_buf, "agent=%s", git_user_agent_sanitized()); if (advertise_sid && server_supports_v2("session-id")) diff --git a/connect.h 
b/connect.h index c4f6ea4b0a..c2bf492ed9 100644 --- a/connect.h +++ b/connect.h @@ -35,7 +35,11 @@ void check_stateless_delimiter(int stateless_rpc, const char *error); struct string_list; -void write_fetch_command_and_capabilities(struct strbuf *req_buf, - const struct string_list *server_options); +/* + * Writes a command along with the requested server capabilities/features into a + * request buffer. + */ +void write_command_and_capabilities(struct strbuf *req_buf, const char *command, + const struct string_list *server_options); #endif diff --git a/fetch-pack.c b/fetch-pack.c index 4a8a70b5f3..3d32114907 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1387,7 +1387,7 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, int done_sent = 0; struct strbuf req_buf = STRBUF_INIT; - write_fetch_command_and_capabilities(&req_buf, args->server_options); + write_command_and_capabilities(&req_buf, "fetch", args->server_options); if (args->use_thin_pack) packet_buf_write(&req_buf, "thin-pack"); @@ -2255,7 +2255,7 @@ void negotiate_using_fetch(const struct oid_array *negotiation_restrict_tips, the_repository, "%d", negotiation_round); strbuf_reset(&req_buf); - write_fetch_command_and_capabilities(&req_buf, server_options); + write_command_and_capabilities(&req_buf, "fetch", server_options); packet_buf_write(&req_buf, "wait-for-done"); From 34a61a86120f8b64a9b3190f15dea87b99f03602 Mon Sep 17 00:00:00 2001 From: Calvin Wan Date: Wed, 1 Jul 2026 14:18:42 +0200 Subject: [PATCH 08/13] fetch-pack: move fetch initialization There are some variables initialized at the start of the `do_fetch_pack_v2()` state machine. Currently, they are initialized in `FETCH_CHECK_LOCAL`, which is the initial state set at the beginning of the function. However, a subsequent patch will allow for another initial state, while still requiring these initialized variables. 
Move the initialization to be before the state machine, so that they are set regardless of the initial state. Note that there is no change in behavior, because we're moving code from the beginning of the first state to just before the execution of the state machine. Helped-by: Jonathan Tan Helped-by: Christian Couder Signed-off-by: Calvin Wan Signed-off-by: Eric Ju Signed-off-by: Pablo Sabater Signed-off-by: Junio C Hamano --- fetch-pack.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fetch-pack.c b/fetch-pack.c index 3d32114907..cdebd3476f 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1736,18 +1736,18 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, reader.me = "fetch-pack"; } + /* v2 supports these by default */ + allow_unadvertised_object_request |= ALLOW_REACHABLE_SHA1; + use_sideband = 2; + if (args->depth > 0 || args->deepen_since || args->deepen_not) + args->deepen = 1; + while (state != FETCH_DONE) { switch (state) { case FETCH_CHECK_LOCAL: sort_ref_list(&ref, ref_compare_name); QSORT(sought, nr_sought, cmp_ref_by_name); - /* v2 supports these by default */ - allow_unadvertised_object_request |= ALLOW_REACHABLE_SHA1; - use_sideband = 2; - if (args->depth > 0 || args->deepen_since || args->deepen_not) - args->deepen = 1; - /* Filter 'ref' by 'sought' and those that aren't local */ mark_complete_and_common_ref(negotiator, args, &ref); filter_refs(args, &ref, sought, nr_sought); From 3fdd8cc7c391cb37f4f52b943297a1a247eca752 Mon Sep 17 00:00:00 2001 From: Calvin Wan Date: Wed, 1 Jul 2026 14:18:43 +0200 Subject: [PATCH 09/13] serve: advertise object-info feature In order for a client to know what `object-info` components a server can provide, advertise supported `object-info` features. This allows a client to decide whether to query the server for object-info or fetch as a fallback. While at it, update the `object-info` section in `gitprotocol-v2.adoc`: - Require full `obj-oid` explicitly. 
- Fix parentheses. - Define `obj-size` explicitly. - Make `obj-size` optional in `obj-info` and document the behavior for unrecognized object IDs. Helped-by: Jonathan Tan Helped-by: Christian Couder Signed-off-by: Calvin Wan Signed-off-by: Eric Ju Signed-off-by: Pablo Sabater Signed-off-by: Junio C Hamano --- Documentation/gitprotocol-v2.adoc | 11 ++++++++--- serve.c | 5 ++++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/Documentation/gitprotocol-v2.adoc b/Documentation/gitprotocol-v2.adoc index befa697d21..f21a6cbcaa 100644 --- a/Documentation/gitprotocol-v2.adoc +++ b/Documentation/gitprotocol-v2.adoc @@ -568,21 +568,26 @@ An `object-info` request takes the following arguments: oid Indicates to the server an object which the client wants to obtain - information for. + information for. They must be full object IDs. The response of `object-info` is a list of the requested object ids and associated requested information, each separated by a single space. output = info flush-pkt - info = PKT-LINE(attrs) LF) + info = PKT-LINE(attrs LF) *PKT-LINE(obj-info LF) attrs = attr | attrs SP attrs + obj-size = 1*DIGIT + attr = "size" - obj-info = obj-id SP obj-size + obj-info = obj-id SP [obj-size] + + If the server does not recognize the object id, the response will be + `obj-id SP` regardless of the number of attributes requested. 
bundle-uri ~~~~~~~~~~ diff --git a/serve.c b/serve.c index 49a6e39b1d..2b07d922b3 100644 --- a/serve.c +++ b/serve.c @@ -89,7 +89,7 @@ static void session_id_receive(struct repository *r UNUSED, trace2_data_string("transfer", NULL, "client-sid", client_sid); } -static int object_info_advertise(struct repository *r, struct strbuf *value UNUSED) +static int object_info_advertise(struct repository *r, struct strbuf *value) { if (advertise_object_info == -1 && repo_config_get_bool(r, "transfer.advertiseobjectinfo", @@ -97,6 +97,9 @@ static int object_info_advertise(struct repository *r, struct strbuf *value UNUS /* disabled by default */ advertise_object_info = 0; } + /* Currently only size is supported */ + if (value && advertise_object_info) + strbuf_addstr(value, "size"); return advertise_object_info; } From cc06a620c399f12827117e2ba9d21b215e372f4a Mon Sep 17 00:00:00 2001 From: Calvin Wan Date: Wed, 1 Jul 2026 14:18:44 +0200 Subject: [PATCH 10/13] transport: add client support for object-info Sometimes, it is beneficial to retrieve information about an object without downloading it entirely. The server-side logic for this functionality was implemented in commit a2ba162cda (object-info: support for retrieving object info, 2021-04-20), and the wire format is documented at https://git-scm.com/docs/protocol-v2#_object_info. Introduce client-side support for the object-info capability. Add its own function for object-info separate from existing fetch infrastructure. Currently, the client supports requesting a list of object IDs with the `size` feature from a v2 server. If the server does not advertise this feature (i.e., transfer.advertiseobjectinfo is set to false), the client returns an error and exits. Note that: 1. The entire request is written into `req_buf` before being sent to the remote. This approach follows the pattern used in the `send_fetch_request()` logic within 'fetch-pack.c'. Streaming the request is not addressed in this patch. 2. 
When the server does not recognize an OID, following the v2 protocol, the server returns `obj-id SP`. When this happens, `fetch_object_info()` sets the corresponding size pointer to NULL so that callers can detect and handle it. Helped-by: Jonathan Tan Helped-by: Christian Couder Signed-off-by: Calvin Wan Signed-off-by: Eric Ju Signed-off-by: Pablo Sabater Signed-off-by: Junio C Hamano --- Makefile | 1 + fetch-object-info.c | 95 ++++++++++++++++++++++++++++++++++++++++++++ fetch-object-info.h | 22 ++++++++++ fetch-pack.h | 1 + meson.build | 1 + transport-helper.c | 13 +++++- transport-internal.h | 8 ++++ transport.c | 46 +++++++++++++++++++++ transport.h | 10 +++++ 9 files changed, 195 insertions(+), 2 deletions(-) create mode 100644 fetch-object-info.c create mode 100644 fetch-object-info.h diff --git a/Makefile b/Makefile index 1cec251f43..ec4df39a6b 100644 --- a/Makefile +++ b/Makefile @@ -1159,6 +1159,7 @@ LIB_OBJS += ewah/ewah_rlw.o LIB_OBJS += exec-cmd.o LIB_OBJS += fetch-negotiator.o LIB_OBJS += fetch-pack.o +LIB_OBJS += fetch-object-info.o LIB_OBJS += fmt-merge-msg.o LIB_OBJS += fsck.o LIB_OBJS += fsmonitor.o diff --git a/fetch-object-info.c b/fetch-object-info.c new file mode 100644 index 0000000000..03cfb70338 --- /dev/null +++ b/fetch-object-info.c @@ -0,0 +1,95 @@ +#include "git-compat-util.h" +#include "gettext.h" +#include "hex.h" +#include "pkt-line.h" +#include "connect.h" +#include "oid-array.h" +#include "odb.h" +#include "fetch-object-info.h" +#include "string-list.h" + +/* Sends object-info command and its arguments into the request buffer. 
*/ +static void send_object_info_request(const int fd_out, struct object_info_args *args) +{ + struct strbuf req_buf = STRBUF_INIT; + + write_command_and_capabilities(&req_buf, "object-info", args->server_options); + + if (unsorted_string_list_has_string(args->object_info_options, "size")) + packet_buf_write(&req_buf, "size"); + else + BUG("only size should be in object_info_options"); + + if (args->oids) + for (size_t i = 0; i < args->oids->nr; i++) + packet_buf_write(&req_buf, "oid %s", oid_to_hex(&args->oids->oid[i])); + + packet_buf_flush(&req_buf); + if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0) + die_errno(_("unable to write request to remote")); + + strbuf_release(&req_buf); +} + +int fetch_object_info(const enum protocol_version version, struct object_info_args *args, + struct packet_reader *reader, struct object_info *object_info_data, + const int stateless_rpc, const int fd_out) +{ + int size_index = -1; + + switch (version) { + case protocol_v2: + if (!server_supports_v2("object-info")) + die(_("object-info capability is not enabled on the server")); + send_object_info_request(fd_out, args); + break; + case protocol_v1: + case protocol_v0: + die(_("unsupported protocol version. 
expected v2")); + case protocol_unknown_version: + BUG("unknown protocol version"); + } + + for (size_t i = 0; i < args->object_info_options->nr; i++) { + if (packet_reader_read(reader) != PACKET_READ_NORMAL) { + check_stateless_delimiter(stateless_rpc, reader, + "stateless delimiter expected"); + return -1; + } + + if (!string_list_has_string(args->object_info_options, reader->line)) + return -1; + + if (!strcmp(reader->line, "size")) { + size_index = i; + for (size_t j = 0; j < args->oids->nr; j++) + object_info_data[j].sizep = xcalloc(1, sizeof(*object_info_data[j].sizep)); + } else { + BUG("only size is supported"); + } + } + + for (size_t i = 0; packet_reader_read(reader) == PACKET_READ_NORMAL && i < args->oids->nr; i++) { + struct string_list object_info_values = STRING_LIST_INIT_DUP; + + string_list_split(&object_info_values, reader->line, " ", -1); + if (size_index >= 0) { + if (!strcmp(object_info_values.items[1 + size_index].string, "")) { + FREE_AND_NULL(object_info_data[i].sizep); + string_list_clear(&object_info_values, 0); + continue; + } + + if (strtoumax_szt(object_info_values.items[1 + size_index].string, + 10, object_info_data[i].sizep)) + die("object-info: ref %s has invalid size %s", + object_info_values.items[0].string, + object_info_values.items[1 + size_index].string); + } + + string_list_clear(&object_info_values, 0); + } + check_stateless_delimiter(stateless_rpc, reader, "stateless delimiter expected"); + + return 0; +} diff --git a/fetch-object-info.h b/fetch-object-info.h new file mode 100644 index 0000000000..d35284bd6b --- /dev/null +++ b/fetch-object-info.h @@ -0,0 +1,22 @@ +#ifndef FETCH_OBJECT_INFO_H +#define FETCH_OBJECT_INFO_H + +#include "pkt-line.h" +#include "protocol.h" +#include "odb.h" + +struct object_info_args { + struct string_list *object_info_options; + const struct string_list *server_options; + struct oid_array *oids; +}; + +/* + * Sends git-cat-file object-info command into the request buf and read the + * results 
from packets. + */ +int fetch_object_info(enum protocol_version version, struct object_info_args *args, + struct packet_reader *reader, struct object_info *object_info_data, + int stateless_rpc, int fd_out); + +#endif /* FETCH_OBJECT_INFO_H */ diff --git a/fetch-pack.h b/fetch-pack.h index 6d0dec7f41..0fba340a84 100644 --- a/fetch-pack.h +++ b/fetch-pack.h @@ -16,6 +16,7 @@ struct fetch_pack_args { const struct string_list *deepen_not; struct list_objects_filter_options filter_options; const struct string_list *server_options; + struct object_info *object_info_data; /* * If not NULL, during packfile negotiation, fetch-pack will send "have" diff --git a/meson.build b/meson.build index 3247697f74..145c6882eb 100644 --- a/meson.build +++ b/meson.build @@ -347,6 +347,7 @@ libgit_sources = [ 'exec-cmd.c', 'fetch-negotiator.c', 'fetch-pack.c', + 'fetch-object-info.c', 'fmt-merge-msg.c', 'fsck.c', 'fsmonitor.c', diff --git a/transport-helper.c b/transport-helper.c index 8a71354d50..f9c14d6f47 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -727,8 +727,7 @@ static int fetch_refs(struct transport *transport, /* * If we reach here, then the server, the client, and/or the transport - * helper does not support protocol v2. --negotiate-only requires - * protocol v2. + * helper does not support protocol v2. --negotiate-only. 
*/ if (data->transport_options.acked_commits) { warning(_("--negotiate-only requires protocol v2")); @@ -784,6 +783,15 @@ static int fetch_refs(struct transport *transport, return -1; } +static int fetch_object_info_helper(struct transport *transport) +{ + get_helper(transport); + if (process_connect(transport, 0)) + return transport->vtable->fetch_object_info(transport); + + die(_("object-info requires protocol v2")); +} + struct push_update_ref_state { struct ref *hint; struct ref_push_report *report; @@ -1330,6 +1338,7 @@ static struct transport_vtable vtable = { .get_refs_list = get_refs_list, .get_bundle_uri = get_bundle_uri, .fetch_refs = fetch_refs, + .fetch_object_info = fetch_object_info_helper, .push_refs = push_refs, .connect = connect_helper, .disconnect = release_helper diff --git a/transport-internal.h b/transport-internal.h index 051f3ab0dc..60db0bedcd 100644 --- a/transport-internal.h +++ b/transport-internal.h @@ -45,6 +45,14 @@ struct transport_vtable { **/ int (*fetch_refs)(struct transport *transport, int refs_nr, struct ref **refs); + /* + * Fetch object info (only size currently) from remote without + * downloading the objects. + * + * Uses object-info capability of v2 protocol. + */ + int (*fetch_object_info)(struct transport *transport); + /** * Push the objects and refs. 
Send the necessary objects, and * then, for any refs where peer_ref is set and diff --git a/transport.c b/transport.c index 0f5ec30247..602b1c5512 100644 --- a/transport.c +++ b/transport.c @@ -1,3 +1,4 @@ +#include "compat/posix.h" #define USE_THE_REPOSITORY_VARIABLE #include "git-compat-util.h" @@ -9,6 +10,7 @@ #include "hook.h" #include "pkt-line.h" #include "fetch-pack.h" +#include "fetch-object-info.h" #include "remote.h" #include "connect.h" #include "send-pack.h" @@ -432,6 +434,48 @@ static int get_bundle_uri(struct transport *transport) transport->bundles, stateless_rpc); } +static int fetch_object_info_via_pack(struct transport *transport) +{ + int ret = 0; + struct git_transport_data *data = transport->data; + struct packet_reader reader; + struct object_info_args args = { 0 }; + + args.server_options = transport->server_options; + args.oids = transport->smart_options->object_info_oids; + args.object_info_options = transport->smart_options->object_info_options; + string_list_sort(args.object_info_options); + + connect_setup(transport, 0); + packet_reader_init(&reader, data->fd[0], NULL, 0, + PACKET_READ_CHOMP_NEWLINE | + PACKET_READ_GENTLE_ON_EOF | + PACKET_READ_DIE_ON_ERR_PACKET); + + data->version = discover_version(&reader); + transport->hash_algo = reader.hash_algo; + + ret = fetch_object_info(data->version, &args, &reader, + data->options.object_info_data, + transport->stateless_rpc, data->fd[1]); + + close(data->fd[0]); + if (data->fd[1] >= 0) + close(data->fd[1]); + if (finish_connect(data->conn)) + ret = -1; + data->conn = NULL; + + return ret; +} + +int transport_fetch_object_info(struct transport *transport) +{ + if (!transport->vtable->fetch_object_info) + die(_("remote does not support object-info")); + return transport->vtable->fetch_object_info(transport); +} + static int fetch_refs_via_pack(struct transport *transport, int nr_heads, struct ref **to_fetch) { @@ -1004,6 +1048,7 @@ static struct transport_vtable taken_over_vtable = { 
.get_refs_list = get_refs_via_connect, .get_bundle_uri = get_bundle_uri, .fetch_refs = fetch_refs_via_pack, + .fetch_object_info = fetch_object_info_via_pack, .push_refs = git_transport_push, .disconnect = disconnect_git }; @@ -1169,6 +1214,7 @@ static struct transport_vtable builtin_smart_vtable = { .get_refs_list = get_refs_via_connect, .get_bundle_uri = get_bundle_uri, .fetch_refs = fetch_refs_via_pack, + .fetch_object_info = fetch_object_info_via_pack, .push_refs = git_transport_push, .connect = connect_git, .disconnect = disconnect_git diff --git a/transport.h b/transport.h index 7e5867cffa..9e85a4cd35 100644 --- a/transport.h +++ b/transport.h @@ -6,6 +6,7 @@ #include "list-objects-filter-options.h" #include "string-list.h" #include "connect.h" +#include "odb.h" struct git_transport_options { unsigned thin : 1; @@ -55,6 +56,10 @@ struct git_transport_options { * common commits to this oidset instead of fetching any packfiles. */ struct oidset *acked_commits; + + struct oid_array *object_info_oids; + struct object_info *object_info_data; + struct string_list *object_info_options; }; enum transport_family { @@ -309,6 +314,11 @@ int transport_get_remote_bundle_uri(struct transport *transport); const struct git_hash_algo *transport_get_hash_algo(struct transport *transport); int transport_fetch_refs(struct transport *transport, struct ref *refs); +/* + * Fetch the object info from remote + */ +int transport_fetch_object_info(struct transport *transport); + /* * If this flag is set, unlocking will avoid to call non-async-signal-safe * functions. 
This will necessarily leave behind some data structures which From 7134a94db6d634c6ade1d9670bc63e12d05d0fb8 Mon Sep 17 00:00:00 2001 From: Eric Ju Date: Wed, 1 Jul 2026 14:18:45 +0200 Subject: [PATCH 11/13] cat-file: add remote-object-info to batch-command Since the `info` command in `cat-file --batch-command` prints object info for a given object, it is natural to add another command in `cat-file --batch-command` to print object info for a given object from a remote. Add `remote-object-info` to `cat-file --batch-command`. While `info` takes object ids one at a time, this creates overhead when making requests to a server. So `remote-object-info` instead can take multiple object ids at once. The `cat-file --batch-command` command is generally implemented in the following manner: - Receive and parse input from user - Call respective function attached to command - Get object info, print object info In --buffer mode, this changes to: - Receive and parse input from user - Store respective function attached to command in a queue - After flush, loop through commands in queue - Call respective function attached to command - Get object info, print object info Notice how the getting and printing of object info is accomplished one at a time. As described above, this creates a problem for making requests to a server. 
Therefore, `remote-object-info` is implemented in the following manner: - Receive and parse input from user If command is `remote-object-info`: - Get object info from remote - Loop through and print each object info Else: - Call respective function attached to command - Parse input, get object info, print object info And finally for --buffer mode `remote-object-info`: - Receive and parse input from user - Store respective function attached to command in a queue - After flush, loop through commands in queue: If command is `remote-object-info`: - Get object info from remote - Loop through and print each object info Else: - Call respective function attached to command - Get object info, print object info To summarize, `remote-object-info` gets object info from the remote and then loops through the object info passed in, printing the info. In order for `remote-object-info` to avoid remote communication overhead in the non-buffer mode, the objects are passed in as such: remote-object-info ... rather than remote-object-info remote-object-info ... remote-object-info Helped-by: Jonathan Tan Helped-by: Christian Couder Signed-off-by: Calvin Wan Signed-off-by: Eric Ju Signed-off-by: Pablo Sabater Signed-off-by: Junio C Hamano --- Documentation/git-cat-file.adoc | 29 +- builtin/cat-file.c | 144 +++++- object-file.c | 10 + odb.h | 3 + t/meson.build | 1 + t/t1017-cat-file-remote-object-info.sh | 680 +++++++++++++++++++++++++ 6 files changed, 859 insertions(+), 8 deletions(-) create mode 100755 t/t1017-cat-file-remote-object-info.sh diff --git a/Documentation/git-cat-file.adoc b/Documentation/git-cat-file.adoc index 86b9181599..a7fa6674c3 100644 --- a/Documentation/git-cat-file.adoc +++ b/Documentation/git-cat-file.adoc @@ -169,6 +169,13 @@ info :: Print object info for object reference ``. This corresponds to the output of `--batch-check`. +remote-object-info ...:: + Print object info for object references `` at specified + `` without downloading objects from the remote. 
+ Raise an error when the `object-info` capability is not supported by the remote. + Raise an error when no object references are provided. + This command may be combined with `--buffer`. + flush:: Used with `--buffer` to execute all preceding commands that were issued since the beginning or since the last flush was issued. When `--buffer` @@ -301,7 +308,8 @@ one per line, and print information based on the command given. With `--batch-command`, the `info` command followed by an object will print information about the object the same way `--batch-check` would, and the `contents` command followed by an object prints contents in the same way -`--batch` would. +`--batch` would. The `remote-object-info` command followed by a remote and +object IDs prints object info from the remote without downloading the objects. You can specify the information shown for each object by using a custom `<format>`. The `<format>` is copied literally to stdout for each @@ -324,15 +332,12 @@ newline. The available atoms are: reports). `objectsize:disk`:: - The size, in bytes, that the object takes up on disk. See the - note about on-disk sizes in the `CAVEATS` section below. + The size, in bytes, that the object takes up on disk. `deltabase`:: If the object is stored as a delta on-disk, this expands to the full hex representation of the delta base object name. - Otherwise, expands to the null OID (all zeroes). See `CAVEATS` - below. - + Otherwise, expands to the null OID (all zeroes). `rest`:: If this atom is used in the output string, input lines are split at the first whitespace boundary. All characters before that @@ -340,8 +345,14 @@ newline. The available atoms are: after that first run of whitespace (i.e., the "rest" of the line) are output in place of the `%(rest)` atom. +The command `remote-object-info` only supports the `%(objectname)` and +`%(objectsize)` placeholders. See `CAVEATS` below for more information. 
+ If no format is specified, the default format is `%(objectname) -%(objecttype) %(objectsize)`. +%(objecttype) %(objectsize)`, except for `remote-object-info` commands which +use `%(objectname) %(objectsize)` because "%(objecttype)" is not supported yet. +WARNING: When "%(objecttype)" is supported, the default format WILL be unified, +so DO NOT RELY on the current default format to stay the same!!! If `--batch` is specified, or if `--batch-command` is used with the `contents` command, the object information is followed by the object contents (consisting @@ -438,6 +449,10 @@ scripting purposes. CAVEATS ------- +Note that only `%(objectname)` and `%(objectsize)` are currently +supported by the `remote-object-info` command; using any other placeholder in +the format string will raise an error. + Note that the sizes of objects on disk are reported accurately, but care should be taken in drawing conclusions about which refs or objects are responsible for disk usage. The size of a packed non-delta object may be diff --git a/builtin/cat-file.c b/builtin/cat-file.c index fab55c11de..ea2230be04 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -29,6 +29,22 @@ #include "promisor-remote.h" #include "mailmap.h" #include "write-or-die.h" +#include "alias.h" +#include "remote.h" +#include "transport.h" + +/* + * Maximum length for a remote URL. While no universal standard exists, + * 8K is assumed to be a reasonable limit. + */ +#define MAX_REMOTE_URL_LEN (8 * 1024) + +/* Maximum number of objects allowed in a single remote-object-info request. */ +#define MAX_ALLOWED_OBJ_LIMIT 10000 + +/* Maximum input size permitted for the remote-object-info command. 
*/ +#define MAX_REMOTE_OBJ_INFO_LINE \ + (MAX_REMOTE_URL_LEN + MAX_ALLOWED_OBJ_LIMIT * (GIT_MAX_HEXSZ + 1)) enum batch_mode { BATCH_MODE_CONTENTS, @@ -638,6 +654,80 @@ out: object_context_release(&ctx); } +static int get_remote_info(struct batch_options *opt, + int argc, + const char **argv, + struct object_info **remote_object_info, + struct oid_array *object_info_oids) +{ + int retval = 0; + struct remote *remote = NULL; + struct object_id oid; + struct string_list object_info_options = STRING_LIST_INIT_NODUP; + struct transport *gtransport; + + /* + * TODO: Change the format to "%(objectname) %(objectsize)" when + * remote-object-info command is used. Once we start supporting objecttype + * the default format should change to DEFAULT_FORMAT. + */ + if (!opt->format) + opt->format = "%(objectname) %(objectsize)"; + + remote = remote_get(argv[0]); + if (!remote) + die(_("must supply valid remote when using remote-object-info")); + + oid_array_clear(object_info_oids); + for (size_t i = 1; i < argc; i++) { + if (get_oid_hex(argv[i], &oid)) { + size_t len = strlen(argv[i]); + + if (len < the_hash_algo->hexsz && len >= 4) { + size_t j; + for (j = 0; j < len; j++) + if (!isxdigit(argv[i][j])) + break; + if (j == len) + die(_("remote-object-info does not support " + "short oids, %d characters required"), + (int)the_hash_algo->hexsz); + } + die(_("not a valid object name '%s'"), argv[i]); + } + oid_array_append(object_info_oids, &oid); + } + + if (!object_info_oids->nr) + die(_("remote-object-info requires objects")); + + gtransport = transport_get(remote, NULL); + + if (!gtransport->smart_options) { + retval = -1; + goto cleanup; + } + + CALLOC_ARRAY(*remote_object_info, object_info_oids->nr); + gtransport->smart_options->object_info_oids = object_info_oids; + + /* 'objectsize' is the only option currently supported */ + if (!strstr(opt->format, "%(objectsize)")) + die(_("%s is currently not supported with remote-object-info"), opt->format); + + 
string_list_append(&object_info_options, "size"); + + if (object_info_options.nr > 0) { + gtransport->smart_options->object_info_options = &object_info_options; + gtransport->smart_options->object_info_data = *remote_object_info; + retval = transport_fetch_object_info(gtransport); + } +cleanup: + string_list_clear(&object_info_options, 0); + transport_disconnect(gtransport); + return retval; +} + struct object_cb_data { struct batch_options *opt; struct expand_data *expand; @@ -719,6 +809,57 @@ static void parse_cmd_mailmap(struct batch_options *opt UNUSED, load_mailmap(); } +static void parse_cmd_remote_object_info(struct batch_options *opt, + const char *line, struct strbuf *output, + struct expand_data *data) +{ + int count; + const char **argv; + char *line_to_split; + struct object_info *remote_object_info = NULL; + struct oid_array object_info_oids = OID_ARRAY_INIT; + + if (strlen(line) >= MAX_REMOTE_OBJ_INFO_LINE) + die(_("remote-object-info command too long")); + + line_to_split = xstrdup(line); + count = split_cmdline(line_to_split, &argv); + if (count < 0) + die(_("remote-object-info: %s"), split_cmdline_strerror(count)); + if (count - 1 > MAX_ALLOWED_OBJ_LIMIT) + die(_("remote-object-info supports at most %d objects"), + MAX_ALLOWED_OBJ_LIMIT); + + if (get_remote_info(opt, count, argv, &remote_object_info, + &object_info_oids)) + goto cleanup; + + data->skip_object_info = 1; + for (size_t i = 0; i < object_info_oids.nr; i++) { + data->oid = object_info_oids.oid[i]; + if (remote_object_info[i].sizep) { + /* + * When reaching here, it means remote-object-info can retrieve + * information from server without downloading them. 
+ */ + data->size = *remote_object_info[i].sizep; + opt->batch_mode = BATCH_MODE_INFO; + batch_object_write(argv[i + 1], output, opt, data, NULL, 0); + } else { + report_object_status(opt, oid_to_hex(&data->oid), &data->oid, "missing"); + } + } + data->skip_object_info = 0; + +cleanup: + for (size_t i = 0; remote_object_info && i < object_info_oids.nr; i++) + free_object_info_contents(&remote_object_info[i]); + free(line_to_split); + free(argv); + free(remote_object_info); + oid_array_clear(&object_info_oids); +} + static void dispatch_calls(struct batch_options *opt, struct strbuf *output, struct expand_data *data, @@ -750,8 +891,9 @@ static const struct parse_cmd { } commands[] = { { "contents", parse_cmd_contents, 1 }, { "info", parse_cmd_info, 1 }, - { "flush", NULL, 0 }, { "mailmap", parse_cmd_mailmap, 1 }, + { "remote-object-info", parse_cmd_remote_object_info, 1 }, + { "flush", NULL, 0 }, }; static void batch_objects_command(struct batch_options *opt, diff --git a/object-file.c b/object-file.c index 9afa842da2..ef31a47939 100644 --- a/object-file.c +++ b/object-file.c @@ -1694,3 +1694,13 @@ struct odb_transaction *odb_transaction_files_begin(struct odb_source *source) return &transaction->base; } + +void free_object_info_contents(struct object_info *object_info) +{ + if (!object_info) + return; + free(object_info->typep); + free(object_info->sizep); + free(object_info->disk_sizep); + free(object_info->delta_base_oid); +} diff --git a/odb.h b/odb.h index 0030467a52..168ea12da7 100644 --- a/odb.h +++ b/odb.h @@ -573,4 +573,7 @@ void parse_alternates(const char *string, const char *relative_base, struct strvec *out); +/* Free pointers inside of object_info, but not object_info itself */ +void free_object_info_contents(struct object_info *object_info); + #endif /* ODB_H */ diff --git a/t/meson.build b/t/meson.build index c5832fee05..33327dd1df 100644 --- a/t/meson.build +++ b/t/meson.build @@ -170,6 +170,7 @@ integration_tests = [ 't1014-read-tree-confusing.sh', 
't1015-read-index-unmerged.sh', 't1016-compatObjectFormat.sh', + 't1017-cat-file-remote-object-info.sh', 't1020-subdirectory.sh', 't1022-read-tree-partial-clone.sh', 't1050-large.sh', diff --git a/t/t1017-cat-file-remote-object-info.sh b/t/t1017-cat-file-remote-object-info.sh new file mode 100755 index 0000000000..49b6660934 --- /dev/null +++ b/t/t1017-cat-file-remote-object-info.sh @@ -0,0 +1,680 @@ +#!/bin/sh + +test_description='git cat-file --batch-command with remote-object-info command' + +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME + +. ./test-lib.sh +. "$TEST_DIRECTORY"/lib-cat-file.sh + +hello_content="Hello World" +hello_size=$(strlen "$hello_content") +hello_oid=$(echo_without_newline "$hello_content" | git hash-object --stdin) +hello_short_oid=$(git rev-parse --short "$hello_oid") + +unstored_content="Hello Git" +unstored_oid=$(echo_without_newline "$unstored_content" | git hash-object --stdin) + +# This is how we get 13: +# 13 = <file mode> + <a space> + <file name> + <a null>, where +# file mode is 100644, which is 6 characters; +# file name is hello, which is 5 characters +# a space is 1 character and a null is 1 character +tree_size=$(($(test_oid rawsz) + 13)) + +commit_message="Initial commit" + +# This is how we get 137: +# 137 = + + + +# + + +# + + +# + +# +# An easier way to calculate is: 1. use `git cat-file commit | wc -c`, +# to get 177, 2. 
then deduct 40 hex characters to get 137 +commit_size=$(($(test_oid hexsz) + 137)) + +tag_header_without_oid="type blob +tag hellotag +tagger $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL>" +tag_header_without_timestamp="object $hello_oid +$tag_header_without_oid" +tag_description="This is a tag" +tag_content="$tag_header_without_timestamp 0 +0000 + +$tag_description" + +tag_oid=$(echo_without_newline "$tag_content" | git hash-object -t tag --stdin -w) +tag_size=$(strlen "$tag_content") + +set_transport_variables () { + hello_oid=$(echo_without_newline "$hello_content" | git hash-object --stdin) + tree_oid=$(git -C "$1" write-tree) + commit_oid=$(echo_without_newline "$commit_message" | git -C "$1" commit-tree $tree_oid) + tag_oid=$(echo_without_newline "$tag_content" | git -C "$1" hash-object -t tag --stdin -w) + tag_size=$(strlen "$tag_content") +} + +# This section tests --batch-command with remote-object-info command +# Since "%(objecttype)" is currently not supported by the command remote-object-info , +# the filters are set to "%(objectname) %(objectsize)" in some test cases. + +# Test --batch-command remote-object-info with 'git://' transport with +# transfer.advertiseobjectinfo set to true, i.e. server has object-info capability +. 
"$TEST_DIRECTORY"/lib-git-daemon.sh +start_git_daemon --export-all --enable=receive-pack +daemon_parent=$GIT_DAEMON_DOCUMENT_ROOT_PATH/parent + +test_expect_success 'create repo to be served by git-daemon' ' + git init "$daemon_parent" && + echo_without_newline "$hello_content" > $daemon_parent/hello && + git -C "$daemon_parent" update-index --add hello && + git -C "$daemon_parent" config transfer.advertiseobjectinfo true && + git clone "$GIT_DAEMON_URL/parent" -n "$daemon_parent/daemon_client_empty" +' + +test_expect_success 'batch-command remote-object-info git://' ' + ( + set_transport_variables "$daemon_parent" && + cd "$daemon_parent/daemon_client_empty" && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF && + remote-object-info "$GIT_DAEMON_URL/parent" $hello_oid + remote-object-info "$GIT_DAEMON_URL/parent" $tree_oid + remote-object-info "$GIT_DAEMON_URL/parent" $commit_oid + remote-object-info "$GIT_DAEMON_URL/parent" $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info git:// multiple sha1 per line' ' + ( + set_transport_variables "$daemon_parent" && + cd "$daemon_parent/daemon_client_empty" && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid 
$tag_size" >>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF && + remote-object-info "$GIT_DAEMON_URL/parent" $hello_oid $tree_oid $commit_oid $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info git:// default filter' ' + ( + set_transport_variables "$daemon_parent" && + cd "$daemon_parent/daemon_client_empty" && + + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + GIT_TRACE_PACKET=1 git cat-file --batch-command >actual <<-EOF && + remote-object-info "$GIT_DAEMON_URL/parent" $hello_oid $tree_oid + remote-object-info "$GIT_DAEMON_URL/parent" $commit_oid $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command --buffer remote-object-info git://' ' + ( + set_transport_variables "$daemon_parent" && + cd "$daemon_parent/daemon_client_empty" && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + git cat-file --batch-command="%(objectname) %(objectsize)" --buffer >actual <<-EOF && + remote-object-info "$GIT_DAEMON_URL/parent" $hello_oid $tree_oid + remote-object-info 
"$GIT_DAEMON_URL/parent" $commit_oid $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + flush + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command -Z remote-object-info git:// default filter' ' + ( + set_transport_variables "$daemon_parent" && + cd "$daemon_parent/daemon_client_empty" && + + printf "%s\0" "$hello_oid $hello_size" >expect && + printf "%s\0" "$tree_oid $tree_size" >>expect && + printf "%s\0" "$commit_oid $commit_size" >>expect && + printf "%s\0" "$tag_oid $tag_size" >>expect && + + printf "%s\0" "$hello_oid missing" >>expect && + printf "%s\0" "$tree_oid missing" >>expect && + printf "%s\0" "$commit_oid missing" >>expect && + printf "%s\0" "$tag_oid missing" >>expect && + + batch_input="remote-object-info $GIT_DAEMON_URL/parent $hello_oid $tree_oid +remote-object-info $GIT_DAEMON_URL/parent $commit_oid $tag_oid +info $hello_oid +info $tree_oid +info $commit_oid +info $tag_oid +" && + echo_without_newline_nul "$batch_input" >commands_null_delimited && + + git cat-file --batch-command -Z < commands_null_delimited >actual && + test_cmp expect actual + ) +' + +test_expect_success 'remote-object-info does not support short oids' ' + ( + set_transport_variables "$daemon_parent" && + cd "$daemon_parent/daemon_client_empty" && + + test_must_fail git cat-file --batch-command 2>err <<-EOF && + remote-object-info $GIT_DAEMON_URL/parent $hello_short_oid + EOF + test_grep "does not support short oids" err + ) +' + +test_expect_success 'remote-object-info does not die on missing oid like info' ' + ( + set_transport_variables "$daemon_parent" && + cd "$daemon_parent/daemon_client_empty" && + + git cat-file --batch-command >local <<-EOF && + info $unstored_oid + EOF + git cat-file --batch-command >remote <<-EOF && + remote-object-info $GIT_DAEMON_URL/parent $unstored_oid + EOF + test_cmp local remote + ) +' + +# Test --batch-command remote-object-info with 'git://' and +# transfer.advertiseobjectinfo set to false, 
i.e. server does not have object-info capability +test_expect_success 'batch-command remote-object-info git:// fails when transfer.advertiseobjectinfo=false' ' + ( + git -C "$daemon_parent" config transfer.advertiseobjectinfo false && + set_transport_variables "$daemon_parent" && + + test_must_fail git cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF && + remote-object-info $GIT_DAEMON_URL/parent $hello_oid $tree_oid $commit_oid $tag_oid + EOF + test_grep "object-info capability is not enabled on the server" err && + + # revert server state back + git -C "$daemon_parent" config transfer.advertiseobjectinfo true + + ) +' + +stop_git_daemon + +# Test --batch-command remote-object-info with 'file://' transport with +# transfer.advertiseobjectinfo set to true, i.e. server has object-info capability +# shellcheck disable=SC2016 +test_expect_success 'create repo to be served by file:// transport' ' + git init server && + git -C server config protocol.version 2 && + git -C server config transfer.advertiseobjectinfo true && + echo_without_newline "$hello_content" > server/hello && + git -C server update-index --add hello && + git clone -n "file://$(pwd)/server" file_client_empty +' + +test_expect_success 'batch-command remote-object-info file://' ' + ( + set_transport_variables "server" && + server_path="$(pwd)/server" && + cd file_client_empty && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF && + remote-object-info 
"file://${server_path}" $hello_oid + remote-object-info "file://${server_path}" $tree_oid + remote-object-info "file://${server_path}" $commit_oid + remote-object-info "file://${server_path}" $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info file:// multiple sha1 per line' ' + ( + set_transport_variables "server" && + server_path="$(pwd)/server" && + cd file_client_empty && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + + git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF && + remote-object-info "file://${server_path}" $hello_oid $tree_oid $commit_oid $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command --buffer remote-object-info file://' ' + ( + set_transport_variables "server" && + server_path="$(pwd)/server" && + cd file_client_empty && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + git cat-file --batch-command="%(objectname) 
%(objectsize)" --buffer >actual <<-EOF && + remote-object-info "file://${server_path}" $hello_oid $tree_oid + remote-object-info "file://${server_path}" $commit_oid $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + flush + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info file:// default filter' ' + ( + set_transport_variables "server" && + server_path="$(pwd)/server" && + cd file_client_empty && + + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + git cat-file --batch-command >actual <<-EOF && + remote-object-info "file://${server_path}" $hello_oid $tree_oid + remote-object-info "file://${server_path}" $commit_oid $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command -Z remote-object-info file:// default filter' ' + ( + set_transport_variables "server" && + server_path="$(pwd)/server" && + cd file_client_empty && + + printf "%s\0" "$hello_oid $hello_size" >expect && + printf "%s\0" "$tree_oid $tree_size" >>expect && + printf "%s\0" "$commit_oid $commit_size" >>expect && + printf "%s\0" "$tag_oid $tag_size" >>expect && + + printf "%s\0" "$hello_oid missing" >>expect && + printf "%s\0" "$tree_oid missing" >>expect && + printf "%s\0" "$commit_oid missing" >>expect && + printf "%s\0" "$tag_oid missing" >>expect && + + batch_input="remote-object-info \"file://${server_path}\" $hello_oid $tree_oid +remote-object-info \"file://${server_path}\" $commit_oid $tag_oid +info $hello_oid +info $tree_oid +info $commit_oid +info $tag_oid +" && + echo_without_newline_nul "$batch_input" >commands_null_delimited && + + git cat-file --batch-command -Z < commands_null_delimited >actual && + test_cmp expect actual + ) +' + +# Test --batch-command remote-object-info with 'file://' and +# transfer.advertiseobjectinfo set to false, i.e. 
server does not have object-info capability +test_expect_success 'batch-command remote-object-info file:// fails when transfer.advertiseobjectinfo=false' ' + ( + set_transport_variables "server" && + server_path="$(pwd)/server" && + git -C "${server_path}" config transfer.advertiseobjectinfo false && + + test_must_fail git cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF && + remote-object-info "file://${server_path}" $hello_oid $tree_oid $commit_oid $tag_oid + EOF + test_grep "object-info capability is not enabled on the server" err && + + # revert server state back + git -C "${server_path}" config transfer.advertiseobjectinfo true + ) +' + +# Test --batch-command remote-object-info with 'http://' transport with +# transfer.advertiseobjectinfo set to true, i.e. server has object-info capability + +. "$TEST_DIRECTORY"/lib-httpd.sh +start_httpd + +test_expect_success 'create repo to be served by http:// transport' ' + git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config http.receivepack true && + git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config transfer.advertiseobjectinfo true && + echo_without_newline "$hello_content" > $HTTPD_DOCUMENT_ROOT_PATH/http_parent/hello && + git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" update-index --add hello && + git clone "$HTTPD_URL/smart/http_parent" -n "$HTTPD_DOCUMENT_ROOT_PATH/http_client_empty" +' + +test_expect_success 'batch-command remote-object-info http://' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_client_empty" && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" 
>>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" $hello_oid + remote-object-info "$HTTPD_URL/smart/http_parent" $tree_oid + remote-object-info "$HTTPD_URL/smart/http_parent" $commit_oid + remote-object-info "$HTTPD_URL/smart/http_parent" $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info http:// one line' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_client_empty" && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" $hello_oid $tree_oid $commit_oid $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command --buffer remote-object-info http://' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_client_empty" && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" 
>>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + git cat-file --batch-command="%(objectname) %(objectsize)" --buffer >actual <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" $hello_oid $tree_oid + remote-object-info "$HTTPD_URL/smart/http_parent" $commit_oid $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + flush + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info http:// default filter' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_client_empty" && + + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + git cat-file --batch-command >actual <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" $hello_oid $tree_oid + remote-object-info "$HTTPD_URL/smart/http_parent" $commit_oid $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command -Z remote-object-info http:// default filter' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_client_empty" && + + printf "%s\0" "$hello_oid $hello_size" >expect && + printf "%s\0" "$tree_oid $tree_size" >>expect && + printf "%s\0" "$commit_oid $commit_size" >>expect && + printf "%s\0" "$tag_oid $tag_size" >>expect && + + batch_input="remote-object-info $HTTPD_URL/smart/http_parent $hello_oid $tree_oid +remote-object-info $HTTPD_URL/smart/http_parent $commit_oid $tag_oid +" && + echo_without_newline_nul "$batch_input" >commands_null_delimited && + + git cat-file --batch-command -Z < commands_null_delimited >actual && + test_cmp expect actual + ) +' + 
+test_expect_success 'remote-object-info fails on unsupported filter option (objectsize:disk)' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + + test_must_fail git cat-file --batch-command="%(objectsize:disk)" 2>err <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" $hello_oid + EOF + test_grep "%(objectsize:disk) is currently not supported with remote-object-info" err + ) +' + +test_expect_success 'remote-object-info fails on unsupported filter option (deltabase)' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + + test_must_fail git cat-file --batch-command="%(deltabase)" 2>err <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" $hello_oid + EOF + test_grep "%(deltabase) is currently not supported with remote-object-info" err + ) +' + +test_expect_success 'remote-object-info fails on server with legacy protocol' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + + test_must_fail git -c protocol.version=0 cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" $hello_oid + EOF + test_grep "object-info requires protocol v2" err + ) +' + +test_expect_success 'remote-object-info fails on server with legacy protocol with default filter' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + + test_must_fail git -c protocol.version=0 cat-file --batch-command 2>err <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" $hello_oid + EOF + test_grep "object-info requires protocol v2" err + ) +' + +test_expect_success 'remote-object-info fails on malformed OID' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + 
malformed_object_id="this_id_is_not_valid" && + + test_must_fail git cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" $malformed_object_id + EOF + test_grep "not a valid object name '$malformed_object_id'" err + ) +' + +test_expect_success 'remote-object-info fails on malformed OID with default filter' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + malformed_object_id="this_id_is_not_valid" && + + test_must_fail git cat-file --batch-command 2>err <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" $malformed_object_id + EOF + test_grep "not a valid object name '$malformed_object_id'" err + ) +' + +test_expect_success 'remote-object-info fails on not providing OID' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + + test_must_fail git cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" + EOF + test_grep "remote-object-info requires objects" err + ) +' + + +# Test --batch-command remote-object-info with 'http://' transport and +# transfer.advertiseobjectinfo set to false, i.e. 
server does not have object-info capability +test_expect_success 'batch-command remote-object-info http:// fails when transfer.advertiseobjectinfo=false ' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config transfer.advertiseobjectinfo false && + + test_must_fail git cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" $hello_oid $tree_oid $commit_oid $tag_oid + EOF + test_grep "object-info capability is not enabled on the server" err && + + # revert server state back + git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config transfer.advertiseobjectinfo true + ) +' + +# DO NOT add non-httpd-specific tests here, because the last part of this +# test script is only executed when httpd is available and enabled. + +test_done From 03e2e844461a196b91215536b3207589b047d53f Mon Sep 17 00:00:00 2001 From: Pablo Sabater Date: Wed, 1 Jul 2026 14:18:46 +0200 Subject: [PATCH 12/13] cat-file: validate remote atoms with an allow-list `strstr()` is not enough to validate the format placeholders in `remote-object-info` causing two errors: 1. For atoms that `expand_atom()` recognizes but the remote does not support, `expand_atom()` returns 1 while `data->type` contains garbage, causing a segfault. 2. `expand_atom()` returns 0 for unknown atoms, calling `strbuf_expand_bad_format()` which ends up dying, blocking local queries if the same format is shared. Add an allow-list with the supported atoms at the top of `expand_atom()`. In remote mode, unsupported atoms return 1 leaving the buffer empty, honoring how `for-each-ref` handles known but inapplicable atoms. As extra safety, initialize `data->type` to `OBJ_BAD` and add a `NULL` check for `type_name()` so uninitialized data doesn't cause segfault. Update tests that expect previous `die()` behavior to expect an empty string and add an explicit test for empty string return on unknown placeholder.
Update cat-file command documentation regarding `remote-object-info`. Mentored-by: Karthik Nayak Mentored-by: Chandra Pratap Signed-off-by: Pablo Sabater Signed-off-by: Junio C Hamano --- Documentation/git-cat-file.adoc | 2 +- builtin/cat-file.c | 41 +++++++++++++++++++++----- t/t1017-cat-file-remote-object-info.sh | 27 ++++++++++++++--- 3 files changed, 57 insertions(+), 13 deletions(-) diff --git a/Documentation/git-cat-file.adoc b/Documentation/git-cat-file.adoc index a7fa6674c3..643eac9245 100644 --- a/Documentation/git-cat-file.adoc +++ b/Documentation/git-cat-file.adoc @@ -451,7 +451,7 @@ CAVEATS Note that since only `%(objectname)` and `%(objectsize)` are currently supported by the `remote-object-info` command. Using any other placeholder in -the format string will raise an error. +the format string will return an empty string in its position. Note that the sizes of objects on disk are reported accurately, but care should be taken in drawing conclusions about which refs or objects are diff --git a/builtin/cat-file.c b/builtin/cat-file.c index ea2230be04..5f8a5921cf 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -336,8 +336,18 @@ struct expand_data { * optimized out. */ unsigned skip_object_info : 1; + + /* + * Flags about when an object info is being fetched from remote. 
+ */ + unsigned is_remote:1; +}; +#define EXPAND_DATA_INIT { .mode = S_IFINVALID, .type = OBJ_BAD } + +static const char *remote_object_info_atoms[] = { + "objectname", + "objectsize", }; -#define EXPAND_DATA_INIT { .mode = S_IFINVALID } static int is_atom(const char *atom, const char *s, int slen) { @@ -348,14 +358,31 @@ static int is_atom(const char *atom, const char *s, int slen) static int expand_atom(struct strbuf *sb, const char *atom, int len, struct expand_data *data) { + if (data->is_remote) { + size_t i, allowed_nr = ARRAY_SIZE(remote_object_info_atoms); + for (i = 0; i < allowed_nr; i++) + if (is_atom(remote_object_info_atoms[i], atom, len)) + break; + + /* + * On remote, skip unsupported atoms returning an empty sb, + * honoring how for-each-ref handles known but inapplicable + * atoms (e.g. %(tagger)). + */ + if (i == allowed_nr) + return 1; + } + if (is_atom("objectname", atom, len)) { if (!data->mark_query) strbuf_add_oid_hex(sb, &data->oid); } else if (is_atom("objecttype", atom, len)) { - if (data->mark_query) + if (data->mark_query) { data->info.typep = &data->type; - else - strbuf_addstr(sb, type_name(data->type)); + } else { + const char *t = type_name(data->type); + strbuf_addstr(sb, t ? 
t : ""); + } } else if (is_atom("objectsize", atom, len)) { if (data->mark_query) data->info.sizep = &data->size; @@ -711,10 +738,6 @@ static int get_remote_info(struct batch_options *opt, CALLOC_ARRAY(*remote_object_info, object_info_oids->nr); gtransport->smart_options->object_info_oids = object_info_oids; - /* 'objectsize' is the only option currently supported */ - if (!strstr(opt->format, "%(objectsize)")) - die(_("%s is currently not supported with remote-object-info"), opt->format); - string_list_append(&object_info_options, "size"); if (object_info_options.nr > 0) { @@ -844,7 +867,9 @@ static void parse_cmd_remote_object_info(struct batch_options *opt, */ data->size = *remote_object_info[i].sizep; opt->batch_mode = BATCH_MODE_INFO; + data->is_remote = 1; batch_object_write(argv[i + 1], output, opt, data, NULL, 0); + data->is_remote = 0; } else { report_object_status(opt, oid_to_hex(&data->oid), &data->oid, "missing"); } diff --git a/t/t1017-cat-file-remote-object-info.sh b/t/t1017-cat-file-remote-object-info.sh index 49b6660934..6bc863c391 100755 --- a/t/t1017-cat-file-remote-object-info.sh +++ b/t/t1017-cat-file-remote-object-info.sh @@ -236,6 +236,21 @@ test_expect_success 'remote-object-info does not die on missing oid like info' ' ) ' +# This test depends on %(objecttype) not being supported yet; once supported +# it needs to be updated. +test_expect_success 'unsupported placeholder on remote returns empty string' ' + ( + set_transport_variables "$daemon_parent" && + cd "$daemon_parent/daemon_client_empty" && + + echo "" >expect && + git cat-file --batch-command="%(objecttype)" >actual <<-EOF && + remote-object-info "$GIT_DAEMON_URL/parent" $hello_oid + EOF + test_cmp expect actual + ) +' + # Test --batch-command remote-object-info with 'git://' and # transfer.advertiseobjectinfo set to false, i.e.
server does not have object-info capability test_expect_success 'batch-command remote-object-info git:// fails when transfer.advertiseobjectinfo=false' ' @@ -575,10 +590,12 @@ test_expect_success 'remote-object-info fails on unsupported filter option (obje set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && - test_must_fail git cat-file --batch-command="%(objectsize:disk)" 2>err <<-EOF && + echo "$hello_oid " >expect && + + git cat-file --batch-command="%(objectname) %(objectsize:disk)" >actual <<-EOF && remote-object-info "$HTTPD_URL/smart/http_parent" $hello_oid EOF - test_grep "%(objectsize:disk) is currently not supported with remote-object-info" err + test_cmp expect actual ) ' @@ -587,10 +604,12 @@ test_expect_success 'remote-object-info fails on unsupported filter option (delt set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && - test_must_fail git cat-file --batch-command="%(deltabase)" 2>err <<-EOF && + echo "" >expect && + + git cat-file --batch-command="%(deltabase)" >actual <<-EOF && remote-object-info "$HTTPD_URL/smart/http_parent" $hello_oid EOF - test_grep "%(deltabase) is currently not supported with remote-object-info" err + test_cmp expect actual ) ' From b212a666d25f499755731495cea7afc73e6a7063 Mon Sep 17 00:00:00 2001 From: Pablo Sabater Date: Wed, 1 Jul 2026 14:18:47 +0200 Subject: [PATCH 13/13] cat-file: make remote-object-info allow-list dynamic The static allow-list in expand_atom() is hardcoded to only allow "objectname" and "objectsize" for remote queries. This works because up to this point all servers will either support object-info with name and size or they do not support them at all, but we cannot expect that, in the future, different servers running different Git versions will have the same object-info capabilities. Therefore, the allow_list needs to be dynamic depending on what the server advertises.
The client will now: 1. Request the protocol option that the placeholder refers to (i.e. "size" when "%(objectsize)"). 2. Filters the request in fetch_object_info() dropping any option that the server does not advertise. 3. After the fetching, the options that haven't been dropped are the ones fetched and supported by the server, these supported options are mapped and remote_allowed_atoms is populated with the placeholders. 4. expand_atom() checks remote_allowed_atoms with the same behaviour as the static allow_list had. Move object_info_options out of get_remote_info so the caller which has data can select what options will be requested instead of requesting always size. Move batch_object_write() out so there will always be an output even if all the placeholders are not supported by the server (returns an empty line). Include "type" in the object_info_options so once the server supports it, the clients know already how to request it. Mentored-by: Karthik Nayak Mentored-by: Chandra Pratap Signed-off-by: Pablo Sabater Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 97 ++++++++++++++++++++++++++++++--------------- fetch-object-info.c | 20 ++++++++++ 2 files changed, 84 insertions(+), 33 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 5f8a5921cf..03afc2c550 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -341,13 +341,11 @@ struct expand_data { * Flags about when an object info is being fetched from remote. 
*/ unsigned is_remote:1; -}; -#define EXPAND_DATA_INIT { .mode = S_IFINVALID, .type = OBJ_BAD } -static const char *remote_object_info_atoms[] = { - "objectname", - "objectsize", + struct string_list remote_allowed_atoms; }; +#define EXPAND_DATA_INIT { .mode = S_IFINVALID, .type = OBJ_BAD, \ + .remote_allowed_atoms = STRING_LIST_INIT_NODUP } static int is_atom(const char *atom, const char *s, int slen) { @@ -359,17 +357,11 @@ static int expand_atom(struct strbuf *sb, const char *atom, int len, struct expand_data *data) { if (data->is_remote) { - size_t i, allowed_nr = ARRAY_SIZE(remote_object_info_atoms); - for (i = 0; i < allowed_nr; i++) - if (is_atom(remote_object_info_atoms[i], atom, len)) + size_t i; + for (i = 0; i < data->remote_allowed_atoms.nr; i++) + if (is_atom(data->remote_allowed_atoms.items[i].string, atom, len)) break; - - /* - * On remote, skip unsupported atoms returning an empty sb, - * honoring how for-each-ref handles known but inapplicable - * atoms (e.g. %(tagger)). 
- */ - if (i == allowed_nr) + if (i == data->remote_allowed_atoms.nr) return 1; } @@ -685,12 +677,12 @@ static int get_remote_info(struct batch_options *opt, int argc, const char **argv, struct object_info **remote_object_info, - struct oid_array *object_info_oids) + struct oid_array *object_info_oids, + struct string_list *object_info_options) { int retval = 0; struct remote *remote = NULL; struct object_id oid; - struct string_list object_info_options = STRING_LIST_INIT_NODUP; struct transport *gtransport; /* @@ -738,15 +730,12 @@ static int get_remote_info(struct batch_options *opt, CALLOC_ARRAY(*remote_object_info, object_info_oids->nr); gtransport->smart_options->object_info_oids = object_info_oids; - string_list_append(&object_info_options, "size"); - - if (object_info_options.nr > 0) { - gtransport->smart_options->object_info_options = &object_info_options; + if (object_info_options->nr > 0) { + gtransport->smart_options->object_info_options = object_info_options; gtransport->smart_options->object_info_data = *remote_object_info; retval = transport_fetch_object_info(gtransport); } cleanup: - string_list_clear(&object_info_options, 0); transport_disconnect(gtransport); return retval; } @@ -832,6 +821,21 @@ static void parse_cmd_mailmap(struct batch_options *opt UNUSED, load_mailmap(); } +struct protocol_placeholder_entry { + const char *option; + const char *atom; +}; + +static const struct protocol_placeholder_entry remote_atom_map[] = { + {"size", "objectsize"}, + {"type", "objecttype"}, + /* + * Add new protocol options here. Even if the server doesn't support + * them the allow_list will drop them if the server doesn't advertise + * them. 
+ */ +}; + static void parse_cmd_remote_object_info(struct batch_options *opt, const char *line, struct strbuf *output, struct expand_data *data) @@ -841,6 +845,7 @@ static void parse_cmd_remote_object_info(struct batch_options *opt, char *line_to_split; struct object_info *remote_object_info = NULL; struct oid_array object_info_oids = OID_ARRAY_INIT; + struct string_list object_info_options = STRING_LIST_INIT_NODUP; if (strlen(line) >= MAX_REMOTE_OBJ_INFO_LINE) die(_("remote-object-info command too long")); @@ -853,32 +858,57 @@ static void parse_cmd_remote_object_info(struct batch_options *opt, die(_("remote-object-info supports at most %d objects"), MAX_ALLOWED_OBJ_LIMIT); + if (data->info.sizep) + string_list_append(&object_info_options, "size"); + if (data->info.typep) + string_list_append(&object_info_options, "type"); + if (get_remote_info(opt, count, argv, &remote_object_info, - &object_info_oids)) + &object_info_oids, &object_info_options)) goto cleanup; + string_list_clear(&data->remote_allowed_atoms, 0); + string_list_append(&data->remote_allowed_atoms, "objectname"); + for (size_t i = 0; i < ARRAY_SIZE(remote_atom_map); i++) + if (unsorted_string_list_has_string(&object_info_options, remote_atom_map[i].option)) + string_list_append(&data->remote_allowed_atoms, + remote_atom_map[i].atom); + data->skip_object_info = 1; for (size_t i = 0; i < object_info_oids.nr; i++) { + int found = 0; data->oid = object_info_oids.oid[i]; + /* + * When reaching here, it means remote-object-info can retrieve + * information from server without downloading them. + */ if (remote_object_info[i].sizep) { - /* - * When reaching here, it means remote-object-info can retrieve - * information from server without downloading them. 
- */ data->size = *remote_object_info[i].sizep; - opt->batch_mode = BATCH_MODE_INFO; - data->is_remote = 1; - batch_object_write(argv[i + 1], output, opt, data, NULL, 0); - data->is_remote = 0; - } else { - report_object_status(opt, oid_to_hex(&data->oid), &data->oid, "missing"); + found = 1; } + + if (remote_object_info[i].typep) { + data->type = *remote_object_info[i].typep; + found = 1; + } + + if (!found && object_info_options.nr > 0) { + report_object_status(opt, oid_to_hex(&data->oid), + &data->oid, "missing"); + continue; + } + + opt->batch_mode = BATCH_MODE_INFO; + data->is_remote = 1; + batch_object_write(argv[i + 1], output, opt, data, NULL, 0); + data->is_remote = 0; } data->skip_object_info = 0; cleanup: for (size_t i = 0; i < object_info_oids.nr; i++) free_object_info_contents(&remote_object_info[i]); + string_list_clear(&object_info_options, 0); free(line_to_split); free(argv); free(remote_object_info); @@ -1194,6 +1224,7 @@ static int batch_objects(struct batch_options *opt) cleanup: strbuf_release(&input); strbuf_release(&output); + string_list_clear(&data.remote_allowed_atoms, 0); cfg->warn_on_object_refname_ambiguity = save_warning; return retval; } diff --git a/fetch-object-info.c b/fetch-object-info.c index 03cfb70338..e968341676 100644 --- a/fetch-object-info.c +++ b/fetch-object-info.c @@ -41,6 +41,26 @@ int fetch_object_info(const enum protocol_version version, struct object_info_ar case protocol_v2: if (!server_supports_v2("object-info")) die(_("object-info capability is not enabled on the server")); + /* + * When removing an element from the list it gets swapped by the + * last element, iterate backwards to prevent elements skipping + * evaluation. + * + * object_info_options->nr can be safely cast without overflow + * because the number of options is a small known number (the + * supported placeholders which currently are size and type).
+ */ + for (int i = (int)args->object_info_options->nr - 1; i >= 0; i--) + if (!server_supports_feature("object-info", + args->object_info_options->items[i].string, 0)) + unsorted_string_list_delete_item(args->object_info_options, i, 0); + /* + * If no options are left after the filtering, avoid unnecessary + * request to the server. + */ + if (!args->object_info_options->nr) + return 0; + send_object_info_request(fd_out, args); break; case protocol_v1: