From 6d4bb3833c3d2114d9c4e6d028e961c3fba8b8b3 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 1 Sep 2011 15:43:35 -0700 Subject: [PATCH 1/4] fetch: verify we have everything we need before updating our ref The "git fetch" command works in two phases. The remote side tells us what objects are at the tip of the refs we are fetching from, and transfers the objects missing from our side. After storing the objects in our repository, we update our remote tracking branches to point at the updated tips of the refs. A broken or malicious remote side could send a perfectly well-formed pack data during the object transfer phase, but there is no guarantee that the given data actually fill the gap between the objects we originally had and the refs we are updating to. Although this kind of breakage can be caught by running fsck after a fetch, it is much cheaper to verify that everything that is reachable from the tips of the refs we fetched are indeed fully connected to the tips of our current set of refs before we update them. Signed-off-by: Junio C Hamano --- builtin/fetch.c | 119 +++++++++++++++++++++++++----------------------- 1 file changed, 63 insertions(+), 56 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index f9c41da475..fc254b6b96 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -345,6 +345,64 @@ static int update_local_ref(struct ref *ref, } } +/* + * The ref_map records the tips of the refs we are fetching. If + * + * $ git rev-list --verify-objects --stdin --not --all + * + * (feeding all the refs in ref_map on its standard input) does not + * error out, that means everything reachable from these updated refs + * locally exists and is connected to some of our existing refs. + * + * Returns 0 if everything is connected, non-zero otherwise. + */ +static int check_everything_connected(struct ref *ref_map, int quiet) +{ + struct child_process rev_list; + const char *argv[] = {"rev-list", "--verify-objects", + "--stdin", "--not", "--all", NULL, NULL}; + char commit[41]; + struct ref *ref; + int err = 0; + + if (!ref_map) + return 0; + + if (quiet) + argv[5] = "--quiet"; + + memset(&rev_list, 0, sizeof(rev_list)); + rev_list.argv = argv; + rev_list.git_cmd = 1; + rev_list.in = -1; + rev_list.no_stdout = 1; + rev_list.no_stderr = quiet; + if (start_command(&rev_list)) + return error(_("Could not run 'git rev-list'")); + + sigchain_push(SIGPIPE, SIG_IGN); + + commit[40] = '\n'; + for (ref = ref_map; ref; ref = ref->next) { + memcpy(commit, sha1_to_hex(ref->old_sha1), 40); + if (write_in_full(rev_list.in, commit, 41) < 0) { + if (errno != EPIPE && errno != EINVAL) + error(_("failed write to rev-list: %s"), + strerror(errno)); + err = -1; + break; + } + } + if (close(rev_list.in)) { + error(_("failed to close rev-list's stdin: %s"), strerror(errno)); + err = -1; + } + + sigchain_pop(SIGPIPE); + + return finish_command(&rev_list) || err; +} + static int store_updated_refs(const char *raw_url, const char *remote_name, struct ref *ref_map) { @@ -364,6 +422,10 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, url = transport_anonymize_url(raw_url); else url = xstrdup("foreign"); + + if (check_everything_connected(ref_map, 0)) + return error(_("%s did not send all necessary objects\n"), url); + for (rm = ref_map; rm; rm = rm->next) { struct ref *ref = NULL; @@ -457,24 +519,9 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, * We would want to bypass the object transfer altogether if * everything we are going to fetch already exists and is connected * locally. - * - * The refs we are going to fetch are in ref_map. If running - * - * $ git rev-list --objects --stdin --not --all - * - * (feeding all the refs in ref_map on its standard input) - * does not error out, that means everything reachable from the - * refs we are going to fetch exists and is connected to some of - * our existing refs. */ static int quickfetch(struct ref *ref_map) { - struct child_process revlist; - struct ref *ref; - int err; - const char *argv[] = {"rev-list", - "--quiet", "--objects", "--stdin", "--not", "--all", NULL}; - /* * If we are deepening a shallow clone we already have these * objects reachable. Running rev-list here will return with @@ -484,47 +531,7 @@ static int quickfetch(struct ref *ref_map) */ if (depth) return -1; - - if (!ref_map) - return 0; - - memset(&revlist, 0, sizeof(revlist)); - revlist.argv = argv; - revlist.git_cmd = 1; - revlist.no_stdout = 1; - revlist.no_stderr = 1; - revlist.in = -1; - - err = start_command(&revlist); - if (err) { - error(_("could not run rev-list")); - return err; - } - - /* - * If rev-list --stdin encounters an unknown commit, it terminates, - * which will cause SIGPIPE in the write loop below. - */ - sigchain_push(SIGPIPE, SIG_IGN); - - for (ref = ref_map; ref; ref = ref->next) { - if (write_in_full(revlist.in, sha1_to_hex(ref->old_sha1), 40) < 0 || - write_str_in_full(revlist.in, "\n") < 0) { - if (errno != EPIPE && errno != EINVAL) - error(_("failed write to rev-list: %s"), strerror(errno)); - err = -1; - break; - } - } - - if (close(revlist.in)) { - error(_("failed to close rev-list's stdin: %s"), strerror(errno)); - err = -1; - } - - sigchain_pop(SIGPIPE); - - return finish_command(&revlist) || err; + return check_everything_connected(ref_map, 1); } static int fetch_refs(struct transport *transport, struct ref *ref_map) From f0e278b1b7a2c0ecbecf644a18c1383dc426db30 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 2 Sep 2011 16:22:47 -0700 Subject: [PATCH 2/4] check_everything_connected(): refactor to use an iterator We will be using the same "rev-list --verify-objects" logic to add a sanity check to the receiving end of "git push" in the same way, but the list of commits that are checked come from a structure with a different shape over there. Update the function to take an iterator to make it easier to reuse it in different contexts. Signed-off-by: Junio C Hamano --- builtin/fetch.c | 50 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index fc254b6b96..0ef912eac0 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -346,27 +346,34 @@ static int update_local_ref(struct ref *ref, } /* - * The ref_map records the tips of the refs we are fetching. If + * Take callback data, and return next object name in the buffer. + * When called after returning the name for the last object, return -1 + * to signal EOF, otherwise return 0. + */ +typedef int (*sha1_iterate_fn)(void *, unsigned char [20]); + +/* + * If we feed all the commits we want to verify to this command * * $ git rev-list --verify-objects --stdin --not --all * - * (feeding all the refs in ref_map on its standard input) does not - * error out, that means everything reachable from these updated refs - * locally exists and is connected to some of our existing refs. + * and if it does not error out, that means everything reachable from + * these commits locally exists and is connected to some of our + * existing refs. * * Returns 0 if everything is connected, non-zero otherwise. */ -static int check_everything_connected(struct ref *ref_map, int quiet) +static int check_everything_connected(sha1_iterate_fn fn, int quiet, void *cb_data) { struct child_process rev_list; const char *argv[] = {"rev-list", "--verify-objects", "--stdin", "--not", "--all", NULL, NULL}; char commit[41]; - struct ref *ref; + unsigned char sha1[20]; int err = 0; - if (!ref_map) - return 0; + if (fn(cb_data, sha1)) + return err; if (quiet) argv[5] = "--quiet"; @@ -383,8 +390,8 @@ static int check_everything_connected(struct ref *ref_map, int quiet) sigchain_push(SIGPIPE, SIG_IGN); commit[40] = '\n'; - for (ref = ref_map; ref; ref = ref->next) { - memcpy(commit, sha1_to_hex(ref->old_sha1), 40); + do { + memcpy(commit, sha1_to_hex(sha1), 40); if (write_in_full(rev_list.in, commit, 41) < 0) { if (errno != EPIPE && errno != EINVAL) error(_("failed write to rev-list: %s"), @@ -392,17 +399,29 @@ static int check_everything_connected(struct ref *ref_map, int quiet) err = -1; break; } - } + } while (!fn(cb_data, sha1)); + if (close(rev_list.in)) { error(_("failed to close rev-list's stdin: %s"), strerror(errno)); err = -1; } sigchain_pop(SIGPIPE); - return finish_command(&rev_list) || err; } +static int iterate_ref_map(void *cb_data, unsigned char sha1[20]) +{ + struct ref **rm = cb_data; + struct ref *ref = *rm; + + if (!ref) + return -1; /* end of the list */ + *rm = ref->next; + hashcpy(sha1, ref->old_sha1); + return 0; +} + static int store_updated_refs(const char *raw_url, const char *remote_name, struct ref *ref_map) { @@ -423,7 +442,8 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, else url = xstrdup("foreign"); - if (check_everything_connected(ref_map, 0)) + rm = ref_map; + if (check_everything_connected(iterate_ref_map, 0, &rm)) return error(_("%s did not send all necessary objects\n"), url); for (rm = ref_map; rm; rm = rm->next) { @@ -522,6 +542,8 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, */ static int quickfetch(struct ref *ref_map) { + struct ref *rm = ref_map; + /* * If we are deepening a shallow clone we already have these * objects reachable. Running rev-list here will return with @@ -531,7 +553,7 @@ static int quickfetch(struct ref *ref_map) */ if (depth) return -1; - return check_everything_connected(ref_map, 1); + return check_everything_connected(iterate_ref_map, 1, &rm); } static int fetch_refs(struct transport *transport, struct ref *ref_map) From f96400cb467af82e21acb3034dd214f4c8be11d0 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 2 Sep 2011 16:33:22 -0700 Subject: [PATCH 3/4] check_everything_connected(): libify Extract the helper function and the type definition of the iterator function it uses out of builtin/fetch.c into a separate source and a header file. Signed-off-by: Junio C Hamano --- Makefile | 2 ++ builtin/fetch.c | 66 +------------------------------------------------ connected.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++ connected.h | 20 +++++++++++++++ 4 files changed, 85 insertions(+), 65 deletions(-) create mode 100644 connected.c create mode 100644 connected.h diff --git a/Makefile b/Makefile index e40ac0c7f5..23d7658924 100644 --- a/Makefile +++ b/Makefile @@ -511,6 +511,7 @@ LIB_H += compat/win32/pthread.h LIB_H += compat/win32/syslog.h LIB_H += compat/win32/sys/poll.h LIB_H += compat/win32/dirent.h +LIB_H += connected.h LIB_H += csum-file.h LIB_H += decorate.h LIB_H += delta.h @@ -587,6 +588,7 @@ LIB_OBJS += combine-diff.o LIB_OBJS += commit.o LIB_OBJS += config.o LIB_OBJS += connect.o +LIB_OBJS += connected.o LIB_OBJS += convert.o LIB_OBJS += copy.o LIB_OBJS += csum-file.o diff --git a/builtin/fetch.c b/builtin/fetch.c index 0ef912eac0..ffda063d91 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -13,6 +13,7 @@ #include "sigchain.h" #include "transport.h" #include "submodule.h" +#include "connected.h" static const char * const builtin_fetch_usage[] = { "git fetch [] [ [...]]", @@ -345,71 +346,6 @@ static int update_local_ref(struct ref *ref, } } -/* - * Take callback data, and return next object name in the buffer. - * When called after returning the name for the last object, return -1 - * to signal EOF, otherwise return 0. - */ -typedef int (*sha1_iterate_fn)(void *, unsigned char [20]); - -/* - * If we feed all the commits we want to verify to this command - * - * $ git rev-list --verify-objects --stdin --not --all - * - * and if it does not error out, that means everything reachable from - * these commits locally exists and is connected to some of our - * existing refs. - * - * Returns 0 if everything is connected, non-zero otherwise. - */ -static int check_everything_connected(sha1_iterate_fn fn, int quiet, void *cb_data) -{ - struct child_process rev_list; - const char *argv[] = {"rev-list", "--verify-objects", - "--stdin", "--not", "--all", NULL, NULL}; - char commit[41]; - unsigned char sha1[20]; - int err = 0; - - if (fn(cb_data, sha1)) - return err; - - if (quiet) - argv[5] = "--quiet"; - - memset(&rev_list, 0, sizeof(rev_list)); - rev_list.argv = argv; - rev_list.git_cmd = 1; - rev_list.in = -1; - rev_list.no_stdout = 1; - rev_list.no_stderr = quiet; - if (start_command(&rev_list)) - return error(_("Could not run 'git rev-list'")); - - sigchain_push(SIGPIPE, SIG_IGN); - - commit[40] = '\n'; - do { - memcpy(commit, sha1_to_hex(sha1), 40); - if (write_in_full(rev_list.in, commit, 41) < 0) { - if (errno != EPIPE && errno != EINVAL) - error(_("failed write to rev-list: %s"), - strerror(errno)); - err = -1; - break; - } - } while (!fn(cb_data, sha1)); - - if (close(rev_list.in)) { - error(_("failed to close rev-list's stdin: %s"), strerror(errno)); - err = -1; - } - - sigchain_pop(SIGPIPE); - return finish_command(&rev_list) || err; -} - static int iterate_ref_map(void *cb_data, unsigned char sha1[20]) { struct ref **rm = cb_data; diff --git a/connected.c b/connected.c new file mode 100644 index 0000000000..d7624230d4 --- /dev/null +++ b/connected.c @@ -0,0 +1,62 @@ +#include "cache.h" +#include "run-command.h" +#include "sigchain.h" +#include "connected.h" + +/* + * If we feed all the commits we want to verify to this command + * + * $ git rev-list --verify-objects --stdin --not --all + * + * and if it does not error out, that means everything reachable from + * these commits locally exists and is connected to some of our + * existing refs. + * + * Returns 0 if everything is connected, non-zero otherwise. + */ +int check_everything_connected(sha1_iterate_fn fn, int quiet, void *cb_data) +{ + struct child_process rev_list; + const char *argv[] = {"rev-list", "--verify-objects", + "--stdin", "--not", "--all", NULL, NULL}; + char commit[41]; + unsigned char sha1[20]; + int err = 0; + + if (fn(cb_data, sha1)) + return err; + + if (quiet) + argv[5] = "--quiet"; + + memset(&rev_list, 0, sizeof(rev_list)); + rev_list.argv = argv; + rev_list.git_cmd = 1; + rev_list.in = -1; + rev_list.no_stdout = 1; + rev_list.no_stderr = quiet; + if (start_command(&rev_list)) + return error(_("Could not run 'git rev-list'")); + + sigchain_push(SIGPIPE, SIG_IGN); + + commit[40] = '\n'; + do { + memcpy(commit, sha1_to_hex(sha1), 40); + if (write_in_full(rev_list.in, commit, 41) < 0) { + if (errno != EPIPE && errno != EINVAL) + error(_("failed write to rev-list: %s"), + strerror(errno)); + err = -1; + break; + } + } while (!fn(cb_data, sha1)); + + if (close(rev_list.in)) { + error(_("failed to close rev-list's stdin: %s"), strerror(errno)); + err = -1; + } + + sigchain_pop(SIGPIPE); + return finish_command(&rev_list) || err; +} diff --git a/connected.h b/connected.h new file mode 100644 index 0000000000..7e4585a6cb --- /dev/null +++ b/connected.h @@ -0,0 +1,20 @@ +#ifndef CONNECTED_H +#define CONNECTED_H + +/* + * Take callback data, and return next object name in the buffer. + * When called after returning the name for the last object, return -1 + * to signal EOF, otherwise return 0. + */ +typedef int (*sha1_iterate_fn)(void *, unsigned char [20]); + +/* + * Make sure that our object store has all the commits necessary to + * connect the ancestry chain to some of our existing refs, and all + * the trees and blobs that these commits use. + * + * Return 0 if Ok, non zero otherwise (i.e. some missing objects) + */ +extern int check_everything_connected(sha1_iterate_fn, int quiet, void *cb_data); + +#endif /* CONNECTED_H */ From 52fed6e1ce07250ada3d2d0128015f131e3ad6c1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 2 Sep 2011 16:52:08 -0700 Subject: [PATCH 4/4] receive-pack: check connectivity before concluding "git push" Signed-off-by: Junio C Hamano --- builtin/receive-pack.c | 43 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index e1a687ad07..42f712318f 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -11,6 +11,7 @@ #include "transport.h" #include "string-list.h" #include "sha1-array.h" +#include "connected.h" static const char receive_pack_usage[] = "git receive-pack "; @@ -546,6 +547,43 @@ static void check_aliased_updates(struct command *commands) string_list_clear(&ref_list, 0); } +static int command_singleton_iterator(void *cb_data, unsigned char sha1[20]) +{ + struct command **cmd_list = cb_data; + struct command *cmd = *cmd_list; + + if (!cmd) + return -1; /* end of list */ + *cmd_list = NULL; /* this returns only one */ + hashcpy(sha1, cmd->new_sha1); + return 0; +} + +static void set_connectivity_errors(struct command *commands) +{ + struct command *cmd; + + for (cmd = commands; cmd; cmd = cmd->next) { + struct command *singleton = cmd; + if (!check_everything_connected(command_singleton_iterator, + 0, &singleton)) + continue; + cmd->error_string = "missing necessary objects"; + } +} + +static int iterate_receive_command_list(void *cb_data, unsigned char sha1[20]) +{ + struct command **cmd_list = cb_data; + struct command *cmd = *cmd_list; + + if (!cmd) + return -1; /* end of list */ + *cmd_list = cmd->next; + hashcpy(sha1, cmd->new_sha1); + return 0; +} + static void execute_commands(struct command *commands, const char *unpacker_error) { struct command *cmd; @@ -557,6 +595,11 @@ static void execute_commands(struct command *commands, const char *unpacker_erro return; } + cmd = commands; + if (check_everything_connected(iterate_receive_command_list, + 0, &cmd)) + set_connectivity_errors(commands); + if (run_receive_hook(commands, pre_receive_hook)) { for (cmd = commands; cmd; cmd = cmd->next) cmd->error_string = "pre-receive hook declined";