From 4947367267cbcd0ca528711b2393613e2e817878 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 1 Sep 2011 15:43:33 -0700 Subject: [PATCH 1/3] list-objects: pass callback data to show_objects() The traverse_commit_list() API takes two callback functions, one to show commit objects, and the other to show other kinds of objects. Even though the former has a callback data parameter, so that the callback does not have to rely on global state, the latter does not. Give the show_objects() callback the same callback data parameter. Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 4 +++- builtin/rev-list.c | 10 +++++++--- list-objects.c | 28 +++++++++++++++++----------- list-objects.h | 5 ++--- upload-pack.c | 4 +++- 5 files changed, 32 insertions(+), 19 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index f402a843bb..fca6cb5635 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1936,7 +1936,9 @@ static void show_commit(struct commit *commit, void *data) commit->object.flags |= OBJECT_ADDED; } -static void show_object(struct object *obj, const struct name_path *path, const char *last) +static void show_object(struct object *obj, + const struct name_path *path, const char *last, + void *data) { char *name = path_name(path, last); diff --git a/builtin/rev-list.c b/builtin/rev-list.c index f5ce4873e3..920b91c0c3 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -168,15 +168,19 @@ static void finish_commit(struct commit *commit, void *data) commit->buffer = NULL; } -static void finish_object(struct object *obj, const struct name_path *path, const char *name) +static void finish_object(struct object *obj, + const struct name_path *path, const char *name, + void *cb_data) { if (obj->type == OBJ_BLOB && !has_sha1_file(obj->sha1)) die("missing blob object '%s'", sha1_to_hex(obj->sha1)); } -static void show_object(struct object *obj, const struct name_path *path, const char *component) +static void show_object(struct object *obj, + const struct name_path *path, const char *component, + void *cb_data) { - finish_object(obj, path, component); + finish_object(obj, path, component, cb_data); show_object_with_name(stdout, obj, path, component); } diff --git a/list-objects.c b/list-objects.c index 0fb44e7ed7..39d80c0175 100644 --- a/list-objects.c +++ b/list-objects.c @@ -12,7 +12,8 @@ static void process_blob(struct rev_info *revs, struct blob *blob, show_object_fn show, struct name_path *path, - const char *name) + const char *name, + void *cb_data) { struct object *obj = &blob->object; @@ -23,7 +24,7 @@ static void process_blob(struct rev_info *revs, if (obj->flags & (UNINTERESTING | SEEN)) return; obj->flags |= SEEN; - show(obj, path, name); + show(obj, path, name, cb_data); } /* @@ -52,7 +53,8 @@ static void process_gitlink(struct rev_info *revs, const unsigned char *sha1, show_object_fn show, struct name_path *path, - const char *name) + const char *name, + void *cb_data) { /* Nothing to do */ } @@ -62,7 +64,8 @@ static void process_tree(struct rev_info *revs, show_object_fn show, struct name_path *path, struct strbuf *base, - const char *name) + const char *name, + void *cb_data) { struct object *obj = &tree->object; struct tree_desc desc; @@ -80,7 +83,7 @@ static void process_tree(struct rev_info *revs, if (parse_tree(tree) < 0) die("bad tree object %s", sha1_to_hex(obj->sha1)); obj->flags |= SEEN; - show(obj, path, name); + show(obj, path, name, cb_data); me.up = path; me.elem = name; me.elem_len = strlen(name); @@ -106,14 +109,17 @@ static void process_tree(struct rev_info *revs, if (S_ISDIR(entry.mode)) process_tree(revs, lookup_tree(entry.sha1), - show, &me, base, entry.path); + show, &me, base, entry.path, + cb_data); else if (S_ISGITLINK(entry.mode)) process_gitlink(revs, entry.sha1, - show, &me, entry.path); + show, &me, entry.path, + cb_data); else process_blob(revs, lookup_blob(entry.sha1), - show, &me, entry.path); + show, &me, entry.path, + cb_data); } strbuf_setlen(base, baselen); free(tree->buffer); @@ -185,17 +191,17 @@ void traverse_commit_list(struct rev_info *revs, continue; if (obj->type == OBJ_TAG) { obj->flags |= SEEN; - show_object(obj, NULL, name); + show_object(obj, NULL, name, data); continue; } if (obj->type == OBJ_TREE) { process_tree(revs, (struct tree *)obj, show_object, - NULL, &base, name); + NULL, &base, name, data); continue; } if (obj->type == OBJ_BLOB) { process_blob(revs, (struct blob *)obj, show_object, - NULL, name); + NULL, name, data); continue; } die("unknown pending object %s (%s)", diff --git a/list-objects.h b/list-objects.h index d65dbf03e6..3db7bb6fa3 100644 --- a/list-objects.h +++ b/list-objects.h @@ -2,11 +2,10 @@ #define LIST_OBJECTS_H typedef void (*show_commit_fn)(struct commit *, void *); -typedef void (*show_object_fn)(struct object *, const struct name_path *, const char *); -typedef void (*show_edge_fn)(struct commit *); - +typedef void (*show_object_fn)(struct object *, const struct name_path *, const char *, void *); void traverse_commit_list(struct rev_info *, show_commit_fn, show_object_fn, void *); +typedef void (*show_edge_fn)(struct commit *); void mark_edges_uninteresting(struct commit_list *, struct rev_info *, show_edge_fn); #endif diff --git a/upload-pack.c b/upload-pack.c index 970a1eba13..6be625995d 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -83,7 +83,9 @@ static void show_commit(struct commit *commit, void *data) commit->buffer = NULL; } -static void show_object(struct object *obj, const struct name_path *path, const char *component) +static void show_object(struct object *obj, + const struct name_path *path, const char *component, + void *cb_data) { show_object_with_name(pack_pipe, obj, path, component); } From 5a48d24012fa39cdd02c1cb614db2e62d445e2ce Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 1 Sep 2011 15:43:34 -0700 Subject: [PATCH 2/3] rev-list --verify-object Often we want to verify everything reachable from a given set of commits are present in our repository and connected without a gap to the tips of our refs. We used to do this for this purpose: $ rev-list --objects $commits_to_be_tested --not --all Even though this is good enough for catching missing commits and trees, we show the object name but do not verify their existence, let alone their well-formedness, for the blob objects at the leaf level. Add a new "--verify-object" option so that we can catch missing and broken blobs as well. Signed-off-by: Junio C Hamano --- builtin/rev-list.c | 4 ++++ revision.c | 5 +++++ revision.h | 1 + 3 files changed, 10 insertions(+) diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 920b91c0c3..ab3be7ca82 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -180,7 +180,11 @@ static void show_object(struct object *obj, const struct name_path *path, const char *component, void *cb_data) { + struct rev_info *info = cb_data; + finish_object(obj, path, component, cb_data); + if (info->verify_objects && !obj->parsed && obj->type != OBJ_COMMIT) + parse_object(obj->sha1); show_object_with_name(stdout, obj, path, component); } diff --git a/revision.c b/revision.c index 072ddac470..5ef498b304 100644 --- a/revision.c +++ b/revision.c @@ -1383,6 +1383,11 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg revs->tree_objects = 1; revs->blob_objects = 1; revs->edge_hint = 1; + } else if (!strcmp(arg, "--verify-objects")) { + revs->tag_objects = 1; + revs->tree_objects = 1; + revs->blob_objects = 1; + revs->verify_objects = 1; } else if (!strcmp(arg, "--unpacked")) { revs->unpacked = 1; } else if (!prefixcmp(arg, "--unpacked=")) { diff --git a/revision.h b/revision.h index da00a58fa5..648876b35d 100644 --- a/revision.h +++ b/revision.h @@ -53,6 +53,7 @@ struct rev_info { tag_objects:1, tree_objects:1, blob_objects:1, + verify_objects:1, edge_hint:1, limited:1, unpacked:1, From 6b67e0dc068d1bfd07686071b70f60078380666f Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 1 Sep 2011 15:43:35 -0700 Subject: [PATCH 3/3] fetch: verify we have everything we need before updating our ref The "git fetch" command works in two phases. The remote side tells us what objects are at the tip of the refs we are fetching from, and transfers the objects missing from our side. After storing the objects in our repository, we update our remote tracking branches to point at the updated tips of the refs. A broken or malicious remote side could send a perfectly well-formed pack data during the object transfer phase, but there is no guarantee that the given data actually fill the gap between the objects we originally had and the refs we are updating to. Although this kind of breakage can be caught by running fsck after a fetch, it is much cheaper to verify that everything that is reachable from the tips of the refs we fetched are indeed fully connected to the tips of our current set of refs before we update them. Signed-off-by: Junio C Hamano --- builtin/fetch.c | 119 +++++++++++++++++++++++++----------------------- 1 file changed, 63 insertions(+), 56 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index f9c41da475..bdb03ff54c 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -345,6 +345,64 @@ static int update_local_ref(struct ref *ref, } } +/* + * The ref_map records the tips of the refs we are fetching. If + * + * $ git rev-list --verify-objects --stdin --not --all + * + * (feeding all the refs in ref_map on its standard input) does not + * error out, that means everything reachable from these updated refs + * locally exists and is connected to some of our existing refs. + * + * Returns 0 if everything is connected, non-zero otherwise. + */ +static int check_everything_connected(struct ref *ref_map, int quiet) +{ + struct child_process rev_list; + const char *argv[] = {"rev-list", "--verify-objects", + "--stdin", "--not", "--all", NULL, NULL}; + char commit[41]; + struct ref *ref; + int err = 0; + + if (!ref_map) + return 0; + + if (quiet) + argv[5] = "--quiet"; + + memset(&rev_list, 0, sizeof(rev_list)); + rev_list.argv = argv; + rev_list.git_cmd = 1; + rev_list.in = -1; + rev_list.no_stdout = 1; + rev_list.no_stderr = quiet; + if (start_command(&rev_list)) + return error(_("Could not run 'git rev-list'")); + + sigchain_push(SIGPIPE, SIG_IGN); + + memcpy(commit + 40, "\n", 2); + for (ref = ref_map; ref; ref = ref->next) { + memcpy(commit, sha1_to_hex(ref->old_sha1), 40); + if (write_in_full(rev_list.in, commit, 41) < 0) { + if (errno != EPIPE && errno != EINVAL) + error(_("failed write to rev-list: %s"), + strerror(errno)); + err = -1; + break; + } + } + if (close(rev_list.in)) { + error(_("failed to close rev-list's stdin: %s"), strerror(errno)); + err = -1; + } + + sigchain_pop(SIGPIPE); + + return finish_command(&rev_list) || err; +} + static int store_updated_refs(const char *raw_url, const char *remote_name, struct ref *ref_map) { @@ -364,6 +422,10 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, url = transport_anonymize_url(raw_url); else url = xstrdup("foreign"); + + if (check_everything_connected(ref_map, 0)) + return error(_("%s did not send all necessary objects\n"), url); + for (rm = ref_map; rm; rm = rm->next) { struct ref *ref = NULL; @@ -457,24 +519,9 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, * We would want to bypass the object transfer altogether if * everything we are going to fetch already exists and is connected * locally. - * - * The refs we are going to fetch are in ref_map. If running - * - * $ git rev-list --objects --stdin --not --all - * - * (feeding all the refs in ref_map on its standard input) - * does not error out, that means everything reachable from the - * refs we are going to fetch exists and is connected to some of - * our existing refs. */ static int quickfetch(struct ref *ref_map) { - struct child_process revlist; - struct ref *ref; - int err; - const char *argv[] = {"rev-list", - "--quiet", "--objects", "--stdin", "--not", "--all", NULL}; - /* * If we are deepening a shallow clone we already have these * objects reachable. Running rev-list here will return with @@ -484,47 +531,7 @@ static int quickfetch(struct ref *ref_map) */ if (depth) return -1; - - if (!ref_map) - return 0; - - memset(&revlist, 0, sizeof(revlist)); - revlist.argv = argv; - revlist.git_cmd = 1; - revlist.no_stdout = 1; - revlist.no_stderr = 1; - revlist.in = -1; - - err = start_command(&revlist); - if (err) { - error(_("could not run rev-list")); - return err; - } - - /* - * If rev-list --stdin encounters an unknown commit, it terminates, - * which will cause SIGPIPE in the write loop below. - */ - sigchain_push(SIGPIPE, SIG_IGN); - - for (ref = ref_map; ref; ref = ref->next) { - if (write_in_full(revlist.in, sha1_to_hex(ref->old_sha1), 40) < 0 || - write_str_in_full(revlist.in, "\n") < 0) { - if (errno != EPIPE && errno != EINVAL) - error(_("failed write to rev-list: %s"), strerror(errno)); - err = -1; - break; - } - } - - if (close(revlist.in)) { - error(_("failed to close rev-list's stdin: %s"), strerror(errno)); - err = -1; - } - - sigchain_pop(SIGPIPE); - - return finish_command(&revlist) || err; + return check_everything_connected(ref_map, 1); } static int fetch_refs(struct transport *transport, struct ref *ref_map)