diff --git a/builtin/fsck.c b/builtin/fsck.c index ef78c6c00c..028aba52eb 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -337,7 +337,7 @@ static void check_connectivity(void) } } -static int fsck_obj(struct object *obj) +static int fsck_obj(struct object *obj, void *buffer, unsigned long size) { int err; @@ -351,7 +351,7 @@ static int fsck_obj(struct object *obj) if (fsck_walk(obj, NULL, &fsck_obj_options)) objerror(obj, "broken links"); - err = fsck_object(obj, NULL, 0, &fsck_obj_options); + err = fsck_object(obj, buffer, size, &fsck_obj_options); if (err) goto out; @@ -396,7 +396,7 @@ static int fsck_obj_buffer(const struct object_id *oid, enum object_type type, } obj->flags &= ~(REACHABLE | SEEN); obj->flags |= HAS_OBJ; - return fsck_obj(obj); + return fsck_obj(obj, buffer, size); } static int default_refs; @@ -504,44 +504,42 @@ static void get_default_heads(void) } } -static struct object *parse_loose_object(const struct object_id *oid, - const char *path) +static int fsck_loose(const struct object_id *oid, const char *path, void *data) { struct object *obj; - void *contents; enum object_type type; unsigned long size; + void *contents; int eaten; - if (read_loose_object(path, oid->hash, &type, &size, &contents) < 0) - return NULL; + if (read_loose_object(path, oid->hash, &type, &size, &contents) < 0) { + errors_found |= ERROR_OBJECT; + error("%s: object corrupt or missing: %s", + oid_to_hex(oid), path); + return 0; /* keep checking other objects */ + } if (!contents && type != OBJ_BLOB) - die("BUG: read_loose_object streamed a non-blob"); + BUG("read_loose_object streamed a non-blob"); obj = parse_object_buffer(oid, type, size, contents, &eaten); - - if (!eaten) - free(contents); - return obj; -} - -static int fsck_loose(const struct object_id *oid, const char *path, void *data) -{ - struct object *obj = parse_loose_object(oid, path); - if (!obj) { errors_found |= ERROR_OBJECT; - error("%s: object corrupt or missing: %s", + error("%s: object could not be parsed: %s", oid_to_hex(oid), path); + if (!eaten) + free(contents); return 0; /* keep checking other objects */ } obj->flags &= ~(REACHABLE | SEEN); obj->flags |= HAS_OBJ; - if (fsck_obj(obj)) + if (fsck_obj(obj, contents, size)) errors_found |= ERROR_OBJECT; - return 0; + + if (!eaten) + free(contents); + return 0; /* keep checking other objects, even if we saw an error */ } static int fsck_cruft(const char *basename, const char *path, void *data) @@ -750,6 +748,9 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) } stop_progress(&progress); } + + if (fsck_finish(&fsck_obj_options)) + errors_found |= ERROR_OBJECT; } for (i = 0; i < argc; i++) { diff --git a/builtin/index-pack.c b/builtin/index-pack.c index bda84a92ef..7b2f7c0470 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -836,6 +836,9 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, blob->object.flags |= FLAG_CHECKED; else die(_("invalid blob object %s"), oid_to_hex(oid)); + if (do_fsck_object && + fsck_object(&blob->object, (void *)data, size, &fsck_options)) + die(_("fsck error in packed object")); } else { struct object *obj; int eaten; @@ -853,7 +856,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, die(_("invalid %s"), type_name(type)); if (do_fsck_object && fsck_object(obj, buf, size, &fsck_options)) - die(_("Error in object")); + die(_("fsck error in packed object")); if (strict && fsck_walk(obj, NULL, &fsck_options)) die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj->oid)); @@ -1477,6 +1480,9 @@ static void final(const char *final_pack_name, const char *curr_pack_name, } else chmod(final_index_name, 0444); + if (do_fsck_object) + add_packed_git(final_index_name, strlen(final_index_name), 0); + if (!from_stdin) { printf("%s\n", sha1_to_hex(hash)); } else { @@ -1818,6 +1824,10 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) pack_hash); else close(input_fd); + + if (do_fsck_object && fsck_finish(&fsck_options)) + die(_("fsck error in pack objects")); + free(objects); strbuf_release(&index_name_buf); if (pack_name == NULL) diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 6620feec68..c8f1406d23 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -210,7 +210,7 @@ static int check_object(struct object *obj, int type, void *data, struct fsck_op if (!obj_buf) die("Whoops! Cannot find object '%s'", oid_to_hex(&obj->oid)); if (fsck_object(obj, obj_buf->buffer, obj_buf->size, &fsck_options)) - die("Error in object"); + die("fsck error in packed object"); fsck_options.walk = check_object; if (fsck_walk(obj, NULL, &fsck_options)) die("Error on reachable objects of %s", oid_to_hex(&obj->oid)); @@ -572,8 +572,11 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix) unpack_all(); the_hash_algo->update_fn(&ctx, buffer, offset); the_hash_algo->final_fn(oid.hash, &ctx); - if (strict) + if (strict) { write_rest(); + if (fsck_finish(&fsck_options)) + die(_("fsck error in pack objects")); + } if (hashcmp(fill(the_hash_algo->rawsz), oid.hash)) die("final sha1 did not match"); use(the_hash_algo->rawsz); diff --git a/fsck.c b/fsck.c index 5c8c12dde3..9339f31513 100644 --- a/fsck.c +++ b/fsck.c @@ -10,6 +10,13 @@ #include "utf8.h" #include "sha1-array.h" #include "decorate.h" +#include "oidset.h" +#include "packfile.h" +#include "submodule-config.h" +#include "config.h" + +static struct oidset gitmodules_found = OIDSET_INIT; +static struct oidset gitmodules_done = OIDSET_INIT; #define FSCK_FATAL -1 #define FSCK_INFO -2 @@ -44,6 +51,7 @@ FUNC(MISSING_TAG_ENTRY, ERROR) \ FUNC(MISSING_TAG_OBJECT, ERROR) \ FUNC(MISSING_TREE, ERROR) \ + FUNC(MISSING_TREE_OBJECT, ERROR) \ FUNC(MISSING_TYPE, ERROR) \ FUNC(MISSING_TYPE_ENTRY, ERROR) \ FUNC(MULTIPLE_AUTHORS, ERROR) \ @@ -51,6 +59,11 @@ FUNC(TREE_NOT_SORTED, ERROR) \ FUNC(UNKNOWN_TYPE, ERROR) \ FUNC(ZERO_PADDED_DATE, ERROR) \ + FUNC(GITMODULES_MISSING, ERROR) \ + FUNC(GITMODULES_BLOB, ERROR) \ + FUNC(GITMODULES_PARSE, ERROR) \ + FUNC(GITMODULES_NAME, ERROR) \ + FUNC(GITMODULES_SYMLINK, ERROR) \ /* warnings */ \ FUNC(BAD_FILEMODE, WARN) \ FUNC(EMPTY_NAME, WARN) \ @@ -561,10 +574,18 @@ static int fsck_tree(struct tree *item, struct fsck_options *options) has_empty_name |= !*name; has_dot |= !strcmp(name, "."); has_dotdot |= !strcmp(name, ".."); - has_dotgit |= (!strcmp(name, ".git") || - is_hfs_dotgit(name) || - is_ntfs_dotgit(name)); + has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name); has_zero_pad |= *(char *)desc.buffer == '0'; + + if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) { + if (!S_ISLNK(mode)) + oidset_insert(&gitmodules_found, oid); + else + retval += report(options, &item->object, + FSCK_MSG_GITMODULES_SYMLINK, + ".gitmodules is a symbolic link"); + } + if (update_tree_entry_gently(&desc)) { retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree"); break; @@ -901,6 +922,66 @@ static int fsck_tag(struct tag *tag, const char *data, return fsck_tag_buffer(tag, data, size, options); } +struct fsck_gitmodules_data { + struct object *obj; + struct fsck_options *options; + int ret; +}; + +static int fsck_gitmodules_fn(const char *var, const char *value, void *vdata) +{ + struct fsck_gitmodules_data *data = vdata; + const char *subsection, *key; + int subsection_len; + char *name; + + if (parse_config_key(var, "submodule", &subsection, &subsection_len, &key) < 0 || + !subsection) + return 0; + + name = xmemdupz(subsection, subsection_len); + if (check_submodule_name(name) < 0) + data->ret |= report(data->options, data->obj, + FSCK_MSG_GITMODULES_NAME, + "disallowed submodule name: %s", + name); + free(name); + + return 0; +} + +static int fsck_blob(struct blob *blob, const char *buf, + unsigned long size, struct fsck_options *options) +{ + struct fsck_gitmodules_data data; + + if (!oidset_contains(&gitmodules_found, &blob->object.oid)) + return 0; + oidset_insert(&gitmodules_done, &blob->object.oid); + + if (!buf) { + /* + * A missing buffer here is a sign that the caller found the + * blob too gigantic to load into memory. Let's just consider + * that an error. + */ + return report(options, &blob->object, + FSCK_MSG_GITMODULES_PARSE, + ".gitmodules too large to parse"); + } + + data.obj = &blob->object; + data.options = options; + data.ret = 0; + if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB, + ".gitmodules", buf, size, &data)) + data.ret |= report(options, &blob->object, + FSCK_MSG_GITMODULES_PARSE, + "could not parse gitmodules blob"); + + return data.ret; +} + int fsck_object(struct object *obj, void *data, unsigned long size, struct fsck_options *options) { @@ -908,7 +989,7 @@ int fsck_object(struct object *obj, void *data, unsigned long size, return report(options, obj, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck"); if (obj->type == OBJ_BLOB) - return 0; + return fsck_blob((struct blob *)obj, data, size, options); if (obj->type == OBJ_TREE) return fsck_tree((struct tree *) obj, options); if (obj->type == OBJ_COMMIT) @@ -932,3 +1013,52 @@ int fsck_error_function(struct fsck_options *o, error("object %s: %s", describe_object(o, obj), message); return 1; } + +int fsck_finish(struct fsck_options *options) +{ + int ret = 0; + struct oidset_iter iter; + const struct object_id *oid; + + oidset_iter_init(&gitmodules_found, &iter); + while ((oid = oidset_iter_next(&iter))) { + struct blob *blob; + enum object_type type; + unsigned long size; + char *buf; + + if (oidset_contains(&gitmodules_done, oid)) + continue; + + blob = lookup_blob(oid); + if (!blob) { + ret |= report(options, &blob->object, + FSCK_MSG_GITMODULES_BLOB, + "non-blob found at .gitmodules"); + continue; + } + + buf = read_sha1_file(oid->hash, &type, &size); + if (!buf) { + if (is_promisor_object(&blob->object.oid)) + continue; + ret |= report(options, &blob->object, + FSCK_MSG_GITMODULES_MISSING, + "unable to read .gitmodules blob"); + continue; + } + + if (type == OBJ_BLOB) + ret |= fsck_blob(blob, buf, size, options); + else + ret |= report(options, &blob->object, + FSCK_MSG_GITMODULES_BLOB, + "non-blob found at .gitmodules"); + free(buf); + } + + + oidset_clear(&gitmodules_found); + oidset_clear(&gitmodules_done); + return ret; +} diff --git a/fsck.h b/fsck.h index 4525510d99..c3cf5e0034 100644 --- a/fsck.h +++ b/fsck.h @@ -53,4 +53,11 @@ int fsck_walk(struct object *obj, void *data, struct fsck_options *options); int fsck_object(struct object *obj, void *data, unsigned long size, struct fsck_options *options); +/* + * Some fsck checks are context-dependent, and may end up queued; run this + * after completing all fsck_object() calls in order to resolve any remaining + * checks. + */ +int fsck_finish(struct fsck_options *options); + #endif diff --git a/sha1_file.c b/sha1_file.c index cc0f43ea84..53f0a3693f 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -2209,7 +2209,7 @@ int read_loose_object(const char *path, goto out; } - if (*type == OBJ_BLOB) { + if (*type == OBJ_BLOB && *size > big_file_threshold) { if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0) goto out; } else { diff --git a/t/lib-pack.sh b/t/lib-pack.sh index 7509846571..4674899b30 100644 --- a/t/lib-pack.sh +++ b/t/lib-pack.sh @@ -79,6 +79,18 @@ pack_obj () { ;; esac + # If it's not a delta, we can convince pack-objects to generate a pack + # with just our entry, and then strip off the header (12 bytes) and + # trailer (20 bytes). + if test -z "$2" + then + echo "$1" | git pack-objects --stdout >pack_obj.tmp && + size=$(wc -c &2 "BUG: don't know how to print $1${2:+ (from $2)}" return 1 } diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh index 75fa071c6d..a770d92a55 100755 --- a/t/t7415-submodule-names.sh +++ b/t/t7415-submodule-names.sh @@ -6,6 +6,7 @@ Exercise the name-checking function on a variety of names, and then give a real-world setup that confirms we catch this in practice. ' . ./test-lib.sh +. "$TEST_DIRECTORY"/lib-pack.sh test_expect_success 'check names' ' cat >expect <<-\EOF && @@ -73,4 +74,81 @@ test_expect_success 'clone evil superproject' ' ! grep "RUNNING POST CHECKOUT" output ' +test_expect_success 'fsck detects evil superproject' ' + test_must_fail git fsck +' + +test_expect_success 'transfer.fsckObjects detects evil superproject (unpack)' ' + rm -rf dst.git && + git init --bare dst.git && + git -C dst.git config transfer.fsckObjects true && + test_must_fail git push dst.git HEAD +' + +test_expect_success 'transfer.fsckObjects detects evil superproject (index)' ' + rm -rf dst.git && + git init --bare dst.git && + git -C dst.git config transfer.fsckObjects true && + git -C dst.git config transfer.unpackLimit 1 && + test_must_fail git push dst.git HEAD +' + +# Normally our packs contain commits followed by trees followed by blobs. This +# reverses the order, which requires backtracking to find the context of a +# blob. We'll start with a fresh gitmodules-only tree to make it simpler. +test_expect_success 'create oddly ordered pack' ' + git checkout --orphan odd && + git rm -rf --cached . && + git add .gitmodules && + git commit -m odd && + { + pack_header 3 && + pack_obj $(git rev-parse HEAD:.gitmodules) && + pack_obj $(git rev-parse HEAD^{tree}) && + pack_obj $(git rev-parse HEAD) + } >odd.pack && + pack_trailer odd.pack +' + +test_expect_success 'transfer.fsckObjects handles odd pack (unpack)' ' + rm -rf dst.git && + git init --bare dst.git && + test_must_fail git -C dst.git unpack-objects --strict output && + grep gitmodulesSymlink output + ) +' + test_done