From 66c78e0653a4e60c625b8400da31da0ba5bd1286 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Sat, 15 Nov 2025 00:58:17 +0000 Subject: [PATCH 1/2] object-file: disallow adding submodules of different hash algo The design of the hash algorithm transition plan is that objects stored must be entirely in one algorithm since we lack any way to indicate a mix of algorithms. This also includes submodules, but we have traditionally not enforced this, which leads to various problems when trying to clone or check out the the submodule from the remote. Since this cannot work in the general case, restrict adding a submodule of a different algorithm to the index. Add tests for git add and git submodule add that these are rejected. Note that we cannot check this in git fsck because the malformed submodule is stored in the tree as an object ID which is either truncated (when a SHA-256 submodule is added to a SHA-1 repository) or padded with zeros (when a SHA-1 submodule is added to a SHA-256 repository). We cannot detect even the latter case because someone could have an actual submodule that actually ends in 24 zeros, which would be a false positive. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- object-file.c | 6 +++++- t/t3700-add.sh | 25 +++++++++++++++++++++++++ t/t7400-submodule-basic.sh | 25 +++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 1 deletion(-) diff --git a/object-file.c b/object-file.c index 2bc36ab3ee..6ff6cf7549 100644 --- a/object-file.c +++ b/object-file.c @@ -1296,7 +1296,11 @@ int index_path(struct index_state *istate, struct object_id *oid, strbuf_release(&sb); break; case S_IFDIR: - return repo_resolve_gitlink_ref(istate->repo, path, "HEAD", oid); + if (repo_resolve_gitlink_ref(istate->repo, path, "HEAD", oid)) + return -1; + if (&hash_algos[oid->algo] != istate->repo->hash_algo) + return error(_("cannot add a submodule of a different hash algorithm")); + break; default: return error(_("%s: unsupported file type"), path); } diff --git a/t/t3700-add.sh b/t/t3700-add.sh index df580a5806..9a2c8dbcc2 100755 --- a/t/t3700-add.sh +++ b/t/t3700-add.sh @@ -541,6 +541,31 @@ test_expect_success 'all statuses changed in folder if . is given' ' ) ' +test_expect_success 'cannot add a submodule of a different algorithm' ' + git init --object-format=sha256 sha256 && + ( + cd sha256 && + test_commit abc && + git init --object-format=sha1 submodule && + test_commit -C submodule def && + test_must_fail git add submodule 2>err && + test_grep "cannot add a submodule of a different hash algorithm" err && + git ls-files --stage >entries && + test_grep ! ^160000 entries + ) && + git init --object-format=sha1 sha1 && + ( + cd sha1 && + test_commit abc && + git init --object-format=sha256 submodule && + test_commit -C submodule def && + test_must_fail git add submodule 2>err && + test_grep "cannot add a submodule of a different hash algorithm" err && + git ls-files --stage >entries && + test_grep ! ^160000 entries + ) +' + test_expect_success CASE_INSENSITIVE_FS 'path is case-insensitive' ' path="$(pwd)/BLUB" && touch "$path" && diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh index fd3e7e355e..e6b551daad 100755 --- a/t/t7400-submodule-basic.sh +++ b/t/t7400-submodule-basic.sh @@ -407,6 +407,31 @@ test_expect_success 'submodule add in subdirectory with relative path should fai test_grep toplevel output.err ' +test_expect_success 'submodule add of a different algorithm fails' ' + git init --object-format=sha256 sha256 && + ( + cd sha256 && + test_commit abc && + git init --object-format=sha1 submodule && + test_commit -C submodule def && + test_must_fail git submodule add "$submodurl" submodule 2>err && + test_grep "cannot add a submodule of a different hash algorithm" err && + git ls-files --stage >entries && + test_grep ! ^160000 entries + ) && + git init --object-format=sha1 sha1 && + ( + cd sha1 && + test_commit abc && + git init --object-format=sha256 submodule && + test_commit -C submodule def && + test_must_fail git submodule add "$submodurl" submodule 2>err && + test_grep "cannot add a submodule of a different hash algorithm" err && + git ls-files --stage >entries && + test_grep ! ^160000 entries + ) +' + test_expect_success 'setup - add an example entry to .gitmodules' ' git config --file=.gitmodules submodule.example.url git://example.com/init.git ' From 6fe288bfbcbbabc3d399dd71f876dccf71affff0 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sat, 15 Nov 2025 00:58:18 +0000 Subject: [PATCH 2/2] read-cache: drop submodule check from add_to_cache() In add_to_cache(), we treat any directories as submodules, and complain if we can't resolve their HEAD. This call to resolve_gitlink_ref() was added by f937bc2f86 (add: error appropriately on repository with no commits, 2019-04-09), with the goal of improving the error message for empty repositories. But we already resolve the submodule HEAD in index_path(), which is where we find the actual oid we're going to use. Resolving it again here introduces some downsides: 1. It's more work, since we have to open up the submodule repository's files twice. 2. There are call paths that get to index_path() without going through add_to_cache(). For instance, we'd want a similar informative message if "git diff empty" finds that it can't resolve the submodule's HEAD. (In theory we can also get there through update-index, but AFAICT it refuses to consider directories as submodules at all, and just complains about them). 3. The resolution in index_path() catches more errors that we don't handle here. In particular, it will validate that the object format for the submodule matches that of the superproject. This isn't a bug, since our call in add_to_cache() throws away the oid it gets without looking at it. But it certainly caused confusion for me when looking at where the object-format check should go. So instead of resolving the submodule HEAD in add_to_cache(), let's just teach the call in index_path() to actually produce an error message (which it already does for other cases). That's probably what f937bc2f86 should have done in the first place, and it gives us a single point of resolution when adding a submodule to the index. The resulting output is slightly more verbose, as we propagate the error up the call stack, but I think that's OK (and again, matches many other errors we get when indexing fails). I've left the text of the error message as-is, though it is perhaps overly specific. There are many reasons that resolving the submodule HEAD might fail, though outside of corruption or system errors it is probably most likely that the submodule HEAD is simply on an unborn branch. Signed-off-by: Jeff King Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- object-file.c | 2 +- read-cache.c | 3 --- t/t3700-add.sh | 1 + 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/object-file.c b/object-file.c index 6ff6cf7549..bb0c77b45d 100644 --- a/object-file.c +++ b/object-file.c @@ -1297,7 +1297,7 @@ int index_path(struct index_state *istate, struct object_id *oid, break; case S_IFDIR: if (repo_resolve_gitlink_ref(istate->repo, path, "HEAD", oid)) - return -1; + return error(_("'%s' does not have a commit checked out"), path); if (&hash_algos[oid->algo] != istate->repo->hash_algo) return error(_("cannot add a submodule of a different hash algorithm")); break; diff --git a/read-cache.c b/read-cache.c index 06ad74db22..e34c5c56c6 100644 --- a/read-cache.c +++ b/read-cache.c @@ -707,7 +707,6 @@ int add_to_index(struct index_state *istate, const char *path, struct stat *st, int add_option = (ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE| (intent_only ? ADD_CACHE_NEW_ONLY : 0)); unsigned hash_flags = pretend ? 0 : INDEX_WRITE_OBJECT; - struct object_id oid; if (flags & ADD_CACHE_RENORMALIZE) hash_flags |= INDEX_RENORMALIZE; @@ -717,8 +716,6 @@ int add_to_index(struct index_state *istate, const char *path, struct stat *st, namelen = strlen(path); if (S_ISDIR(st_mode)) { - if (repo_resolve_gitlink_ref(the_repository, path, "HEAD", &oid) < 0) - return error(_("'%s' does not have a commit checked out"), path); while (namelen && path[namelen-1] == '/') namelen--; } diff --git a/t/t3700-add.sh b/t/t3700-add.sh index 9a2c8dbcc2..af93e53c12 100755 --- a/t/t3700-add.sh +++ b/t/t3700-add.sh @@ -388,6 +388,7 @@ test_expect_success 'error on a repository with no commits' ' test_must_fail git add empty >actual 2>&1 && cat >expect <<-EOF && error: '"'empty/'"' does not have a commit checked out + error: unable to index file '"'empty/'"' fatal: adding files failed EOF test_cmp expect actual