submodule: encode gitdir paths to avoid conflicts

This adds a new submoduleEncoding extension which encodes gitdir names
to avoid collisions due to nested gitdirs or case insensitive filesystems.

A custom encoding can become unnecessarily complex, whereas url-encoding is
relatively well-known. However, url-encoding alone does not help on case
insensitive filesystems, so it is extended: A is encoded as _a, B as _b and
so on.

Unfortunately encoding A -> _a (...) is not enough to fix the reserved
Windows file names (e.g. COM1), because worktrees still use names like COM1
even if the gitdir paths are encoded. Future work is needed to fully
address Windows reserved names.

For now url-encoding is the only option, however in the future we may
add alternatives (other encodings, hashes or even hash_name).

Suggested-by: Phillip Wood <phillip.wood123@gmail.com>
Suggested-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Adrian Ratiu <adrian.ratiu@collabora.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
seen
Adrian Ratiu 2025-10-06 14:25:17 +03:00 committed by Junio C Hamano
parent fb3b2e627f
commit 35dbc30c2a
8 changed files with 204 additions and 20 deletions

View File

@ -69,6 +69,15 @@ relativeWorktrees::
repaired with either the `--relative-paths` option or with the
`worktree.useRelativePaths` config set to `true`.

submoduleEncoding::
If enabled, submodule gitdir paths are encoded to avoid filesystem
conflicts due to nested gitdirs or case insensitivity. For now, only
url-encoding (rfc3986) is available, with a small addition that encodes
each uppercase letter as an underscore followed by its lowercase form
(`A -> _a`, `B -> _b` and so on).
Other encoding or hashing methods may be added in the future.
Any preexisting non-encoded submodule gitdirs are used as-is, to
ease migration and reduce risk of gitdirs not being recognized.

worktreeConfig::
If enabled, then worktrees will load config settings from the
`$GIT_DIR/config.worktree` file in addition to the

View File

@ -55,6 +55,9 @@ submodule.<name>.active::
submodule.<name>.gitdir::
This option sets the gitdir path for submodule <name>, allowing users
to override the default path or change the default path name encoding.
Submodule gitdir encoding is enabled via `extensions.submoduleEncoding`
(see linkgit:git-config[1]). This config works whether the extension is
enabled or disabled.

submodule.active::
A repeated field which contains a pathspec used to match against a

View File

@ -158,6 +158,7 @@ struct repository {
int repository_format_worktree_config;
int repository_format_relative_worktrees;
int repository_format_precious_objects;
int repository_format_submodule_encoding;

/* Indicate if a repository has a different 'commondir' from 'gitdir' */
unsigned different_commondir:1;

View File

@ -687,6 +687,9 @@ static enum extension_result handle_extension(const char *var,
} else if (!strcmp(ext, "relativeworktrees")) {
data->relative_worktrees = git_config_bool(var, value);
return EXTENSION_OK;
} else if (!strcmp(ext, "submoduleencoding")) {
data->submodule_encoding = git_config_bool(var, value);
return EXTENSION_OK;
}
return EXTENSION_UNKNOWN;
}
@ -1864,6 +1867,8 @@ const char *setup_git_directory_gently(int *nongit_ok)
repo_fmt.worktree_config;
the_repository->repository_format_relative_worktrees =
repo_fmt.relative_worktrees;
the_repository->repository_format_submodule_encoding =
repo_fmt.submodule_encoding;
/* take ownership of repo_fmt.partial_clone */
the_repository->repository_format_partial_clone =
repo_fmt.partial_clone;
@ -1962,6 +1967,8 @@ void check_repository_format(struct repository_format *fmt)
fmt->ref_storage_format);
the_repository->repository_format_worktree_config =
fmt->worktree_config;
the_repository->repository_format_submodule_encoding =
fmt->submodule_encoding;
the_repository->repository_format_relative_worktrees =
fmt->relative_worktrees;
the_repository->repository_format_partial_clone =

View File

@ -130,6 +130,7 @@ struct repository_format {
char *partial_clone; /* value of extensions.partialclone */
int worktree_config;
int relative_worktrees;
int submodule_encoding;
int is_bare;
int hash_algo;
int compat_hash_algo;

View File

@ -2262,6 +2262,13 @@ int validate_submodule_git_dir(char *git_dir, const char *submodule_name)
char *p;
int ret = 0;

/*
* Skip these checks when extensions.submoduleEncoding is enabled because
* it fixes the nesting issues and the suffixes will not match by design.
*/
if (the_repository->repository_format_submodule_encoding)
return 0;

if (len <= suffix_len || (p = git_dir + len - suffix_len)[-1] != '/' ||
strcmp(p, submodule_name))
BUG("submodule name '%s' not a suffix of git dir '%s'",
@ -2581,29 +2588,22 @@ cleanup:
return ret;
}

/*
 * Append `src` to `dst`, rewritten so that the output contains no
 * uppercase ASCII letters and can still be told apart from any other
 * input:
 *
 *   - 'A'..'Z' become '_' followed by the matching lowercase letter
 *     ("A" -> "_a", "B" -> "_b", ...);
 *   - a literal '_' becomes "__";
 *   - every other byte is copied unchanged.
 *
 * The '_' escape is what keeps the mapping one-to-one: '_' is an RFC 3986
 * unreserved character, so url-encoding the name beforehand leaves it
 * alone, and without the escape the names "A" and "_a" would both come
 * out as "_a" and share a gitdir.
 *
 * `src` must be NUL-terminated; `dst` is only appended to, never reset.
 */
static void strbuf_addstr_case_encode(struct strbuf *dst, const char *src)
{
	for (; *src; src++) {
		unsigned char c = *src;

		if (c >= 'A' && c <= 'Z') {
			strbuf_addch(dst, '_');
			strbuf_addch(dst, c - 'A' + 'a');
		} else if (c == '_') {
			/* escape the escape character itself */
			strbuf_addch(dst, '_');
			strbuf_addch(dst, '_');
		} else {
			strbuf_addch(dst, c);
		}
	}
}

void submodule_name_to_gitdir(struct strbuf *buf, struct repository *r,
const char *submodule_name)
{
/*
* NEEDSWORK: The current way of mapping a submodule's name to
* its location in .git/modules/ has problems with some naming
* schemes. For example, if a submodule is named "foo" and
* another is named "foo/bar" (whether present in the same
* superproject commit or not - the problem will arise if both
* superproject commits have been checked out at any point in
* time), or if two submodule names only have different cases in
* a case-insensitive filesystem.
*
* There are several solutions, including encoding the path in
* some way, introducing a submodule.<name>.gitdir config in
* .git/config (not .gitmodules) that allows overriding what the
* gitdir of a submodule would be (and teach Git, upon noticing
* a clash, to automatically determine a non-clashing name and
* to write such a config), or introducing a
* submodule.<name>.gitdir config in .gitmodules that repo
* administrators can explicitly set. Nothing has been decided,
* so for now, just append the name at the end of the path.
*/
char *gitdir_path, *key;

/* Allow config override. */
@ -2618,4 +2618,20 @@ void submodule_name_to_gitdir(struct strbuf *buf, struct repository *r,

repo_git_path_append(r, buf, "modules/");
strbuf_addstr(buf, submodule_name);

/* Existing legacy non-encoded names are used as-is */
if (is_git_directory(buf->buf))
return;

if (the_repository->repository_format_submodule_encoding) {
struct strbuf tmp = STRBUF_INIT;

strbuf_reset(buf);
repo_git_path_append(r, buf, "modules/");

strbuf_addstr_urlencode(&tmp, submodule_name, is_rfc3986_unreserved);
strbuf_addstr_case_encode(buf, tmp.buf);

strbuf_release(&tmp);
}
}

View File

@ -874,6 +874,7 @@ integration_tests = [
't7422-submodule-output.sh',
't7423-submodule-symlinks.sh',
't7424-submodule-mixed-ref-formats.sh',
't7425-submodule-encoding.sh',
't7450-bad-git-dotfiles.sh',
't7500-commit-template-squash-signoff.sh',
't7501-commit-basic-functionality.sh',

146
t/t7425-submodule-encoding.sh Executable file
View File

@ -0,0 +1,146 @@
#!/bin/sh

test_description='submodules handle mixed legacy and new (encoded) style gitdir paths'

. ./test-lib.sh
. "$TEST_DIRECTORY"/lib-verify-submodule-gitdir-path.sh

# Set protocol.file.allow=always in the global config; the tests below
# add and clone submodules from repositories given as local paths.
test_expect_success 'setup: allow file protocol' '
git config --global protocol.file.allow always
'

# Build the superproject "main" with two submodules: "legacy" is added
# before extensions.submoduleEncoding is switched on, "New Sub" after.
# The HEAD commits of both submodule sources are kept in $legacy_rev and
# $new_rev for the tests that follow.
test_expect_success 'create repo with mixed encoded and non-encoded submodules' '
git init -b main legacy-sub &&
test_commit -C legacy-sub legacy-initial &&
legacy_rev=$(git -C legacy-sub rev-parse HEAD) &&

git init -b main new-sub &&
test_commit -C new-sub new-initial &&
new_rev=$(git -C new-sub rev-parse HEAD) &&

git init -b main main &&
(
cd main &&
git submodule add ../legacy-sub legacy &&
test_commit legacy-sub &&

git config core.repositoryformatversion 1 &&
git config extensions.submoduleEncoding true &&

git submodule add ../new-sub "New Sub" &&
test_commit new
)
'

# "legacy" is expected at its plain path, "New Sub" at the encoded one
# (space -> %20, uppercase N/S -> _n/_s).
test_expect_success 'verify submodule name is properly encoded' '
verify_submodule_gitdir_path main legacy modules/legacy &&
verify_submodule_gitdir_path main "New Sub" modules/_new%20_sub
'

# Clone "main" twice, first without and then with
# extensions.submoduleEncoding, and check in each clone the directory
# names under .git/modules and the revisions reported by
# `git submodule status`. Only the second clone is expected to use the
# encoded name for "New Sub".
test_expect_success 'clone from repo with both legacy and new-style submodules' '
git clone --recurse-submodules main cloned-non-encoding &&
(
cd cloned-non-encoding &&

test_path_is_dir .git/modules/legacy &&
test_path_is_dir .git/modules/"New Sub" &&

git submodule status >list &&
test_grep "$legacy_rev legacy" list &&
test_grep "$new_rev New Sub" list
) &&

git clone -c extensions.submoduleEncoding=true --recurse-submodules main cloned-encoding &&
(
cd cloned-encoding &&

test_path_is_dir .git/modules/legacy &&
test_path_is_dir .git/modules/_new%20_sub &&

git submodule status >list &&
test_grep "$legacy_rev legacy" list &&
test_grep "$new_rev New Sub" list
)
'

# Working in the "cloned-encoding" clone, commit in both submodules and
# in the superproject and push each back to its origin. The three push
# targets get receive.denyCurrentBranch=updateInstead first. Finally
# $legacy_rev and $new_rev are refreshed from the submodule sources.
test_expect_success 'commit and push changes to encoded submodules' '
git -C legacy-sub config receive.denyCurrentBranch updateInstead &&
git -C new-sub config receive.denyCurrentBranch updateInstead &&
git -C main config receive.denyCurrentBranch updateInstead &&
(
cd cloned-encoding &&

git -C legacy switch --track -C main origin/main &&
test_commit -C legacy second-commit &&
git -C legacy push &&

git -C "New Sub" switch --track -C main origin/main &&
test_commit -C "New Sub" second-commit &&
git -C "New Sub" push &&

# Stage and commit submodule changes in superproject
git switch --track -C main origin/main &&
git add legacy "New Sub" &&
git commit -m "update submodules" &&

# push superproject commit to main repo
git push
) &&

# update expected legacy & new submodule checksums
legacy_rev=$(git -C legacy-sub rev-parse HEAD) &&
new_rev=$(git -C new-sub rev-parse HEAD)
'

# Back in "main": update the submodules, then check that the gitdir
# names under .git/modules are the same as before and that both
# submodules report the revisions stored in $legacy_rev and $new_rev.
test_expect_success 'fetch mixed submodule changes and verify updates' '
(
cd main &&

# only update submodules because superproject was
# pushed into at the end of last test
git submodule update --init --recursive &&

test_path_is_dir .git/modules/legacy &&
test_path_is_dir .git/modules/_new%20_sub &&

# Verify both submodules are at the expected commits
git submodule status >list &&
test_grep "$legacy_rev legacy" list &&
test_grep "$new_rev New Sub" list
)
'

# Create the repository "nested" whose .gitmodules declares two
# submodules named "hippo" and "hippo/hooks" (checked out at thing1 and
# thing2). The second name extends the first with a path component,
# which is the naming pattern the following tests check for nesting.
test_expect_success 'setup submodules with nested git dirs' '
git init nested &&
test_commit -C nested nested &&
(
cd nested &&
cat >.gitmodules <<-EOF &&
[submodule "hippo"]
url = .
path = thing1
[submodule "hippo/hooks"]
url = .
path = thing2
EOF
git clone . thing1 &&
git clone . thing2 &&
git add .gitmodules thing1 thing2 &&
test_tick &&
git commit -m nested
)
'

# With the extension enabled at clone time, "hippo/hooks" is expected at
# modules/hippo%2fhooks (the "/" encoded as %2f), next to modules/hippo.
test_expect_success 'git dirs of encoded sibling submodules must not be nested' '
git clone -c extensions.submoduleEncoding=true --recurse-submodules nested clone_nested &&
verify_submodule_gitdir_path clone_nested hippo modules/hippo &&
verify_submodule_gitdir_path clone_nested hippo/hooks modules/hippo%2fhooks
'

# Same expectations as the previous test, but the clone is run with
# --jobs=2.
test_expect_success 'submodule git dir nesting detection must work with parallel cloning' '
git clone -c extensions.submoduleEncoding=true --recurse-submodules --jobs=2 nested clone_parallel &&
verify_submodule_gitdir_path clone_parallel hippo modules/hippo &&
verify_submodule_gitdir_path clone_parallel hippo/hooks modules/hippo%2fhooks
'

test_done