Merge branch 'ar/submodule-gitdir-tweak' into seen

* ar/submodule-gitdir-tweak:
  submodule: error out if gitdir name is too long
  submodule: encode gitdir paths to avoid conflicts
  strbuf: bring back is_rfc3986_unreserved
  submodule: add gitdir path config override
  submodule--helper: use submodule_name_to_gitdir in add_submodule
Junio C Hamano 2025-10-06 10:35:47 -07:00
commit 837e4ceba1
20 changed files with 340 additions and 32 deletions

View File

@ -72,6 +72,15 @@ relativeWorktrees:::
repaired with either the `--relative-paths` option or with the
`worktree.useRelativePaths` config set to `true`.

submoduleEncoding:::
If enabled, submodule gitdir paths are encoded to avoid filesystem
conflicts due to nested gitdirs or case insensitivity. For now, only
url-encoding (rfc3986) is available, with a small addition to encode
uppercase to lowercase letters (`A -> _a`, `B -> _b` and so on).
Other encoding or hashing methods may be added in the future.
Any preexisting non-encoded submodule gitdirs are used as-is, to
ease migration and reduce risk of gitdirs not being recognized.

worktreeConfig:::
If enabled, then worktrees will load config settings from the
`$GIT_DIR/config.worktree` file in addition to the

View File

@ -52,6 +52,13 @@ submodule.<name>.active::
submodule.active config option. See linkgit:gitsubmodules[7] for
details.

submodule.<name>.gitdir::
This option sets the gitdir path for submodule <name>, allowing users
to override the default path or change the default path name encoding.
Submodule gitdir encoding is enabled via `extensions.submoduleEncoding`
(see linkgit:git-config[1]). This config works both with the extension
enabled or disabled.

submodule.active::
A repeated field which contains a pathspec used to match against a
submodule's path to determine if the submodule is of interest to git

View File

@ -2250,6 +2250,11 @@ ifndef HAVE_PLATFORM_PROCINFO
COMPAT_OBJS += compat/stub/procinfo.o
endif

ifdef NO_PATHCONF
COMPAT_CFLAGS += -DNO_PATHCONF
COMPAT_OBJS += compat/pathconf.o
endif

ifdef RUNTIME_PREFIX

ifdef HAVE_BSD_KERN_PROC_SYSCTL

View File

@ -76,12 +76,6 @@ static void rewrite_credential_file(const char *fn, struct credential *c,
die_errno("unable to write credential store");
}

static int is_rfc3986_unreserved(char ch)
{
return isalnum(ch) ||
ch == '-' || ch == '_' || ch == '.' || ch == '~';
}

static int is_rfc3986_reserved_or_unreserved(char ch)
{
if (is_rfc3986_unreserved(ch))

View File

@ -1208,6 +1208,22 @@ static int module_summary(int argc, const char **argv, const char *prefix,
return ret;
}

static int module_gitdir(int argc, const char **argv, const char *prefix UNUSED,
struct repository *repo)
{
struct strbuf gitdir = STRBUF_INIT;

if (argc != 2)
usage(_("git submodule--helper gitdir <name>"));

submodule_name_to_gitdir(&gitdir, repo, argv[1]);

printf("%s\n", gitdir.buf);

strbuf_release(&gitdir);
return 0;
}

struct sync_cb {
const char *prefix;
const char *super_prefix;
@ -3187,13 +3203,13 @@ static void append_fetch_remotes(struct strbuf *msg, const char *git_dir_path)

static int add_submodule(const struct add_data *add_data)
{
char *submod_gitdir_path;
struct module_clone_data clone_data = MODULE_CLONE_DATA_INIT;
struct string_list reference = STRING_LIST_INIT_NODUP;
int ret = -1;

/* perhaps the path already exists and is already a git repo, else clone it */
if (is_directory(add_data->sm_path)) {
char *submod_gitdir_path;
struct strbuf sm_path = STRBUF_INIT;
strbuf_addstr(&sm_path, add_data->sm_path);
submod_gitdir_path = xstrfmt("%s/.git", add_data->sm_path);
@ -3207,10 +3223,11 @@ static int add_submodule(const struct add_data *add_data)
free(submod_gitdir_path);
} else {
struct child_process cp = CHILD_PROCESS_INIT;
struct strbuf submod_gitdir = STRBUF_INIT;

submod_gitdir_path = xstrfmt(".git/modules/%s", add_data->sm_name);
submodule_name_to_gitdir(&submod_gitdir, the_repository, add_data->sm_name);

if (is_directory(submod_gitdir_path)) {
if (is_directory(submod_gitdir.buf)) {
if (!add_data->force) {
struct strbuf msg = STRBUF_INIT;
char *die_msg;
@ -3219,8 +3236,8 @@ static int add_submodule(const struct add_data *add_data)
"locally with remote(s):\n"),
add_data->sm_name);

append_fetch_remotes(&msg, submod_gitdir_path);
free(submod_gitdir_path);
append_fetch_remotes(&msg, submod_gitdir.buf);
strbuf_release(&submod_gitdir);

strbuf_addf(&msg, _("If you want to reuse this local git "
"directory instead of cloning again from\n"
@ -3238,7 +3255,7 @@ static int add_submodule(const struct add_data *add_data)
"submodule '%s'\n"), add_data->sm_name);
}
}
free(submod_gitdir_path);
strbuf_release(&submod_gitdir);

clone_data.prefix = add_data->prefix;
clone_data.path = add_data->sm_path;
@ -3590,6 +3607,7 @@ int cmd_submodule__helper(int argc,
NULL
};
struct option options[] = {
OPT_SUBCOMMAND("gitdir", &fn, module_gitdir),
OPT_SUBCOMMAND("clone", &fn, module_clone),
OPT_SUBCOMMAND("add", &fn, module_add),
OPT_SUBCOMMAND("update", &fn, module_update),

10
compat/pathconf.c Normal file
View File

@ -0,0 +1,10 @@
#include "git-compat-util.h"

/*
* Minimal stub for platforms without pathconf() (e.g. Windows),
* to fall back to NAME_MAX from limits.h or compat/posix.h.
*/
long git_pathconf(const char *path UNUSED, int name UNUSED)
{
return -1;
}

View File

@ -250,6 +250,14 @@ char *gitdirname(char *);
#define NAME_MAX 255
#endif

#ifdef NO_PATHCONF
#ifndef _PC_NAME_MAX
#define _PC_NAME_MAX 1 /* dummy value, only used for git_pathconf */
#endif
#define pathconf(a,b) git_pathconf(a,b)
long git_pathconf(const char *path, int name);
#endif

typedef uintmax_t timestamp_t;
#define PRItime PRIuMAX
#define parse_timestamp strtoumax

View File

@ -473,6 +473,7 @@ ifeq ($(uname_S),Windows)
NEEDS_CRYPTO_WITH_SSL = YesPlease
NO_LIBGEN_H = YesPlease
NO_POLL = YesPlease
NO_PATHCONF = YesPlease
NO_SYMLINK_HEAD = YesPlease
NO_IPV6 = YesPlease
NO_SETENV = YesPlease
@ -688,6 +689,7 @@ ifeq ($(uname_S),MINGW)
NEEDS_CRYPTO_WITH_SSL = YesPlease
NO_LIBGEN_H = YesPlease
NO_POLL = YesPlease
NO_PATHCONF = YesPlease
NO_SYMLINK_HEAD = YesPlease
NO_SETENV = YesPlease
NO_STRCASESTR = YesPlease

View File

@ -1396,6 +1396,7 @@ checkfuncs = {
'initgroups' : [],
'strtoumax' : ['strtoumax.c', 'strtoimax.c'],
'pread' : ['pread.c'],
'pathconf' : ['pathconf.c'],
}

if host_machine.system() == 'windows'

View File

@ -158,6 +158,7 @@ struct repository {
int repository_format_worktree_config;
int repository_format_relative_worktrees;
int repository_format_precious_objects;
int repository_format_submodule_encoding;

/* Indicate if a repository has a different 'commondir' from 'gitdir' */
unsigned different_commondir:1;

View File

@ -687,6 +687,9 @@ static enum extension_result handle_extension(const char *var,
} else if (!strcmp(ext, "relativeworktrees")) {
data->relative_worktrees = git_config_bool(var, value);
return EXTENSION_OK;
} else if (!strcmp(ext, "submoduleencoding")) {
data->submodule_encoding = git_config_bool(var, value);
return EXTENSION_OK;
}
return EXTENSION_UNKNOWN;
}
@ -1865,6 +1868,8 @@ const char *setup_git_directory_gently(int *nongit_ok)
repo_fmt.worktree_config;
the_repository->repository_format_relative_worktrees =
repo_fmt.relative_worktrees;
the_repository->repository_format_submodule_encoding =
repo_fmt.submodule_encoding;
/* take ownership of repo_fmt.partial_clone */
the_repository->repository_format_partial_clone =
repo_fmt.partial_clone;
@ -1963,6 +1968,8 @@ void check_repository_format(struct repository_format *fmt)
fmt->ref_storage_format);
the_repository->repository_format_worktree_config =
fmt->worktree_config;
the_repository->repository_format_submodule_encoding =
fmt->submodule_encoding;
the_repository->repository_format_relative_worktrees =
fmt->relative_worktrees;
the_repository->repository_format_partial_clone =

View File

@ -130,6 +130,7 @@ struct repository_format {
char *partial_clone; /* value of extensions.partialclone */
int worktree_config;
int relative_worktrees;
int submodule_encoding;
int is_bare;
int hash_algo;
int compat_hash_algo;

View File

@ -817,6 +817,12 @@ void strbuf_addstr_xml_quoted(struct strbuf *buf, const char *s)
}
}

int is_rfc3986_unreserved(char ch)
{
return isalnum(ch) ||
ch == '-' || ch == '_' || ch == '.' || ch == '~';
}

static void strbuf_add_urlencode(struct strbuf *sb, const char *s, size_t len,
char_predicate allow_unencoded_fn)
{

View File

@ -640,6 +640,8 @@ static inline void strbuf_complete_line(struct strbuf *sb)

typedef int (*char_predicate)(char ch);

int is_rfc3986_unreserved(char ch);

void strbuf_addstr_urlencode(struct strbuf *sb, const char *name,
char_predicate allow_unencoded_fn);


View File

@ -2262,6 +2262,13 @@ int validate_submodule_git_dir(char *git_dir, const char *submodule_name)
char *p;
int ret = 0;

/*
* Skip these checks when extensions.submoduleEncoding is enabled because
* it fixes the nesting issues and the suffixes will not match by design.
*/
if (the_repository->repository_format_submodule_encoding)
return 0;

if (len <= suffix_len || (p = git_dir + len - suffix_len)[-1] != '/' ||
strcmp(p, submodule_name))
BUG("submodule name '%s' not a suffix of git dir '%s'",
@ -2581,29 +2588,66 @@ cleanup:
return ret;
}

static void strbuf_addstr_case_encode(struct strbuf *dst, const char *src)
{
for (; *src; src++) {
unsigned char c = *src;
if (c >= 'A' && c <= 'Z') {
strbuf_addch(dst, '_');
strbuf_addch(dst, c - 'A' + 'a');
} else {
strbuf_addch(dst, c);
}
}
}

void submodule_name_to_gitdir(struct strbuf *buf, struct repository *r,
const char *submodule_name)
{
/*
* NEEDSWORK: The current way of mapping a submodule's name to
* its location in .git/modules/ has problems with some naming
* schemes. For example, if a submodule is named "foo" and
* another is named "foo/bar" (whether present in the same
* superproject commit or not - the problem will arise if both
* superproject commits have been checked out at any point in
* time), or if two submodule names only have different cases in
* a case-insensitive filesystem.
*
* There are several solutions, including encoding the path in
* some way, introducing a submodule.<name>.gitdir config in
* .git/config (not .gitmodules) that allows overriding what the
* gitdir of a submodule would be (and teach Git, upon noticing
* a clash, to automatically determine a non-clashing name and
* to write such a config), or introducing a
* submodule.<name>.gitdir config in .gitmodules that repo
* administrators can explicitly set. Nothing has been decided,
* so for now, just append the name at the end of the path.
*/
char *gitdir_path, *key;

/* Allow config override. */
key = xstrfmt("submodule.%s.gitdirpath", submodule_name);
if (!repo_config_get_string(r, key, &gitdir_path)) {
strbuf_addstr(buf, gitdir_path);
free(key);
free(gitdir_path);
return;
}
free(key);

repo_git_path_append(r, buf, "modules/");
strbuf_addstr(buf, submodule_name);

/* Existing legacy non-encoded names are used as-is */
if (is_git_directory(buf->buf))
return;

if (the_repository->repository_format_submodule_encoding) {
struct strbuf tmp = STRBUF_INIT;
size_t base_len;
long name_max;

strbuf_reset(buf);
repo_git_path_append(r, buf, "modules/");
base_len = buf->len;

strbuf_addstr_urlencode(&tmp, submodule_name, is_rfc3986_unreserved);
strbuf_addstr_case_encode(buf, tmp.buf);

/* Ensure final path length is below NAME_MAX after encoding */
name_max = pathconf(buf->buf, _PC_NAME_MAX);
if (name_max == -1)
name_max = NAME_MAX;

if (buf->len - base_len > name_max)
/*
* TODO: make this smarter; instead of erroring out, maybe we could trim or
* shard the gitdir names to make them fit under NAME_MAX.
*/
die(_("submodule name %s is too long (%"PRIuMAX" bytes, limit %"PRIuMAX")"),
buf->buf, (uintmax_t)buf->len - base_len, (uintmax_t)name_max);

strbuf_release(&tmp);
}
}

View File

@ -0,0 +1,20 @@
# Helper to verify if repo $1 contains a submodule named $2 with gitdir path $3

# This does not check filesystem existence. That is done in submodule.c via the
# submodule_name_to_gitdir() API which this helper ends up calling. The gitdirs
# might or might not exist (e.g. when adding a new submodule), so this only
# checks the expected configuration path, which might be overridden by the user.

verify_submodule_gitdir_path() {
repo="$1" &&
name="$2" &&
path="$3" &&
(
cd "$repo" &&
cat >expect <<-EOF &&
$(git rev-parse --git-common-dir)/$path
EOF
git submodule--helper gitdir "$name" >actual &&
test_cmp expect actual
)
}

View File

@ -887,6 +887,7 @@ integration_tests = [
't7422-submodule-output.sh',
't7423-submodule-symlinks.sh',
't7424-submodule-mixed-ref-formats.sh',
't7425-submodule-encoding.sh',
't7450-bad-git-dotfiles.sh',
't7500-commit-template-squash-signoff.sh',
't7501-commit-basic-functionality.sh',

View File

@ -13,6 +13,7 @@ GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME

. ./test-lib.sh
. "$TEST_DIRECTORY"/lib-verify-submodule-gitdir-path.sh

test_expect_success 'setup - enable local submodules' '
git config --global protocol.file.allow always
@ -1505,4 +1506,12 @@ test_expect_success 'submodule add fails when name is reused' '
)
'

test_expect_success 'submodule helper gitdir config overrides' '
verify_submodule_gitdir_path test-submodule child modules/child &&
test_config -C test-submodule submodule.child.gitdirpath ".git/modules/custom-child" &&
verify_submodule_gitdir_path test-submodule child modules/custom-child &&
test_unconfig -C test-submodule submodule.child.gitdirpath &&
verify_submodule_gitdir_path test-submodule child modules/child
'

test_done

162
t/t7425-submodule-encoding.sh Executable file
View File

@ -0,0 +1,162 @@
#!/bin/sh

test_description='submodules handle mixed legacy and new (encoded) style gitdir paths'

. ./test-lib.sh
. "$TEST_DIRECTORY"/lib-verify-submodule-gitdir-path.sh

test_expect_success 'setup: allow file protocol' '
git config --global protocol.file.allow always
'

test_expect_success 'create repo with mixed encoded and non-encoded submodules' '
git init -b main legacy-sub &&
test_commit -C legacy-sub legacy-initial &&
legacy_rev=$(git -C legacy-sub rev-parse HEAD) &&

git init -b main new-sub &&
test_commit -C new-sub new-initial &&
new_rev=$(git -C new-sub rev-parse HEAD) &&

git init -b main main &&
(
cd main &&
git submodule add ../legacy-sub legacy &&
test_commit legacy-sub &&

git config core.repositoryformatversion 1 &&
git config extensions.submoduleEncoding true &&

git submodule add ../new-sub "New Sub" &&
test_commit new
)
'

test_expect_success 'verify submodule name is properly encoded' '
verify_submodule_gitdir_path main legacy modules/legacy &&
verify_submodule_gitdir_path main "New Sub" modules/_new%20_sub
'

test_expect_success 'clone from repo with both legacy and new-style submodules' '
git clone --recurse-submodules main cloned-non-encoding &&
(
cd cloned-non-encoding &&

test_path_is_dir .git/modules/legacy &&
test_path_is_dir .git/modules/"New Sub" &&

git submodule status >list &&
test_grep "$legacy_rev legacy" list &&
test_grep "$new_rev New Sub" list
) &&

git clone -c extensions.submoduleEncoding=true --recurse-submodules main cloned-encoding &&
(
cd cloned-encoding &&

test_path_is_dir .git/modules/legacy &&
test_path_is_dir .git/modules/_new%20_sub &&

git submodule status >list &&
test_grep "$legacy_rev legacy" list &&
test_grep "$new_rev New Sub" list
)
'

test_expect_success 'commit and push changes to encoded submodules' '
git -C legacy-sub config receive.denyCurrentBranch updateInstead &&
git -C new-sub config receive.denyCurrentBranch updateInstead &&
git -C main config receive.denyCurrentBranch updateInstead &&
(
cd cloned-encoding &&

git -C legacy switch --track -C main origin/main &&
test_commit -C legacy second-commit &&
git -C legacy push &&

git -C "New Sub" switch --track -C main origin/main &&
test_commit -C "New Sub" second-commit &&
git -C "New Sub" push &&

# Stage and commit submodule changes in superproject
git switch --track -C main origin/main &&
git add legacy "New Sub" &&
git commit -m "update submodules" &&

# push superproject commit to main repo
git push
) &&

# update expected legacy & new submodule checksums
legacy_rev=$(git -C legacy-sub rev-parse HEAD) &&
new_rev=$(git -C new-sub rev-parse HEAD)
'

test_expect_success 'fetch mixed submodule changes and verify updates' '
(
cd main &&

# only update submodules because superproject was
# pushed into at the end of last test
git submodule update --init --recursive &&

test_path_is_dir .git/modules/legacy &&
test_path_is_dir .git/modules/_new%20_sub &&

# Verify both submodules are at the expected commits
git submodule status >list &&
test_grep "$legacy_rev legacy" list &&
test_grep "$new_rev New Sub" list
)
'

test_expect_success 'setup submodules with nested git dirs' '
git init nested &&
test_commit -C nested nested &&
(
cd nested &&
cat >.gitmodules <<-EOF &&
[submodule "hippo"]
url = .
path = thing1
[submodule "hippo/hooks"]
url = .
path = thing2
EOF
git clone . thing1 &&
git clone . thing2 &&
git add .gitmodules thing1 thing2 &&
test_tick &&
git commit -m nested
)
'

test_expect_success 'git dirs of encoded sibling submodules must not be nested' '
git clone -c extensions.submoduleEncoding=true --recurse-submodules nested clone_nested &&
verify_submodule_gitdir_path clone_nested hippo modules/hippo &&
verify_submodule_gitdir_path clone_nested hippo/hooks modules/hippo%2fhooks
'

test_expect_success 'submodule git dir nesting detection must work with parallel cloning' '
git clone -c extensions.submoduleEncoding=true --recurse-submodules --jobs=2 nested clone_parallel &&
verify_submodule_gitdir_path clone_parallel hippo modules/hippo &&
verify_submodule_gitdir_path clone_parallel hippo/hooks modules/hippo%2fhooks
'

test_expect_success 'submodule encoded name exceeds max name limit' '
(
cd main &&

# find the system NAME_MAX (fall back to 255 if unknown)
name_max=$(getconf NAME_MAX . 2>/dev/null || echo 255) &&

# each "%" char encodes to "%25" (3 chars), ensure we exceed NAME_MAX
count=$((name_max + 10)) &&
longname=$(test_seq -f "%%%0.s" 1 $count | tr -d "\n") &&

test_must_fail git submodule add ../new-sub "$longname" 2>err &&
test_grep "fatal: submodule name .* is too long" err
)
'

test_done

View File

@ -3053,6 +3053,7 @@ test_expect_success 'git config set - variable name - __git_compute_second_level
submodule.sub.fetchRecurseSubmodules Z
submodule.sub.ignore Z
submodule.sub.active Z
submodule.sub.gitdir Z
EOF
'