From 831e02340b9de46c9ea0a1bbce3894f390f5a45e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:50:49 +0100 Subject: [PATCH 01/13] path: move `enter_repo()` into "setup.c" The function `enter_repo()` is used to enter a repository at a given path. As such it sits way closer to setting up a repository than it does with handling paths, but regardless of that it's located in "path.c" instead of in "setup.c". Move the function into "setup.c". Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/receive-pack.c | 2 +- builtin/upload-archive.c | 2 +- builtin/upload-pack.c | 2 +- http-backend.c | 1 + path.c | 100 --------------------------------------- path.h | 15 ------ setup.c | 81 +++++++++++++++++++++++++++++++ setup.h | 38 +++++++++++++++ 8 files changed, 123 insertions(+), 118 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index c9288a9c7e..79a0fd4756 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -34,7 +34,6 @@ #include "object-file.h" #include "object-name.h" #include "odb.h" -#include "path.h" #include "protocol.h" #include "commit-reach.h" #include "server-info.h" @@ -42,6 +41,7 @@ #include "trace2.h" #include "worktree.h" #include "shallow.h" +#include "setup.h" #include "parse-options.h" static const char * const receive_pack_usage[] = { diff --git a/builtin/upload-archive.c b/builtin/upload-archive.c index 97d7c9522f..25312bb2a5 100644 --- a/builtin/upload-archive.c +++ b/builtin/upload-archive.c @@ -4,8 +4,8 @@ #define USE_THE_REPOSITORY_VARIABLE #include "builtin.h" #include "archive.h" -#include "path.h" #include "pkt-line.h" +#include "setup.h" #include "sideband.h" #include "run-command.h" #include "strvec.h" diff --git a/builtin/upload-pack.c b/builtin/upload-pack.c index c2bbc035ab..30498fafea 100644 --- a/builtin/upload-pack.c +++ b/builtin/upload-pack.c @@ -5,11 +5,11 @@ #include "gettext.h" #include "pkt-line.h" #include "parse-options.h" -#include "path.h" #include "protocol.h" #include "replace-object.h" #include "upload-pack.h" #include "serve.h" +#include "setup.h" #include "commit.h" #include "environment.h" diff --git a/http-backend.c b/http-backend.c index 52f0483dd3..e9d1ef92bd 100644 --- a/http-backend.c +++ b/http-backend.c @@ -16,6 +16,7 @@ #include "run-command.h" #include "string-list.h" #include "url.h" +#include "setup.h" #include "strvec.h" #include "packfile.h" #include "odb.h" diff --git a/path.c b/path.c index 7f56eaf993..d726537622 100644 --- a/path.c +++ b/path.c @@ -738,106 +738,6 @@ return_null: return NULL; } -/* - * First, one directory to try is determined by the following algorithm. - * - * (0) If "strict" is given, the path is used as given and no DWIM is - * done. Otherwise: - * (1) "~/path" to mean path under the running user's home directory; - * (2) "~user/path" to mean path under named user's home directory; - * (3) "relative/path" to mean cwd relative directory; or - * (4) "/absolute/path" to mean absolute directory. - * - * Unless "strict" is given, we check "%s/.git", "%s", "%s.git/.git", "%s.git" - * in this order. We select the first one that is a valid git repository, and - * chdir() to it. If none match, or we fail to chdir, we return NULL. - * - * If all goes well, we return the directory we used to chdir() (but - * before ~user is expanded), avoiding getcwd() resolving symbolic - * links. User relative paths are also returned as they are given, - * except DWIM suffixing. - */ -const char *enter_repo(const char *path, unsigned flags) -{ - static struct strbuf validated_path = STRBUF_INIT; - static struct strbuf used_path = STRBUF_INIT; - - if (!path) - return NULL; - - if (!(flags & ENTER_REPO_STRICT)) { - static const char *suffix[] = { - "/.git", "", ".git/.git", ".git", NULL, - }; - const char *gitfile; - int len = strlen(path); - int i; - while ((1 < len) && (path[len-1] == '/')) - len--; - - /* - * We can handle arbitrary-sized buffers, but this remains as a - * sanity check on untrusted input. - */ - if (PATH_MAX <= len) - return NULL; - - strbuf_reset(&used_path); - strbuf_reset(&validated_path); - strbuf_add(&used_path, path, len); - strbuf_add(&validated_path, path, len); - - if (used_path.buf[0] == '~') { - char *newpath = interpolate_path(used_path.buf, 0); - if (!newpath) - return NULL; - strbuf_attach(&used_path, newpath, strlen(newpath), - strlen(newpath)); - } - for (i = 0; suffix[i]; i++) { - struct stat st; - size_t baselen = used_path.len; - strbuf_addstr(&used_path, suffix[i]); - if (!stat(used_path.buf, &st) && - (S_ISREG(st.st_mode) || - (S_ISDIR(st.st_mode) && is_git_directory(used_path.buf)))) { - strbuf_addstr(&validated_path, suffix[i]); - break; - } - strbuf_setlen(&used_path, baselen); - } - if (!suffix[i]) - return NULL; - gitfile = read_gitfile(used_path.buf); - if (!(flags & ENTER_REPO_ANY_OWNER_OK)) - die_upon_dubious_ownership(gitfile, NULL, used_path.buf); - if (gitfile) { - strbuf_reset(&used_path); - strbuf_addstr(&used_path, gitfile); - } - if (chdir(used_path.buf)) - return NULL; - path = validated_path.buf; - } - else { - const char *gitfile = read_gitfile(path); - if (!(flags & ENTER_REPO_ANY_OWNER_OK)) - die_upon_dubious_ownership(gitfile, NULL, path); - if (gitfile) - path = gitfile; - if (chdir(path)) - return NULL; - } - - if (is_git_directory(".")) { - set_git_dir(".", 0); - check_repository_format(NULL); - return path; - } - - return NULL; -} - int calc_shared_perm(struct repository *repo, int mode) { diff --git a/path.h b/path.h index e67348f253..0ec95a0b07 100644 --- a/path.h +++ b/path.h @@ -146,21 +146,6 @@ int adjust_shared_perm(struct repository *repo, const char *path); char *interpolate_path(const char *path, int real_home); -/* The bits are as follows: - * - * - ENTER_REPO_STRICT: callers that require exact paths (as opposed - * to allowing known suffixes like ".git", ".git/.git" to be - * omitted) can set this bit. - * - * - ENTER_REPO_ANY_OWNER_OK: callers that are willing to run without - * ownership check can set this bit. - */ -enum { - ENTER_REPO_STRICT = (1<<0), - ENTER_REPO_ANY_OWNER_OK = (1<<1), -}; - -const char *enter_repo(const char *path, unsigned flags); const char *remove_leading_path(const char *in, const char *prefix); const char *relative_path(const char *in, const char *prefix, struct strbuf *sb); int normalize_path_copy_len(char *dst, const char *src, int *prefix_len); diff --git a/setup.c b/setup.c index 7086741e6c..98c6fd8ee4 100644 --- a/setup.c +++ b/setup.c @@ -1703,6 +1703,87 @@ void set_git_dir(const char *path, int make_realpath) strbuf_release(&realpath); } +const char *enter_repo(const char *path, unsigned flags) +{ + static struct strbuf validated_path = STRBUF_INIT; + static struct strbuf used_path = STRBUF_INIT; + + if (!path) + return NULL; + + if (!(flags & ENTER_REPO_STRICT)) { + static const char *suffix[] = { + "/.git", "", ".git/.git", ".git", NULL, + }; + const char *gitfile; + int len = strlen(path); + int i; + while ((1 < len) && (path[len-1] == '/')) + len--; + + /* + * We can handle arbitrary-sized buffers, but this remains as a + * sanity check on untrusted input. + */ + if (PATH_MAX <= len) + return NULL; + + strbuf_reset(&used_path); + strbuf_reset(&validated_path); + strbuf_add(&used_path, path, len); + strbuf_add(&validated_path, path, len); + + if (used_path.buf[0] == '~') { + char *newpath = interpolate_path(used_path.buf, 0); + if (!newpath) + return NULL; + strbuf_attach(&used_path, newpath, strlen(newpath), + strlen(newpath)); + } + for (i = 0; suffix[i]; i++) { + struct stat st; + size_t baselen = used_path.len; + strbuf_addstr(&used_path, suffix[i]); + if (!stat(used_path.buf, &st) && + (S_ISREG(st.st_mode) || + (S_ISDIR(st.st_mode) && is_git_directory(used_path.buf)))) { + strbuf_addstr(&validated_path, suffix[i]); + break; + } + strbuf_setlen(&used_path, baselen); + } + if (!suffix[i]) + return NULL; + gitfile = read_gitfile(used_path.buf); + if (!(flags & ENTER_REPO_ANY_OWNER_OK)) + die_upon_dubious_ownership(gitfile, NULL, used_path.buf); + if (gitfile) { + strbuf_reset(&used_path); + strbuf_addstr(&used_path, gitfile); + } + if (chdir(used_path.buf)) + return NULL; + path = validated_path.buf; + } + else { + const char *gitfile = read_gitfile(path); + if (!(flags & ENTER_REPO_ANY_OWNER_OK)) + die_upon_dubious_ownership(gitfile, NULL, path); + if (gitfile) + path = gitfile; + if (chdir(path)) + return NULL; + } + + if (is_git_directory(".")) { + set_git_dir(".", 0); + check_repository_format(NULL); + return path; + } + + return NULL; +} + static int git_work_tree_initialized; /* diff --git a/setup.h b/setup.h index 8522fa8575..bfea199bcd 100644 --- a/setup.h +++ b/setup.h @@ -97,6 +97,44 @@ static inline int discover_git_directory(struct strbuf *commondir, void set_git_dir(const char *path, int make_realpath); void set_git_work_tree(const char *tree); +/* Flags that can be passed to `enter_repo()`. */ +enum { + /* + * Callers that require exact paths (as opposed to allowing known + * suffixes like ".git", ".git/.git" to be omitted) can set this bit. + */ + ENTER_REPO_STRICT = (1<<0), + + /* + * Callers that are willing to run without ownership check can set this + * bit. + */ + ENTER_REPO_ANY_OWNER_OK = (1<<1), +}; + +/* + * Discover and enter a repository. + * + * First, one directory to try is determined by the following algorithm. + * + * (0) If "strict" is given, the path is used as given and no DWIM is + * done. Otherwise: + * (1) "~/path" to mean path under the running user's home directory; + * (2) "~user/path" to mean path under named user's home directory; + * (3) "relative/path" to mean cwd relative directory; or + * (4) "/absolute/path" to mean absolute directory. + * + * Unless "strict" is given, we check "%s/.git", "%s", "%s.git/.git", "%s.git" + * in this order. We select the first one that is a valid git repository, and + * chdir() to it. If none match, or we fail to chdir, we return NULL. + * + * If all goes well, we return the directory we used to chdir() (but + * before ~user is expanded), avoiding getcwd() resolving symbolic + * links. User relative paths are also returned as they are given, + * except DWIM suffixing. + */ +const char *enter_repo(const char *path, unsigned flags); + const char *setup_git_directory_gently(int *); const char *setup_git_directory(void); char *prefix_path(const char *prefix, int len, const char *path); From 7c188a9e45405ff911b81a5dd9029f4e91fb338e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:50:50 +0100 Subject: [PATCH 02/13] setup: convert `set_git_dir()` to have file scope We don't have any external callers of `set_git_dir()` anymore now that `enter_repo()` has been moved into "setup.c". Remove the declaration and mark the function as static. Note that this change requires us to move the implementation around so that we can avoid adding any new forward declarations. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- setup.c | 80 ++++++++++++++++++++++++++++----------------------------- setup.h | 1 - 2 files changed, 40 insertions(+), 41 deletions(-) diff --git a/setup.c b/setup.c index 98c6fd8ee4..8bf52df716 100644 --- a/setup.c +++ b/setup.c @@ -1002,6 +1002,46 @@ cleanup_return: return error_code ? NULL : path; } +static void set_git_dir_1(const char *path) +{ + xsetenv(GIT_DIR_ENVIRONMENT, path, 1); + setup_git_env(path); +} + +static void update_relative_gitdir(const char *name UNUSED, + const char *old_cwd, + const char *new_cwd, + void *data UNUSED) +{ + char *path = reparent_relative_path(old_cwd, new_cwd, + repo_get_git_dir(the_repository)); + struct tmp_objdir *tmp_objdir = tmp_objdir_unapply_primary_odb(); + + trace_printf_key(&trace_setup_key, + "setup: move $GIT_DIR to '%s'", + path); + set_git_dir_1(path); + if (tmp_objdir) + tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd); + free(path); +} + +static void set_git_dir(const char *path, int make_realpath) +{ + struct strbuf realpath = STRBUF_INIT; + + if (make_realpath) { + strbuf_realpath(&realpath, path, 1); + path = realpath.buf; + } + + set_git_dir_1(path); + if (!is_absolute_path(path)) + chdir_notify_register(NULL, update_relative_gitdir, NULL); + + strbuf_release(&realpath); +} + static const char *setup_explicit_git_dir(const char *gitdirenv, struct strbuf *cwd, struct repository_format *repo_fmt, @@ -1663,46 +1703,6 @@ void setup_git_env(const char *git_dir) fetch_if_missing = 0; } -static void set_git_dir_1(const char *path) -{ - xsetenv(GIT_DIR_ENVIRONMENT, path, 1); - setup_git_env(path); -} - -static void update_relative_gitdir(const char *name UNUSED, - const char *old_cwd, - const char *new_cwd, - void *data UNUSED) -{ - char *path = reparent_relative_path(old_cwd, new_cwd, - repo_get_git_dir(the_repository)); - struct tmp_objdir *tmp_objdir = tmp_objdir_unapply_primary_odb(); - - trace_printf_key(&trace_setup_key, - "setup: move $GIT_DIR to '%s'", - path); - set_git_dir_1(path); - if (tmp_objdir) - tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd); - free(path); -} - -void set_git_dir(const char *path, int make_realpath) -{ - struct strbuf realpath = STRBUF_INIT; - - if (make_realpath) { - strbuf_realpath(&realpath, path, 1); - path = realpath.buf; - } - - set_git_dir_1(path); - if (!is_absolute_path(path)) - chdir_notify_register(NULL, update_relative_gitdir, NULL); - - strbuf_release(&realpath); -} - const char *enter_repo(const char *path, unsigned flags) { static struct strbuf validated_path = STRBUF_INIT; diff --git a/setup.h b/setup.h index bfea199bcd..d55dcc6608 100644 --- a/setup.h +++ b/setup.h @@ -94,7 +94,6 @@ static inline int discover_git_directory(struct strbuf *commondir, return 0; } -void set_git_dir(const char *path, int make_realpath); void set_git_work_tree(const char *tree); /* Flags that can be passed to `enter_repo()`. */ From 9aaba579932781c74f67d6cecddaad59f0daaaef Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:50:51 +0100 Subject: [PATCH 03/13] odb: adopt logic to close object databases The logic to close an object database is currently contained in the packfile subsystem. That choice is somewhat relatable, as most of the logic really is to close resources associated with the packfile store itself. But we also end up handling object sources and commit graphs, which certainly is not related to packfiles. Move the function into the object database subsystem and rename it to `odb_close()`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/clone.c | 2 +- builtin/gc.c | 2 +- builtin/repack.c | 2 +- midx-write.c | 2 +- odb.c | 18 +++++++++++++++++- odb.h | 7 +++++++ packfile.c | 15 --------------- packfile.h | 1 - run-command.c | 2 +- scalar.c | 2 +- 10 files changed, 30 insertions(+), 23 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index c990f398ef..b19b302b06 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1617,7 +1617,7 @@ int cmd_clone(int argc, transport_disconnect(transport); if (option_dissociate) { - close_object_store(the_repository->objects); + odb_close(the_repository->objects); dissociate_from_references(); } diff --git a/builtin/gc.c b/builtin/gc.c index d212cbb9b8..961fa343c4 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1048,7 +1048,7 @@ int cmd_gc(int argc, report_garbage = report_pack_garbage; odb_reprepare(the_repository->objects); if (pack_garbage.nr > 0) { - close_object_store(the_repository->objects); + odb_close(the_repository->objects); clean_pack_garbage(); } diff --git a/builtin/repack.c b/builtin/repack.c index cfdb4c0920..d9012141f6 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -488,7 +488,7 @@ int cmd_repack(int argc, string_list_sort(&names); - close_object_store(repo->objects); + odb_close(repo->objects); /* * Ok we have prepared all new packfiles. diff --git a/midx-write.c b/midx-write.c index c73010df6d..60497586fd 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1459,7 +1459,7 @@ static int write_midx_internal(struct odb_source *source, } if (ctx.m || ctx.base_midx) - close_object_store(ctx.repo->objects); + odb_close(ctx.repo->objects); if (commit_lock_file(&lk) < 0) die_errno(_("could not write multi-pack-index")); diff --git a/odb.c b/odb.c index 3ec21ef24e..bcefa5cede 100644 --- a/odb.c +++ b/odb.c @@ -9,6 +9,7 @@ #include "khash.h" #include "lockfile.h" #include "loose.h" +#include "midx.h" #include "object-file-convert.h" #include "object-file.h" #include "odb.h" @@ -1044,6 +1045,21 @@ struct object_database *odb_new(struct repository *repo) return o; } +void odb_close(struct object_database *o) +{ + struct odb_source *source; + + packfile_store_close(o->packfiles); + + for (source = o->sources; source; source = source->next) { + if (source->midx) + close_midx(source->midx); + source->midx = NULL; + } + + close_commit_graph(o); +} + static void odb_free_sources(struct object_database *o) { while (o->sources) { @@ -1076,7 +1092,7 @@ void odb_clear(struct object_database *o) free((char *) o->cached_objects[i].value.buf); FREE_AND_NULL(o->cached_objects); - close_object_store(o); + odb_close(o); packfile_store_free(o->packfiles); o->packfiles = NULL; diff --git a/odb.h b/odb.h index 9bb28008b1..71b4897c82 100644 --- a/odb.h +++ b/odb.h @@ -169,6 +169,13 @@ struct object_database { struct object_database *odb_new(struct repository *repo); void odb_clear(struct object_database *o); +/* + * Close the object database and all of its sources so that any held resources + * will be released. The database can still be used after closing it, in which + * case these resources may be reallocated. + */ +void odb_close(struct object_database *o); + /* * Clear caches, reload alternates and then reload object sources so that new * objects may become accessible. diff --git a/packfile.c b/packfile.c index 40f733dd23..af71eaf7e3 100644 --- a/packfile.c +++ b/packfile.c @@ -359,21 +359,6 @@ void close_pack(struct packed_git *p) oidset_clear(&p->bad_objects); } -void close_object_store(struct object_database *o) -{ - struct odb_source *source; - - packfile_store_close(o->packfiles); - - for (source = o->sources; source; source = source->next) { - if (source->midx) - close_midx(source->midx); - source->midx = NULL; - } - - close_commit_graph(o); -} - void unlink_pack_path(const char *pack_name, int force_delete) { static const char *exts[] = {".idx", ".pack", ".rev", ".keep", ".bitmap", ".promisor", ".mtimes"}; diff --git a/packfile.h b/packfile.h index 58fcc88e20..d9226a072a 100644 --- a/packfile.h +++ b/packfile.h @@ -279,7 +279,6 @@ struct object_database; unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *); void close_pack_windows(struct packed_git *); void close_pack(struct packed_git *); -void close_object_store(struct object_database *o); void unuse_pack(struct pack_window **); void clear_delta_base_cache(void); struct packed_git *add_packed_git(struct repository *r, const char *path, diff --git a/run-command.c b/run-command.c index ed9575bd6a..e3e02475cc 100644 --- a/run-command.c +++ b/run-command.c @@ -743,7 +743,7 @@ fail_pipe: fflush(NULL); if (cmd->close_object_store) - close_object_store(the_repository->objects); + odb_close(the_repository->objects); #ifndef GIT_WINDOWS_NATIVE { diff --git a/scalar.c b/scalar.c index f754311627..2aeb191cc8 100644 --- a/scalar.c +++ b/scalar.c @@ -931,7 +931,7 @@ static int cmd_delete(int argc, const char **argv) if (dir_inside_of(cwd, enlistment.buf) >= 0) res = error(_("refusing to delete current working directory")); else { - close_object_store(the_repository->objects); + odb_close(the_repository->objects); res = delete_enlistment(&enlistment); } strbuf_release(&enlistment); From f8bdf3127ab7df8a8f3039f41889b35eefe029a3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:50:52 +0100 Subject: [PATCH 04/13] odb: refactor `odb_clear()` to `odb_free()` The function `odb_clear()` releases all resources allocated to an object database and ensures that all fields become zero'd out. Despite its naming though it doesn't really clear the object database so that it becomes ready for reuse afterwards again -- the caller would first have to reinitialize it, and that contradicts the terminology of "clearing" as we have defined it in our coding guidelines. There isn't really only a reason to have "clearing" semantics, either. There's only a single caller of `odb_clear()`, and that caller also ends up freeing the object database structure itself. Refactor the function to have "freeing" semantics instead, so that the structure itself is also freed, which allows us to drop some useless boilerplate to zero out the structure's members. This refactoring reveals that we're trying to close the commit graph multiple times: once directly via `free_commit_graph()`, and once via `odb_close()`. Drop the former call. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 19 ++++++++----------- odb.h | 4 +++- repository.c | 4 ++-- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/odb.c b/odb.c index bcefa5cede..29cf6496c5 100644 --- a/odb.c +++ b/odb.c @@ -1073,30 +1073,27 @@ static void odb_free_sources(struct object_database *o) o->source_by_path = NULL; } -void odb_clear(struct object_database *o) +void odb_free(struct object_database *o) { - FREE_AND_NULL(o->alternate_db); + if (!o) + return; + + free(o->alternate_db); oidmap_clear(&o->replace_map, 1); pthread_mutex_destroy(&o->replace_mutex); - free_commit_graph(o->commit_graph); - o->commit_graph = NULL; - o->commit_graph_attempted = 0; - odb_free_sources(o); - o->sources_tail = NULL; - o->loaded_alternates = 0; for (size_t i = 0; i < o->cached_object_nr; i++) free((char *) o->cached_objects[i].value.buf); - FREE_AND_NULL(o->cached_objects); + free(o->cached_objects); odb_close(o); packfile_store_free(o->packfiles); - o->packfiles = NULL; - string_list_clear(&o->submodule_source_paths, 0); + + free(o); } void odb_reprepare(struct object_database *o) diff --git a/odb.h b/odb.h index 71b4897c82..77b313b784 100644 --- a/odb.h +++ b/odb.h @@ -167,7 +167,9 @@ struct object_database { }; struct object_database *odb_new(struct repository *repo); -void odb_clear(struct object_database *o); + +/* Free the object database and release all resources. */ +void odb_free(struct object_database *o); /* * Close the object database and all of its sources so that any held resources diff --git a/repository.c b/repository.c index 6aaa7ba008..3c8b3813b0 100644 --- a/repository.c +++ b/repository.c @@ -382,8 +382,8 @@ void repo_clear(struct repository *repo) FREE_AND_NULL(repo->worktree); FREE_AND_NULL(repo->submodule_prefix); - odb_clear(repo->objects); - FREE_AND_NULL(repo->objects); + odb_free(repo->objects); + repo->objects = NULL; parsed_object_pool_clear(repo->parsed_objects); FREE_AND_NULL(repo->parsed_objects); From b67b2d9fb7ccfaa72446d76abc8c36849d2e0685 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:50:53 +0100 Subject: [PATCH 05/13] odb: move logic to disable ref updates into repo Our object database sources have a field `disable_ref_updates`. This field can obviously be set to disable reference updates, but it is somewhat curious that this logic is hosted by the object database. The reason for this is that it was primarily added to keep us from accidentally updating references while an ODB transaction is ongoing. Any objects part of the transaction have not yet been committed to disk, so new references that point to them might get corrupted in case we never end up committing the transaction. As such, whenever we create a new transaction we set up a new temporary ODB source and mark it as disabling reference updates. This has one (and only one?) upside: once we have committed the transaction, the temporary source will be dropped and thus we clean up the disabled reference updates automatically. But other than that, it's somewhat misdesigned: - We can have multiple ODB sources, but only the currently active source inhibits reference updates. - We're mixing concerns of the refdb with the ODB. Arguably, the decision of whether we can update references or not should be handled by the refdb. But that wouldn't be a great fit either, as there can be one refdb per worktree. So we'd again have the same problem that a "global" intent becomes localized to a specific instance. Instead, move the setting into the repository. While at it, convert it into a boolean. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 3 ++- odb.h | 7 ------- refs.c | 2 +- repository.c | 2 +- repository.h | 9 ++++++++- setup.c | 2 +- 6 files changed, 13 insertions(+), 12 deletions(-) diff --git a/odb.c b/odb.c index 29cf6496c5..ccc6e999e7 100644 --- a/odb.c +++ b/odb.c @@ -360,7 +360,7 @@ struct odb_source *odb_set_temporary_primary_source(struct object_database *odb, * Disable ref updates while a temporary odb is active, since * the objects in the database may roll back. */ - source->disable_ref_updates = 1; + odb->repo->disable_ref_updates = true; source->will_destroy = will_destroy; source->next = odb->sources; odb->sources = source; @@ -387,6 +387,7 @@ void odb_restore_primary_source(struct object_database *odb, if (cur_source->next != restore_source) BUG("we expect the old primary object store to be the first alternate"); + odb->repo->disable_ref_updates = false; odb->sources = restore_source; odb_source_free(cur_source); } diff --git a/odb.h b/odb.h index 77b313b784..99c4d48972 100644 --- a/odb.h +++ b/odb.h @@ -66,13 +66,6 @@ struct odb_source { */ bool local; - /* - * This is a temporary object store created by the tmp_objdir - * facility. Disable ref updates since the objects in the store - * might be discarded on rollback. - */ - int disable_ref_updates; - /* * This object store is ephemeral, so there is no need to fsync. */ diff --git a/refs.c b/refs.c index 965381367e..6c7283d9eb 100644 --- a/refs.c +++ b/refs.c @@ -2491,7 +2491,7 @@ int ref_transaction_prepare(struct ref_transaction *transaction, break; } - if (refs->repo->objects->sources->disable_ref_updates) { + if (refs->repo->disable_ref_updates) { strbuf_addstr(err, _("ref updates forbidden inside quarantine environment")); return -1; diff --git a/repository.c b/repository.c index 3c8b3813b0..455c2d279f 100644 --- a/repository.c +++ b/repository.c @@ -179,7 +179,7 @@ void repo_set_gitdir(struct repository *repo, repo->objects->sources->path = objects_path; } - repo->objects->sources->disable_ref_updates = o->disable_ref_updates; + repo->disable_ref_updates = o->disable_ref_updates; free(repo->objects->alternate_db); repo->objects->alternate_db = xstrdup_or_null(o->alternate_db); diff --git a/repository.h b/repository.h index 5808a5d610..614649413b 100644 --- a/repository.h +++ b/repository.h @@ -71,6 +71,13 @@ struct repository { */ struct ref_store *refs_private; + /* + * Disable ref updates. This is especially used in contexts where + * transactions may still be rolled back so that we don't start to + * reference objects that may vanish. + */ + bool disable_ref_updates; + /* * A strmap of ref_stores, stored by submodule name, accessible via * `repo_get_submodule_ref_store()`. @@ -187,7 +194,7 @@ struct set_gitdir_args { const char *graft_file; const char *index_file; const char *alternate_db; - int disable_ref_updates; + bool disable_ref_updates; }; void repo_set_gitdir(struct repository *repo, const char *root, diff --git a/setup.c b/setup.c index 8bf52df716..a752e9fc84 100644 --- a/setup.c +++ b/setup.c @@ -1682,7 +1682,7 @@ void setup_git_env(const char *git_dir) args.index_file = getenv_safe(&to_free, INDEX_ENVIRONMENT); args.alternate_db = getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT); if (getenv(GIT_QUARANTINE_ENVIRONMENT)) { - args.disable_ref_updates = 1; + args.disable_ref_updates = true; } repo_set_gitdir(the_repository, git_dir, &args); From 5d795b34dcbf46039c3dda028bb8df8d75a5a9d0 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:50:54 +0100 Subject: [PATCH 06/13] oidset: introduce `oidset_equal()` Introduce a new function that allows the caller to verify whether two oidsets contain the exact same object IDs. Note that this change requires us to change `oidset_iter_init()` to accept a `const struct oidset`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- oidset.c | 16 ++++++++++++++++ oidset.h | 9 +++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/oidset.c b/oidset.c index 8d36aef8dc..c8ff0b385c 100644 --- a/oidset.c +++ b/oidset.c @@ -16,6 +16,22 @@ int oidset_contains(const struct oidset *set, const struct object_id *oid) return pos != kh_end(&set->set); } +bool oidset_equal(const struct oidset *a, const struct oidset *b) +{ + struct oidset_iter iter; + struct object_id *a_oid; + + if (oidset_size(a) != oidset_size(b)) + return false; + + oidset_iter_init(a, &iter); + while ((a_oid = oidset_iter_next(&iter))) + if (!oidset_contains(b, a_oid)) + return false; + + return true; +} + int oidset_insert(struct oidset *set, const struct object_id *oid) { int added; diff --git a/oidset.h b/oidset.h index 0106b6f278..e0f1a6ff4f 100644 --- a/oidset.h +++ b/oidset.h @@ -38,6 +38,11 @@ void oidset_init(struct oidset *set, size_t initial_size); */ int oidset_contains(const struct oidset *set, const struct object_id *oid); +/** + * Returns true iff `a` and `b` contain the exact same OIDs. + */ +bool oidset_equal(const struct oidset *a, const struct oidset *b); + /** * Insert the oid into the set; a copy is made, so "oid" does not need * to persist after this function is called. @@ -94,11 +99,11 @@ void oidset_parse_file_carefully(struct oidset *set, const char *path, oidset_parse_tweak_fn fn, void *cbdata); struct oidset_iter { - kh_oid_set_t *set; + const kh_oid_set_t *set; khiter_t iter; }; -static inline void oidset_iter_init(struct oidset *set, +static inline void oidset_iter_init(const struct oidset *set, struct oidset_iter *iter) { iter->set = &set->set; From 8dc22e87f000a092b62b4fb08e2542433f1ae192 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:50:55 +0100 Subject: [PATCH 07/13] builtin/index-pack: fix deferred fsck outside repos When asked to perform object consistency checks via the `--fsck-objects` flag we verify that each object part of the pack is valid. In general, this check can even be performed outside of a Git repository: we don't need an initialized object database as we simply read the object from the packfile directly. But there's one exception: a subset of the object checks may be deferred to a later point in time. For now, this only concerns ".gitmodules" and ".gitattributes" files: whenever we see a tree referencing these files we queue them for a deferred check. This is done because we need to do some extra checks for those files to ensure that they are well-formed, and these checks need to be done regardless of whether the corresponding blobs are part of the packfile or not. This works inside a repository, but unfortunately the logic leads to a segfault when running outside of one. This is because we eventually call `odb_read_object()`, which will crash because the object database has not been initialized. There's multiple options here: - We could in theory create a purely in-memory database with only a packfile store that contains the single packfile. We don't really have the infrastructure for this yet though, and it would end up being quite hacky. - We could refuse to perform consistency checks outside of a repository. But most of the checks work alright, so this would be a regression. - We can skip the finalizing consistency checks when running outside of a repository. This is not as invasive as skipping all checks, but it's not great to randomly skip a subset of tests, either. None of these options really feel perfect. The first one would be the obvious choice if easily possible. There's another option though: instead of skipping the final object checks, we can die if there are any queued object checks. With this change we now die exactly if and only if we would have previously segfaulted. Like this we ensure that objects that _may_ fail the consistency checks won't be silently skipped, and at the same time we give users a much better error message. Refactor the code accordingly and add a test that would have triggered the segfault. Note that we also move down the logic to add the packfile to the store. There is no point doing this any earlier than right before we execute `fsck_finish()`, and it ensures that the logic to set up and perform the consistency check is self-contained. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 21 ++++++++++++++++++--- fsck.c | 6 ++++++ fsck.h | 7 +++++++ t/t5302-pack-index.sh | 16 ++++++++++++++++ 4 files changed, 47 insertions(+), 3 deletions(-) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 2b78ba7fe4..699fe678cd 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1640,7 +1640,7 @@ static void final(const char *final_pack_name, const char *curr_pack_name, rename_tmp_packfile(&final_index_name, curr_index_name, &index_name, hash, "idx", 1); - if (do_fsck_object) + if (do_fsck_object && startup_info->have_repository) packfile_store_load_pack(the_repository->objects->packfiles, final_index_name, 0); @@ -2110,8 +2110,23 @@ int cmd_index_pack(int argc, else close(input_fd); - if (do_fsck_object && fsck_finish(&fsck_options)) - die(_("fsck error in pack objects")); + if (do_fsck_object) { + /* + * We cannot perform queued consistency checks when running + * outside of a repository because those require us to read + * from the object database, which is uninitialized. + * + * TODO: we may eventually set up an in-memory object database, + * which would allow us to perform these queued checks. + */ + if (!startup_info->have_repository && + fsck_has_queued_checks(&fsck_options)) + die(_("cannot perform queued object checks outside " + "of a repository")); + + if (fsck_finish(&fsck_options)) + die(_("fsck error in pack objects")); + } free(opts.anomaly); free(objects); diff --git a/fsck.c b/fsck.c index 341e100d24..8e1565fe6d 100644 --- a/fsck.c +++ b/fsck.c @@ -1350,6 +1350,12 @@ int fsck_finish(struct fsck_options *options) return ret; } +bool fsck_has_queued_checks(struct fsck_options *options) +{ + return !oidset_equal(&options->gitmodules_found, &options->gitmodules_done) || + !oidset_equal(&options->gitattributes_found, &options->gitattributes_done); +} + void fsck_options_clear(struct fsck_options *options) { free(options->msg_type); diff --git a/fsck.h b/fsck.h index cb6ef32f4f..336917c045 100644 --- a/fsck.h +++ b/fsck.h @@ -248,6 +248,13 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer, */ int fsck_finish(struct fsck_options *options); +/* + * Check whether there are any checks that have been queued up and that still + * need to be run. Returns `false` iff `fsck_finish()` wouldn't perform any + * actions, `true` otherwise. + */ +bool fsck_has_queued_checks(struct fsck_options *options); + /* * Clear the fsck_options struct, freeing any allocated memory. */ diff --git a/t/t5302-pack-index.sh b/t/t5302-pack-index.sh index 413c99274c..9697448cb2 100755 --- a/t/t5302-pack-index.sh +++ b/t/t5302-pack-index.sh @@ -293,4 +293,20 @@ test_expect_success 'too-large packs report the breach' ' grep "maximum allowed size (20 bytes)" err ' +# git-index-pack(1) uses the default hash algorithm outside of the repository, +# and it has no way to tell it otherwise. So we can only run this test with the +# default hash algorithm, as it would otherwise fail to parse the tree. +test_expect_success DEFAULT_HASH_ALGORITHM 'index-pack --fsck-objects outside of a repo' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + printf "100644 blob $(test_oid 001)\t.gitattributes\n" >tree && + git mktree --missing tree-oid && + git pack-objects err && + test_grep "cannot perform queued object checks outside of a repository" err + ) +' + test_done From eea83c010cf431325a05f06cc7c64029538c8495 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:50:56 +0100 Subject: [PATCH 08/13] t/helper: stop setting up `the_repository` repeatedly The "repository" test helper sets up `the_repository` twice. In fact though, we don't even have to set it up even once: all we need is to set up its hash algorithm, because we still depend on some subsystems that aren't free of `the_repository`. Refactor the code accordingly. This prepares for a subsequent change, where setting up the repository repeatedly will lead to a `BUG()`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- t/helper/test-repository.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/t/helper/test-repository.c b/t/helper/test-repository.c index 63c37de33d..9ba94cdffa 100644 --- a/t/helper/test-repository.c +++ b/t/helper/test-repository.c @@ -17,10 +17,6 @@ static void test_parse_commit_in_graph(const char *gitdir, const char *worktree, struct commit *c; struct commit_list *parent; - setup_git_env(gitdir); - - repo_clear(the_repository); - if (repo_init(&r, gitdir, worktree)) die("Couldn't init repo"); @@ -47,10 +43,6 @@ static void test_get_commit_tree_in_graph(const char *gitdir, struct commit *c; struct tree *tree; - setup_git_env(gitdir); - - repo_clear(the_repository); - if (repo_init(&r, gitdir, worktree)) die("Couldn't init repo"); @@ -75,24 +67,20 @@ static void test_get_commit_tree_in_graph(const char *gitdir, int cmd__repository(int argc, const char **argv) { - int nongit_ok = 0; - - setup_git_directory_gently(&nongit_ok); - if (argc < 2) die("must have at least 2 arguments"); if (!strcmp(argv[1], "parse_commit_in_graph")) { struct object_id oid; if (argc < 5) die("not enough arguments"); - if (parse_oid_hex(argv[4], &oid, &argv[4])) + if (parse_oid_hex_any(argv[4], &oid, &argv[4]) == GIT_HASH_UNKNOWN) die("cannot parse oid '%s'", argv[4]); test_parse_commit_in_graph(argv[2], argv[3], &oid); } else if (!strcmp(argv[1], "get_commit_tree_in_graph")) { struct object_id oid; if (argc < 5) die("not enough arguments"); - if (parse_oid_hex(argv[4], &oid, &argv[4])) + if (parse_oid_hex_any(argv[4], &oid, &argv[4]) == GIT_HASH_UNKNOWN) die("cannot parse oid '%s'", argv[4]); test_get_commit_tree_in_graph(argv[2], argv[3], &oid); } else { From c257bd59165e2f55dfa2c97b0ca1e39131513654 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:50:57 +0100 Subject: [PATCH 09/13] http-push: stop setting up `the_repository` for each reference When pushing references via HTTP we call `repo_init_revisions()` in a loop for each reference that we're about to push. As third argument we pass the result of `setup_git_directory()`, which causes us to reinitialize the repository every single time. This is an obvious waste of compute, as the repository that we're working in will never change across any of the initializations. The only reason that we do this is to retrieve the directory of the repository. Furthermore, this is about to create issues in a subsequent commit, where reinitializing the repository will cause a `BUG()`. Address this by storing the Git directory in a variable instead so that we don't have to call the function repeatedly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- http-push.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/http-push.c b/http-push.c index a1c01e3b9b..a48ca23799 100644 --- a/http-push.c +++ b/http-push.c @@ -1725,6 +1725,7 @@ int cmd_main(int argc, const char **argv) int i; int new_refs; struct ref *ref, *local_refs = NULL; + const char *gitdir; CALLOC_ARRAY(repo, 1); @@ -1787,7 +1788,7 @@ int cmd_main(int argc, const char **argv) if (delete_branch && rs.nr != 1) die("You must specify only one branch name when deleting a remote branch"); - setup_git_directory(); + gitdir = setup_git_directory(); memset(remote_dir_exists, -1, 256); @@ -1941,7 +1942,7 @@ int cmd_main(int argc, const char **argv) if (!push_all && !is_null_oid(&ref->old_oid)) strvec_pushf(&commit_argv, "^%s", oid_to_hex(&ref->old_oid)); - repo_init_revisions(the_repository, &revs, setup_git_directory()); + repo_init_revisions(the_repository, &revs, gitdir); setup_revisions_from_strvec(&commit_argv, &revs, NULL); revs.edge_hint = 0; /* just in case */ From 35d9fc65edc0a5df9f714d02afaa2c942fb28570 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:50:58 +0100 Subject: [PATCH 10/13] odb: handle initialization of sources in `odb_new()` The logic to set up a new object database is currently distributed across two functions in "repository.c": - In `initialize_repository()` we initialize an empty object database. This object database is not fully initialized and doesn't have any sources attached to it. - The primary object database source is then created in `repo_set_gitdir()`. Ideally though, the logic should be entirely self-contained so that we can iterate more readily on how exactly the sources themselves get set up. Refactor `odb_new()` to handle both allocation and setup of the object database. This ensures that the object database is always initialized and ready for use, and it allows us to change how the sources get set up eventually. Note that `repo_set_gitdir()` still reaches into the sources when the function gets called with an already-initialized object database. This will be fixed in the next commit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 14 +++++++++++++- odb.h | 15 ++++++++++++++- repository.c | 18 +++++++----------- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/odb.c b/odb.c index ccc6e999e7..88b40c81c0 100644 --- a/odb.c +++ b/odb.c @@ -1034,15 +1034,27 @@ int odb_write_object_stream(struct object_database *odb, return odb_source_loose_write_stream(odb->sources, stream, len, oid); } -struct object_database *odb_new(struct repository *repo) +struct object_database *odb_new(struct repository *repo, + const char *primary_source, + const char *secondary_sources) { struct object_database *o = xmalloc(sizeof(*o)); + char *to_free = NULL; memset(o, 0, sizeof(*o)); o->repo = repo; o->packfiles = packfile_store_new(o); pthread_mutex_init(&o->replace_mutex, NULL); string_list_init_dup(&o->submodule_source_paths); + + if (!primary_source) + primary_source = to_free = xstrfmt("%s/objects", repo->commondir); + o->sources = odb_source_new(o, primary_source, true); + o->sources_tail = &o->sources->next; + o->alternate_db = xstrdup_or_null(secondary_sources); + + free(to_free); + return o; } diff --git a/odb.h b/odb.h index 99c4d48972..41b3c03027 100644 --- a/odb.h +++ b/odb.h @@ -159,7 +159,20 @@ struct object_database { struct string_list submodule_source_paths; }; -struct object_database *odb_new(struct repository *repo); +/* + * Create a new object database for the given repository. + * + * If the primary source parameter is set it will override the usual primary + * object directory derived from the repository's common directory. The + * alternate sources are expected to be a PATH_SEP-separated list of secondary + * sources. Note that these alternate sources will be added in addition to, not + * instead of, the alternates identified by the primary source. + * + * Returns the newly created object database. + */ +struct object_database *odb_new(struct repository *repo, + const char *primary_source, + const char *alternate_sources); /* Free the object database and release all resources. */ void odb_free(struct object_database *o); diff --git a/repository.c b/repository.c index 455c2d279f..5975c8f341 100644 --- a/repository.c +++ b/repository.c @@ -52,7 +52,6 @@ static void set_default_hash_algo(struct repository *repo) void initialize_repository(struct repository *repo) { - repo->objects = odb_new(repo); repo->remote_state = remote_state_new(); repo->parsed_objects = parsed_object_pool_new(repo); ALLOC_ARRAY(repo->index, 1); @@ -160,29 +159,26 @@ void repo_set_gitdir(struct repository *repo, * until after xstrdup(root). Then we can free it. */ char *old_gitdir = repo->gitdir; - char *objects_path = NULL; repo->gitdir = xstrdup(gitfile ? gitfile : root); free(old_gitdir); repo_set_commondir(repo, o->commondir); - expand_base_dir(&objects_path, o->object_dir, - repo->commondir, "objects"); - if (!repo->objects->sources) { - repo->objects->sources = odb_source_new(repo->objects, - objects_path, true); - repo->objects->sources_tail = &repo->objects->sources->next; - free(objects_path); + if (!repo->objects) { + repo->objects = odb_new(repo, o->object_dir, o->alternate_db); } else { + char *objects_path = NULL; + expand_base_dir(&objects_path, o->object_dir, + repo->commondir, "objects"); free(repo->objects->sources->path); repo->objects->sources->path = objects_path; + free(repo->objects->alternate_db); + repo->objects->alternate_db = xstrdup_or_null(o->alternate_db); } repo->disable_ref_updates = o->disable_ref_updates; - free(repo->objects->alternate_db); - repo->objects->alternate_db = xstrdup_or_null(o->alternate_db); expand_base_dir(&repo->graft_file, o->graft_file, repo->commondir, "info/grafts"); expand_base_dir(&repo->index_file, o->index_file, From 2574c617362a0c67d15fa01e01cdbd0f6bcdbc93 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:50:59 +0100 Subject: [PATCH 11/13] chdir-notify: add function to unregister listeners While we (obviously) have a way to register new listeners that get called whenever we chdir(3p), we don't have an equivalent that can be used to unregister such a listener again. Add one, as it will be required in a subsequent commit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- chdir-notify.c | 18 ++++++++++++++++++ chdir-notify.h | 2 ++ 2 files changed, 20 insertions(+) diff --git a/chdir-notify.c b/chdir-notify.c index 0d7bc04607..f8bfe3cbef 100644 --- a/chdir-notify.c +++ b/chdir-notify.c @@ -25,6 +25,24 @@ void chdir_notify_register(const char *name, list_add_tail(&e->list, &chdir_notify_entries); } +void chdir_notify_unregister(const char *name, chdir_notify_callback cb, + void *data) +{ + struct list_head *pos, *p; + + list_for_each_safe(pos, p, &chdir_notify_entries) { + struct chdir_notify_entry *e = + list_entry(pos, struct chdir_notify_entry, list); + + if (e->cb != cb || e->data != data || !e->name != !name || + (e->name && strcmp(e->name, name))) + continue; + + list_del(pos); + free(e); + } +} + static void reparent_cb(const char *name, const char *old_cwd, const char *new_cwd, diff --git a/chdir-notify.h b/chdir-notify.h index 366e4c1ee9..81eb69d846 100644 --- a/chdir-notify.h +++ b/chdir-notify.h @@ -41,6 +41,8 @@ typedef void (*chdir_notify_callback)(const char *name, const char *new_cwd, void *data); void chdir_notify_register(const char *name, chdir_notify_callback cb, void *data); +void chdir_notify_unregister(const char *name, chdir_notify_callback cb, + void *data); void chdir_notify_reparent(const char *name, char **path); /* From 2816b748e5c300afda559a09426f8342c235c29d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:51:00 +0100 Subject: [PATCH 12/13] odb: handle changing a repository's commondir The function `repo_set_gitdir()` is called in two situations: - To initialize the repository with its discovered location. As part of this we also set up the new object database. - To update the repository's discovered location in case the process changes its working directory so that we update relative paths. This means we also have to update any relative paths that are potentially used in the object database. In the context of the object database we ideally wouldn't ever have to worry about the second case: if all paths used by our object database sources were absolute, then we wouldn't have to update them. But unfortunately, the paths aren't only used to locate files owned by the given source, but we also use them for reporting purposes. One such example is `repo_get_object_directory()`, where we cannot just change semantics to always return absolute paths, as that is likely to break tooling out there. One solution to this would be to have both a "display path" and an "internal path". This would allow us to use internal paths for all internal matters, but continue to use the potentially-relative display paths so that we don't break compatibility. But converting the codebase to honor this split is quite a messy endeavour, and it wouldn't even help us with the goal to get rid of the need to update the display path on chdir(3p). Another solution would be to rework "setup.c" so that we never have to update paths in the first place. In that case, we'd only initialize the repository once we have figured out final locations for all directories. This would be a significant simplification of that subsystem indeed, but the current logic is so messy that it would take significant investments to get there. Meanwhile though, while object sources may still use relative paths, the best thing we can do is to handle the reparenting of the object source paths in the object database itself. This can be done by registering one callback for each object database so that we get notified whenever the current working directory changes, and we then perform the reparenting ourselves. Ideally, this wouldn't even happen on the object database level, but instead handled by each object database source. But we don't yet have proper pluggable object database sources, so this will need to be handled at a later point in time. The logic itself is rather simple: - We register the callback when creating the object database. - We unregister the callback when releasing it again. - We split up `set_git_dir_1()` so that it becomes possible to skip recreating the object database. This is required because the function is called both when the current working directory changes, but also when we set up the repository. Calling this function without skipping creation of the ODB will result in a bug in case it's already created. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 37 +++++++++++++++++++++-- odb.h | 4 --- repository.c | 13 ++------ repository.h | 1 + setup.c | 84 ++++++++++++++++++++++++++++------------------------ 5 files changed, 83 insertions(+), 56 deletions(-) diff --git a/odb.c b/odb.c index 88b40c81c0..70665fb7f4 100644 --- a/odb.c +++ b/odb.c @@ -1,5 +1,6 @@ #include "git-compat-util.h" #include "abspath.h" +#include "chdir-notify.h" #include "commit-graph.h" #include "config.h" #include "dir.h" @@ -142,9 +143,9 @@ static void read_info_alternates(struct object_database *odb, const char *relative_base, int depth); -struct odb_source *odb_source_new(struct object_database *odb, - const char *path, - bool local) +static struct odb_source *odb_source_new(struct object_database *odb, + const char *path, + bool local) { struct odb_source *source; @@ -1034,6 +1035,32 @@ int odb_write_object_stream(struct object_database *odb, return odb_source_loose_write_stream(odb->sources, stream, len, oid); } +static void odb_update_commondir(const char *name UNUSED, + const char *old_cwd, + const char *new_cwd, + void *cb_data) +{ + struct object_database *odb = cb_data; + struct odb_source *source; + + /* + * In theory, we only have to do this for the primary object source, as + * alternates' paths are always resolved to an absolute path. + */ + for (source = odb->sources; source; source = source->next) { + char *path; + + if (is_absolute_path(source->path)) + continue; + + path = reparent_relative_path(old_cwd, new_cwd, + source->path); + + free(source->path); + source->path = path; + } +} + struct object_database *odb_new(struct repository *repo, const char *primary_source, const char *secondary_sources) @@ -1055,6 +1082,8 @@ struct object_database *odb_new(struct repository *repo, free(to_free); + chdir_notify_register(NULL, odb_update_commondir, o); + return o; } @@ -1106,6 +1135,8 @@ void odb_free(struct object_database *o) packfile_store_free(o->packfiles); string_list_clear(&o->submodule_source_paths, 0); + chdir_notify_unregister(NULL, odb_update_commondir, o); + free(o); } diff --git a/odb.h b/odb.h index 41b3c03027..014cd9585a 100644 --- a/odb.h +++ b/odb.h @@ -78,10 +78,6 @@ struct odb_source { char *path; }; -struct odb_source *odb_source_new(struct object_database *odb, - const char *path, - bool local); - struct packed_git; struct packfile_store; struct cached_object_entry; diff --git a/repository.c b/repository.c index 5975c8f341..863f24411b 100644 --- a/repository.c +++ b/repository.c @@ -165,17 +165,10 @@ void repo_set_gitdir(struct repository *repo, repo_set_commondir(repo, o->commondir); - if (!repo->objects) { + if (!repo->objects) repo->objects = odb_new(repo, o->object_dir, o->alternate_db); - } else { - char *objects_path = NULL; - expand_base_dir(&objects_path, o->object_dir, - repo->commondir, "objects"); - free(repo->objects->sources->path); - repo->objects->sources->path = objects_path; - free(repo->objects->alternate_db); - repo->objects->alternate_db = xstrdup_or_null(o->alternate_db); - } + else if (!o->skip_initializing_odb) + BUG("cannot reinitialize an already-initialized object directory"); repo->disable_ref_updates = o->disable_ref_updates; diff --git a/repository.h b/repository.h index 614649413b..6063c4b846 100644 --- a/repository.h +++ b/repository.h @@ -195,6 +195,7 @@ struct set_gitdir_args { const char *index_file; const char *alternate_db; bool disable_ref_updates; + bool skip_initializing_odb; }; void repo_set_gitdir(struct repository *repo, const char *root, diff --git a/setup.c b/setup.c index a752e9fc84..a625f9fbc8 100644 --- a/setup.c +++ b/setup.c @@ -1002,10 +1002,51 @@ cleanup_return: return error_code ? NULL : path; } -static void set_git_dir_1(const char *path) +static void setup_git_env_internal(const char *git_dir, + bool skip_initializing_odb) +{ + char *git_replace_ref_base; + const char *shallow_file; + const char *replace_ref_base; + struct set_gitdir_args args = { NULL }; + struct strvec to_free = STRVEC_INIT; + + args.commondir = getenv_safe(&to_free, GIT_COMMON_DIR_ENVIRONMENT); + args.object_dir = getenv_safe(&to_free, DB_ENVIRONMENT); + args.graft_file = getenv_safe(&to_free, GRAFT_ENVIRONMENT); + args.index_file = getenv_safe(&to_free, INDEX_ENVIRONMENT); + args.alternate_db = getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT); + if (getenv(GIT_QUARANTINE_ENVIRONMENT)) + args.disable_ref_updates = true; + args.skip_initializing_odb = skip_initializing_odb; + + repo_set_gitdir(the_repository, git_dir, &args); + strvec_clear(&to_free); + + if (getenv(NO_REPLACE_OBJECTS_ENVIRONMENT)) + disable_replace_refs(); + replace_ref_base = getenv(GIT_REPLACE_REF_BASE_ENVIRONMENT); + git_replace_ref_base = xstrdup(replace_ref_base ? replace_ref_base + : "refs/replace/"); + update_ref_namespace(NAMESPACE_REPLACE, git_replace_ref_base); + + shallow_file = getenv(GIT_SHALLOW_FILE_ENVIRONMENT); + if (shallow_file) + set_alternate_shallow_file(the_repository, shallow_file, 0); + + if (git_env_bool(NO_LAZY_FETCH_ENVIRONMENT, 0)) + fetch_if_missing = 0; +} + +void setup_git_env(const char *git_dir) +{ + setup_git_env_internal(git_dir, false); +} + +static void set_git_dir_1(const char *path, bool skip_initializing_odb) { xsetenv(GIT_DIR_ENVIRONMENT, path, 1); - setup_git_env(path); + setup_git_env_internal(path, skip_initializing_odb); } static void update_relative_gitdir(const char *name UNUSED, @@ -1020,7 +1061,7 @@ static void update_relative_gitdir(const char *name UNUSED, trace_printf_key(&trace_setup_key, "setup: move $GIT_DIR to '%s'", path); - set_git_dir_1(path); + set_git_dir_1(path, true); if (tmp_objdir) tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd); free(path); @@ -1035,7 +1076,7 @@ static void set_git_dir(const char *path, int make_realpath) path = realpath.buf; } - set_git_dir_1(path); + set_git_dir_1(path, false); if (!is_absolute_path(path)) chdir_notify_register(NULL, update_relative_gitdir, NULL); @@ -1668,41 +1709,6 @@ enum discovery_result discover_git_directory_reason(struct strbuf *commondir, return result; } -void setup_git_env(const char *git_dir) -{ - char *git_replace_ref_base; - const char *shallow_file; - const char *replace_ref_base; - struct set_gitdir_args args = { NULL }; - struct strvec to_free = STRVEC_INIT; - - args.commondir = getenv_safe(&to_free, GIT_COMMON_DIR_ENVIRONMENT); - args.object_dir = getenv_safe(&to_free, DB_ENVIRONMENT); - args.graft_file = getenv_safe(&to_free, GRAFT_ENVIRONMENT); - args.index_file = getenv_safe(&to_free, INDEX_ENVIRONMENT); - args.alternate_db = getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT); - if (getenv(GIT_QUARANTINE_ENVIRONMENT)) { - args.disable_ref_updates = true; - } - - repo_set_gitdir(the_repository, git_dir, &args); - strvec_clear(&to_free); - - if (getenv(NO_REPLACE_OBJECTS_ENVIRONMENT)) - disable_replace_refs(); - replace_ref_base = getenv(GIT_REPLACE_REF_BASE_ENVIRONMENT); - git_replace_ref_base = xstrdup(replace_ref_base ? replace_ref_base - : "refs/replace/"); - update_ref_namespace(NAMESPACE_REPLACE, git_replace_ref_base); - - shallow_file = getenv(GIT_SHALLOW_FILE_ENVIRONMENT); - if (shallow_file) - set_alternate_shallow_file(the_repository, shallow_file, 0); - - if (git_env_bool(NO_LAZY_FETCH_ENVIRONMENT, 0)) - fetch_if_missing = 0; -} - const char *enter_repo(const char *path, unsigned flags) { static struct strbuf validated_path = STRBUF_INIT; From ac65c70663b092e823b0d3de1c1cfdee0a4fbc8e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:51:01 +0100 Subject: [PATCH 13/13] odb: handle recreation of quarantine directories In the preceding commit we have moved the logic that reparents object database sources on chdir(3p) from "setup.c" into "odb.c". Let's also do the same for any temporary quarantine directories so that the complete reparenting logic is self-contained in "odb.c". Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 7 +++++++ setup.c | 5 ----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/odb.c b/odb.c index 70665fb7f4..dc8f292f3d 100644 --- a/odb.c +++ b/odb.c @@ -24,6 +24,7 @@ #include "strbuf.h" #include "strvec.h" #include "submodule.h" +#include "tmp-objdir.h" #include "trace2.h" #include "write-or-die.h" @@ -1041,8 +1042,11 @@ static void odb_update_commondir(const char *name UNUSED, void *cb_data) { struct object_database *odb = cb_data; + struct tmp_objdir *tmp_objdir; struct odb_source *source; + tmp_objdir = tmp_objdir_unapply_primary_odb(); + /* * In theory, we only have to do this for the primary object source, as * alternates' paths are always resolved to an absolute path. @@ -1059,6 +1063,9 @@ static void odb_update_commondir(const char *name UNUSED, free(source->path); source->path = path; } + + if (tmp_objdir) + tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd); } struct object_database *odb_new(struct repository *repo, diff --git a/setup.c b/setup.c index a625f9fbc8..ae66188af3 100644 --- a/setup.c +++ b/setup.c @@ -22,7 +22,6 @@ #include "chdir-notify.h" #include "path.h" #include "quote.h" -#include "tmp-objdir.h" #include "trace.h" #include "trace2.h" #include "worktree.h" @@ -1056,14 +1055,10 @@ static void update_relative_gitdir(const char *name UNUSED, { char *path = reparent_relative_path(old_cwd, new_cwd, repo_get_git_dir(the_repository)); - struct tmp_objdir *tmp_objdir = tmp_objdir_unapply_primary_odb(); - trace_printf_key(&trace_setup_key, "setup: move $GIT_DIR to '%s'", path); set_git_dir_1(path, true); - if (tmp_objdir) - tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd); free(path); }