From ca6daa1368eb9b0b48f64ef57907821318d7971c Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 1 Jul 2025 21:22:27 +0000 Subject: [PATCH 01/11] hash: add a constant for the default hash algorithm Right now, SHA-1 is the default hash algorithm in Git. However, this may change in the future. We have many places in our code that use the SHA-1 constant to indicate the default hash if none is specified, but it will end up being more practical to specify this explicitly and clearly using a constant for whatever the default hash algorithm is. Then, if we decide to change it in the future, we can simply replace the constant representing the default with a new value. For these reasons, introduce GIT_HASH_DEFAULT to represent the default hash algorithm. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- hash.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hash.h b/hash.h index d6422ddf45..0d3d85e04c 100644 --- a/hash.h +++ b/hash.h @@ -174,6 +174,8 @@ static inline void git_SHA256_Clone(git_SHA256_CTX *dst, const git_SHA256_CTX *s #define GIT_HASH_SHA256 2 /* Number of algorithms supported (including unknown). */ #define GIT_HASH_NALGOS (GIT_HASH_SHA256 + 1) +/* Default hash algorithm if unspecified. */ +#define GIT_HASH_DEFAULT GIT_HASH_SHA1 /* "sha1", big-endian */ #define GIT_SHA1_FORMAT_ID 0x73686131 From 1f68f3da877a91fefd6cc84b79986af2ef73d21e Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 1 Jul 2025 21:22:28 +0000 Subject: [PATCH 02/11] hash: add a constant for the legacy hash algorithm We have a variety of uses of GIT_HASH_SHA1 littered throughout our code. Some of these really mean to represent specifically SHA-1, but some actually represent the original hash algorithm used in Git which is implied by older, legacy formats and protocols which do not contain hash information. For instance, the bundle v1 and v2 formats do not contain hash algorithm information, and thus SHA-1 is implied by the use of these formats.
Add a constant for documentary purposes which indicates this value. It will always be the same as SHA-1, since this is an essential part of these formats, but its use indicates this particular reason and not any other reason why SHA-1 might be used. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- hash.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hash.h b/hash.h index 0d3d85e04c..953e840d15 100644 --- a/hash.h +++ b/hash.h @@ -176,6 +176,8 @@ static inline void git_SHA256_Clone(git_SHA256_CTX *dst, const git_SHA256_CTX *s #define GIT_HASH_NALGOS (GIT_HASH_SHA256 + 1) /* Default hash algorithm if unspecified. */ #define GIT_HASH_DEFAULT GIT_HASH_SHA1 +/* Legacy hash algorithm. Implied for older data formats which don't specify. */ +#define GIT_HASH_SHA1_LEGACY GIT_HASH_SHA1 /* "sha1", big-endian */ #define GIT_SHA1_FORMAT_ID 0x73686131 From dc9c16c2fc8222364277696cb4d70782281d3c06 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 1 Jul 2025 21:22:29 +0000 Subject: [PATCH 03/11] builtin: use default hash when outside a repository We have some commands that can operate inside or outside a repository. If we're operating outside a repository, we clearly cannot use the repository's hash algorithm as a default since it doesn't exist, so let's pick the default rather than specifically SHA-1. Right now this results in no functional change since the default is SHA-1, but that may change in the future. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- builtin/apply.c | 2 +- builtin/diff.c | 2 +- builtin/hash-object.c | 2 +- builtin/index-pack.c | 2 +- builtin/ls-remote.c | 2 +- builtin/patch-id.c | 2 +- builtin/shortlog.c | 2 +- builtin/show-index.c | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/builtin/apply.c b/builtin/apply.c index a1e20c593d..d642a40251 100644 --- a/builtin/apply.c +++ b/builtin/apply.c @@ -29,7 +29,7 @@ int cmd_apply(int argc, * cf.
https://lore.kernel.org/git/xmqqcypfcmn4.fsf@gitster.g/ */ if (!the_hash_algo) - repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + repo_set_hash_algo(the_repository, GIT_HASH_DEFAULT); argc = apply_parse_options(argc, argv, &state, &force_apply, &options, diff --git a/builtin/diff.c b/builtin/diff.c index fa963808c3..357702df9e 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -483,7 +483,7 @@ int cmd_diff(int argc, * configurable via a command line option. */ if (nongit) - repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + repo_set_hash_algo(the_repository, GIT_HASH_DEFAULT); init_diff_ui_defaults(); git_config(git_diff_ui_config, NULL); diff --git a/builtin/hash-object.c b/builtin/hash-object.c index 6a99ec250d..213a302e05 100644 --- a/builtin/hash-object.c +++ b/builtin/hash-object.c @@ -104,7 +104,7 @@ int cmd_hash_object(int argc, prefix = setup_git_directory_gently(&nongit); if (nongit && !the_hash_algo) - repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + repo_set_hash_algo(the_repository, GIT_HASH_DEFAULT); if (vpath && prefix) { vpath_free = prefix_filename(prefix, vpath); diff --git a/builtin/index-pack.c b/builtin/index-pack.c index bb7925bd29..352ce7f88a 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -2034,7 +2034,7 @@ int cmd_index_pack(int argc, * choice but to guess the object hash. */ if (!the_repository->hash_algo) - repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + repo_set_hash_algo(the_repository, GIT_HASH_DEFAULT); opts.flags &= ~(WRITE_REV | WRITE_REV_VERIFY); if (rev_index) { diff --git a/builtin/ls-remote.c b/builtin/ls-remote.c index 01a4d4daa1..df09000b30 100644 --- a/builtin/ls-remote.c +++ b/builtin/ls-remote.c @@ -112,7 +112,7 @@ int cmd_ls_remote(int argc, * depending on what object hash the remote uses. 
*/ if (!the_repository->hash_algo) - repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + repo_set_hash_algo(the_repository, GIT_HASH_DEFAULT); packet_trace_identity("ls-remote"); diff --git a/builtin/patch-id.c b/builtin/patch-id.c index cdef2ec10a..26f04b0335 100644 --- a/builtin/patch-id.c +++ b/builtin/patch-id.c @@ -254,7 +254,7 @@ int cmd_patch_id(int argc, * the code that computes patch IDs to always use SHA1. */ if (!the_hash_algo) - repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + repo_set_hash_algo(the_repository, GIT_HASH_DEFAULT); generate_id_list(opts ? opts > 1 : config.stable, opts ? opts == 3 : config.verbatim); diff --git a/builtin/shortlog.c b/builtin/shortlog.c index 30075b67be..795a631625 100644 --- a/builtin/shortlog.c +++ b/builtin/shortlog.c @@ -418,7 +418,7 @@ int cmd_shortlog(int argc, * git/nongit so that we do not have to do this. */ if (nongit && !the_hash_algo) - repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + repo_set_hash_algo(the_repository, GIT_HASH_DEFAULT); git_config(git_default_config, NULL); shortlog_init(&log); diff --git a/builtin/show-index.c b/builtin/show-index.c index 9d4ecf5e7b..2c3e2940ce 100644 --- a/builtin/show-index.c +++ b/builtin/show-index.c @@ -47,7 +47,7 @@ int cmd_show_index(int argc, * the index file passed in and use that instead. */ if (!the_hash_algo) - repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + repo_set_hash_algo(the_repository, GIT_HASH_DEFAULT); hashsz = the_hash_algo->rawsz; From 667d251a04c1dd769fb5a71bbe94d6d15ae594f1 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 1 Jul 2025 21:22:30 +0000 Subject: [PATCH 04/11] Use legacy hash for legacy formats We have a large variety of data formats and protocols where no hash algorithm was defined and the default was assumed to always be SHA-1. 
Instead of explicitly stating SHA-1, let's use the constant to represent the legacy hash algorithm (which is still SHA-1) so that it's clear for documentary purposes that it's a legacy fallback option and not an intentional choice to use SHA-1. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- builtin/receive-pack.c | 2 +- bundle.c | 4 ++-- connect.c | 6 +++--- fetch-pack.c | 2 +- pkt-line.c | 2 +- remote-curl.c | 2 +- serve.c | 2 +- setup.c | 4 ++-- transport.c | 2 +- 9 files changed, 13 insertions(+), 13 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index a317d6c278..24b33a3a5c 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -2136,7 +2136,7 @@ static struct command *read_head_info(struct packet_reader *reader, use_push_options = 1; hash = parse_feature_value(feature_list, "object-format", &len, NULL); if (!hash) { - hash = hash_algos[GIT_HASH_SHA1].name; + hash = hash_algos[GIT_HASH_SHA1_LEGACY].name; len = strlen(hash); } if (xstrncmpz(the_hash_algo->name, hash, len)) diff --git a/bundle.c b/bundle.c index b0a3fee2ef..61e81bb0c3 100644 --- a/bundle.c +++ b/bundle.c @@ -95,7 +95,7 @@ int read_bundle_header_fd(int fd, struct bundle_header *header, * by an "object-format=" capability, which is being handled in * `parse_capability()`. */ - header->hash_algo = &hash_algos[GIT_HASH_SHA1]; + header->hash_algo = &hash_algos[GIT_HASH_SHA1_LEGACY]; /* The bundle header ends with an empty line */ while (!strbuf_getwholeline_fd(&buf, fd, '\n') && @@ -507,7 +507,7 @@ int create_bundle(struct repository *r, const char *path, * SHA1. * 2. @filter is required because we parsed an object filter. 
*/ - if (the_hash_algo != &hash_algos[GIT_HASH_SHA1] || revs.filter.choice) + if (the_hash_algo != &hash_algos[GIT_HASH_SHA1_LEGACY] || revs.filter.choice) min_version = 3; if (argc > 1) { diff --git a/connect.c b/connect.c index 3280435331..e77287f426 100644 --- a/connect.c +++ b/connect.c @@ -251,7 +251,7 @@ static void process_capabilities(struct packet_reader *reader, size_t *linelen) reader->hash_algo = &hash_algos[hash_algo]; free(hash_name); } else { - reader->hash_algo = &hash_algos[GIT_HASH_SHA1]; + reader->hash_algo = &hash_algos[GIT_HASH_SHA1_LEGACY]; } } @@ -500,7 +500,7 @@ static void send_capabilities(int fd_out, struct packet_reader *reader) reader->hash_algo = &hash_algos[hash_algo]; packet_write_fmt(fd_out, "object-format=%s", reader->hash_algo->name); } else { - reader->hash_algo = &hash_algos[GIT_HASH_SHA1]; + reader->hash_algo = &hash_algos[GIT_HASH_SHA1_LEGACY]; } if (server_feature_v2("promisor-remote", &promisor_remote_info)) { char *reply = promisor_remote_reply(promisor_remote_info); @@ -665,7 +665,7 @@ int server_supports_hash(const char *desired, int *feature_supported) if (feature_supported) *feature_supported = !!hash; if (!hash) { - hash = hash_algos[GIT_HASH_SHA1].name; + hash = hash_algos[GIT_HASH_SHA1_LEGACY].name; len = strlen(hash); } while (hash) { diff --git a/fetch-pack.c b/fetch-pack.c index fa4231fee7..95f66ffc1d 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1342,7 +1342,7 @@ static void write_fetch_command_and_capabilities(struct strbuf *req_buf, die(_("mismatched algorithms: client %s; server %s"), the_hash_algo->name, hash_name); packet_buf_write(req_buf, "object-format=%s", the_hash_algo->name); - } else if (hash_algo_by_ptr(the_hash_algo) != GIT_HASH_SHA1) { + } else if (hash_algo_by_ptr(the_hash_algo) != GIT_HASH_SHA1_LEGACY) { die(_("the server does not support algorithm '%s'"), the_hash_algo->name); } diff --git a/pkt-line.c b/pkt-line.c index a5bcbc96fb..fc583feb26 100644 --- a/pkt-line.c +++ b/pkt-line.c @@ 
-617,7 +617,7 @@ void packet_reader_init(struct packet_reader *reader, int fd, reader->buffer_size = sizeof(packet_buffer); reader->options = options; reader->me = "git"; - reader->hash_algo = &hash_algos[GIT_HASH_SHA1]; + reader->hash_algo = &hash_algos[GIT_HASH_SHA1_LEGACY]; strbuf_init(&reader->scratch, 0); } diff --git a/remote-curl.c b/remote-curl.c index b8bc3a80cf..84f4694780 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -285,7 +285,7 @@ static const struct git_hash_algo *detect_hash_algo(struct discovery *heads) * back to SHA1, which may or may not be correct. */ if (!p) - return &hash_algos[GIT_HASH_SHA1]; + return &hash_algos[GIT_HASH_SHA1_LEGACY]; algo = hash_algo_by_length((p - heads->buf) / 2); if (algo == GIT_HASH_UNKNOWN) diff --git a/serve.c b/serve.c index e3ccf1505c..53ecab3b42 100644 --- a/serve.c +++ b/serve.c @@ -14,7 +14,7 @@ static int advertise_sid = -1; static int advertise_object_info = -1; -static int client_hash_algo = GIT_HASH_SHA1; +static int client_hash_algo = GIT_HASH_SHA1_LEGACY; static int always_advertise(struct repository *r UNUSED, struct strbuf *value UNUSED) diff --git a/setup.c b/setup.c index f93bd6a24a..3d2b3e745b 100644 --- a/setup.c +++ b/setup.c @@ -2222,11 +2222,11 @@ void initialize_repository_version(int hash_algo, * version will get adjusted by git-clone(1) once it has learned about * the remote repository's format. 
*/ - if (hash_algo != GIT_HASH_SHA1 || + if (hash_algo != GIT_HASH_SHA1_LEGACY || ref_storage_format != REF_STORAGE_FORMAT_FILES) target_version = GIT_REPO_VERSION_READ; - if (hash_algo != GIT_HASH_SHA1 && hash_algo != GIT_HASH_UNKNOWN) + if (hash_algo != GIT_HASH_SHA1_LEGACY && hash_algo != GIT_HASH_UNKNOWN) git_config_set("extensions.objectformat", hash_algos[hash_algo].name); else if (reinit) diff --git a/transport.c b/transport.c index 6c2801bcbd..c123ac1e38 100644 --- a/transport.c +++ b/transport.c @@ -1243,7 +1243,7 @@ struct transport *transport_get(struct remote *remote, const char *url) ret->smart_options->receivepack = remote->receivepack; } - ret->hash_algo = &hash_algos[GIT_HASH_SHA1]; + ret->hash_algo = &hash_algos[GIT_HASH_SHA1_LEGACY]; return ret; } From d6e616cee741fc3f67fd3b7c328175b932d0aaa5 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 1 Jul 2025 21:22:31 +0000 Subject: [PATCH 05/11] setup: use the default algorithm to initialize repo format When we define a new repository format with REPOSITORY_FORMAT_INIT, we always use GIT_HASH_SHA1, and this value ends up getting used as the default value to initialize a repository if none of the command line, environment, or config tell us to do otherwise. Because we might not always want to use SHA-1 as the default, let's instead specify the default hash algorithm constant so that we will use whatever the specified default is. However, we also need to continue to read older repositories. If we're in a v0 repository or extensions.objectformat is not set, then we must continue to default to the original hash algorithm: SHA-1. If an algorithm is set explicitly, however, it will override the hash_algo member of the repository_format struct and we'll get the right value. Similarly, if the repository was initialized before Git 0.99.3, then it may lack a core.repositoryformatversion key, and some repositories lack a config file altogether. 
In both cases, format->version is -1 and we need to assume that SHA-1 is in use. Because clear_repository_format reinitializes the struct repository_format and therefore sets the hash_algo member to the default (which could in the future not be SHA-1), we need to reset this member explicitly. We know, however, that at the point we call read_repository_format, we are actually reading an existing repository and not initializing a new one or operating outside of a repository, so we are not changing the default behavior back to SHA-1 if the default algorithm is different. It is potentially questionable that we ignore all repository configuration if there is a config file but it doesn't have core.repositoryformatversion set, in which case we reset all of the configuration to the default. However, it is unclear what the right thing to do instead with such an old repository is and a simple git init will add the missing entry, so for now, we simply honor what the existing code does and reset the value to the default, simply adding our initialization to SHA-1. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- setup.c | 5 ++++- setup.h | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/setup.c b/setup.c index 3d2b3e745b..03a61bd06a 100644 --- a/setup.c +++ b/setup.c @@ -835,9 +835,12 @@ static void init_repository_format(struct repository_format *format) int read_repository_format(struct repository_format *format, const char *path) { clear_repository_format(format); + format->hash_algo = GIT_HASH_SHA1_LEGACY; git_config_from_file(check_repo_format, path, format); - if (format->version == -1) + if (format->version == -1) { clear_repository_format(format); + format->hash_algo = GIT_HASH_SHA1_LEGACY; + } return format->version; } diff --git a/setup.h b/setup.h index 18dc3b7368..8522fa8575 100644 --- a/setup.h +++ b/setup.h @@ -149,7 +149,7 @@ struct repository_format { { \ .version = -1, \ .is_bare = -1, \ - .hash_algo = GIT_HASH_SHA1, \ + .hash_algo = GIT_HASH_DEFAULT, \ .ref_storage_format = REF_STORAGE_FORMAT_FILES, \ .unknown_extensions = STRING_LIST_INIT_DUP, \ .v1_only_extensions = STRING_LIST_INIT_DUP, \ From c470ac4ac41b02994f2f10b4134c40661d7435be Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 1 Jul 2025 21:22:32 +0000 Subject: [PATCH 06/11] t: default to compile-time default hash if not set Right now, the default compile-time hash is SHA-1. However, in the future, this might change and it would be helpful to gracefully handle this case in our testsuite. To avoid making these assumptions, let's introduce a variable that contains the built-in default hash and use it in our setup code as the fallback value if no hash was explicitly set. For now, this is always SHA-1, but in a future commit, we'll allow adjusting this and the variable will be more useful. To allow us to make our tests more robust, allow test_oid to take the --hash=builtin option to specify this hash, whatever it is. Additionally, add a DEFAULT_HASH_ALGORITHM prerequisite to check for the compile-time hash. 
Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- t/test-lib-functions.sh | 5 ++++- t/test-lib.sh | 7 ++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh index bee4a2ca34..6ec95ea51f 100644 --- a/t/test-lib-functions.sh +++ b/t/test-lib-functions.sh @@ -1695,7 +1695,7 @@ test_set_hash () { # Detect the hash algorithm in use. test_detect_hash () { - case "$GIT_TEST_DEFAULT_HASH" in + case "${GIT_TEST_DEFAULT_HASH:-$GIT_TEST_BUILTIN_HASH}" in "sha256") test_hash_algo=sha256 test_compat_hash_algo=sha1 @@ -1767,6 +1767,9 @@ test_oid () { --hash=compat) algo="$test_compat_hash_algo" && shift;; + --hash=builtin) + algo="$GIT_TEST_BUILTIN_HASH" && + shift;; --hash=*) algo="${1#--hash=}" && shift;; diff --git a/t/test-lib.sh b/t/test-lib.sh index 92d0db13d7..be71890678 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -536,7 +536,8 @@ export GIT_COMMITTER_EMAIL GIT_COMMITTER_NAME export GIT_COMMITTER_DATE GIT_AUTHOR_DATE export EDITOR -GIT_DEFAULT_HASH="${GIT_TEST_DEFAULT_HASH:-sha1}" +GIT_TEST_BUILTIN_HASH=sha1 +GIT_DEFAULT_HASH="${GIT_TEST_DEFAULT_HASH:-$GIT_TEST_BUILTIN_HASH}" export GIT_DEFAULT_HASH GIT_DEFAULT_REF_FORMAT="${GIT_TEST_DEFAULT_REF_FORMAT:-files}" export GIT_DEFAULT_REF_FORMAT @@ -1895,6 +1896,10 @@ test_lazy_prereq SHA1 ' esac ' +test_lazy_prereq DEFAULT_HASH_ALGORITHM ' + test "$GIT_TEST_BUILTIN_HASH" = "$GIT_DEFAULT_HASH" +' + test_lazy_prereq DEFAULT_REPO_FORMAT ' test_have_prereq SHA1,REFFILES ' From 6866b422608ebfd25ba65935fd2d5378029ec3ea Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 1 Jul 2025 21:22:33 +0000 Subject: [PATCH 07/11] t1007: choose the built-in hash outside of a repo Right now, the built-in default hash is always SHA-1, but that will change in a future commit. Instead of assuming that operating outside of a repository will always use SHA-1, simply ask test_oid for the built-in hash instead, which will always be correct. 
Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- t/t1007-hash-object.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/t1007-hash-object.sh b/t/t1007-hash-object.sh index dbbe9fb0d4..b4e8d04885 100755 --- a/t/t1007-hash-object.sh +++ b/t/t1007-hash-object.sh @@ -252,9 +252,9 @@ test_expect_success '--literally complains about non-standard types' ' test_must_fail git hash-object -t bogus --literally --stdin ' -test_expect_success '--stdin outside of repository (uses SHA-1)' ' +test_expect_success '--stdin outside of repository (uses default hash)' ' nongit git hash-object --stdin actual && - echo "$(test_oid --hash=sha1 hello)" >expect && + echo "$(test_oid --hash=builtin hello)" >expect && test_cmp expect actual ' From f957ce078f61266b3212b88d9c357a1b7f071a6f Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 1 Jul 2025 21:22:34 +0000 Subject: [PATCH 08/11] t4042: choose the built-in hash outside of a repo Right now, the built-in default hash is always SHA-1, but that will change in a future commit. Instead of assuming that operating outside of a repository will always use SHA-1, provide constants for both algorithms and then simply ask test_oid for the built-in hash instead, which will always be correct. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- t/t4042-diff-textconv-caching.sh | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/t/t4042-diff-textconv-caching.sh b/t/t4042-diff-textconv-caching.sh index ff0e73531b..31018ceba2 100755 --- a/t/t4042-diff-textconv-caching.sh +++ b/t/t4042-diff-textconv-caching.sh @@ -120,6 +120,14 @@ test_expect_success 'log notes cache and still use cache for -p' ' ' test_expect_success 'caching is silently ignored outside repo' ' + test_oid_cache <<-\EOM && + oid1 sha1:5626abf + oid1 sha256:a4ed1f3 + oid2 sha1:f719efd + oid2 sha256:aa9e7dc + EOM + oid1=$(test_oid --hash=builtin oid1) && + oid2=$(test_oid --hash=builtin oid2) && mkdir -p non-repo && echo one >non-repo/one && echo two >non-repo/two && @@ -129,9 +137,9 @@ test_expect_success 'caching is silently ignored outside repo' ' -c diff.test.textconv="tr a-z A-Z <" \ -c diff.test.cachetextconv=true \ diff --no-index one two >actual && - cat >expect <<-\EOF && + cat >expect <<-EOF && diff --git a/one b/two - index 5626abf..f719efd 100644 + index $oid1..$oid2 100644 --- a/one +++ b/two @@ -1 +1 @@ From 9d619f2ef8c95a791d34f5d3cb2793dcc0b8610d Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 1 Jul 2025 21:22:35 +0000 Subject: [PATCH 09/11] t5300: choose the built-in hash outside of a repo Right now, the built-in default hash is always SHA-1, but that will change in a future commit. Instead of assuming that operating outside of a repository will always use SHA-1, look up the default hash algorithm for operating outside of a repository using an appropriate environment variable, which will always be correct. Additionally, for operations outside of a repository, use the DEFAULT_HASH_ALGORITHM prerequisite rather than SHA1. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- t/t5300-pack-object.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh index a5932b6a8b..5013373891 100755 --- a/t/t5300-pack-object.sh +++ b/t/t5300-pack-object.sh @@ -525,7 +525,7 @@ test_expect_success 'index-pack --strict works in non-repo' ' test_path_is_file foo.idx ' -test_expect_success SHA1 'show-index works OK outside a repository' ' +test_expect_success DEFAULT_HASH_ALGORITHM 'show-index works OK outside a repository' ' nongit git show-index Date: Tue, 1 Jul 2025 21:22:36 +0000 Subject: [PATCH 10/11] help: add a build option for default hash We'd like users to be able to determine the hash algorithm that is the builtin default in their version of Git. This is useful for troubleshooting, especially when we decide to change the default. Add an entry for the default hash in the output of git version --build-options so that users can easily access that information and include it in bug reports. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- help.c | 1 + 1 file changed, 1 insertion(+) diff --git a/help.c b/help.c index 21b778707a..bd0be2ee57 100644 --- a/help.c +++ b/help.c @@ -810,6 +810,7 @@ void get_version_info(struct strbuf *buf, int show_build_options) SHA1_UNSAFE_BACKEND); #endif strbuf_addf(buf, "SHA-256: %s\n", SHA256_BACKEND); + strbuf_addf(buf, "default-hash: %s\n", hash_algos[GIT_HASH_DEFAULT].name); } } From c79bb70a2e7d9158ec165ea16ad45371cd6e350d Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 1 Jul 2025 21:22:37 +0000 Subject: [PATCH 11/11] Enable SHA-256 by default in breaking changes mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our document on breaking changes indicates that we intend to default to SHA-256 in Git 3.0. Since most people choose the default option, this is an important security upgrade to our defaults. 
To allow people to test this case, when WITH_BREAKING_CHANGES is set in the configuration, build Git with SHA-256 as the default hash. Update the testsuite to use the build options information to automatically choose the right value. Note that if the command substitution for GIT_TEST_BUILTIN_HASH fails, so does the testsuite—and quite spectacularly at that. Thus, the case where the Git binary is somehow subtly broken will not go undetected. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- hash.h | 8 +++++++- t/test-lib.sh | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/hash.h b/hash.h index 953e840d15..3fcbe9bcba 100644 --- a/hash.h +++ b/hash.h @@ -174,8 +174,14 @@ static inline void git_SHA256_Clone(git_SHA256_CTX *dst, const git_SHA256_CTX *s #define GIT_HASH_SHA256 2 /* Number of algorithms supported (including unknown). */ #define GIT_HASH_NALGOS (GIT_HASH_SHA256 + 1) + /* Default hash algorithm if unspecified. */ -#define GIT_HASH_DEFAULT GIT_HASH_SHA1 +#ifdef WITH_BREAKING_CHANGES +# define GIT_HASH_DEFAULT GIT_HASH_SHA256 +#else +# define GIT_HASH_DEFAULT GIT_HASH_SHA1 +#endif + /* Legacy hash algorithm. Implied for older data formats which don't specify. */ #define GIT_HASH_SHA1_LEGACY GIT_HASH_SHA1 diff --git a/t/test-lib.sh b/t/test-lib.sh index be71890678..315543f293 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -536,7 +536,7 @@ export GIT_COMMITTER_EMAIL GIT_COMMITTER_NAME export GIT_COMMITTER_DATE GIT_AUTHOR_DATE export EDITOR -GIT_TEST_BUILTIN_HASH=sha1 +GIT_TEST_BUILTIN_HASH=$("$GIT_BUILD_DIR/git" version --build-options | sed -ne 's/^default-hash: //p') GIT_DEFAULT_HASH="${GIT_TEST_DEFAULT_HASH:-$GIT_TEST_BUILTIN_HASH}" export GIT_DEFAULT_HASH GIT_DEFAULT_REF_FORMAT="${GIT_TEST_DEFAULT_REF_FORMAT:-files}"