From c184795fc0eaf660b4fc06e7ee63aa9c136ff1aa Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 2 Oct 2025 09:29:26 +0200 Subject: [PATCH 1/9] meson: add infrastructure to build internal Rust library Add the infrastructure into Meson to build an internal Rust library. Building the Rust parts of Git is for now entirely optional, as they are mostly intended as a test balloon for both Git developers and distributors of Git. So for now, they may contain: - New features that are not mission critical to Git and that users can easily live without. - Alternative implementations of small subsystems. If these test balloons are successful, we will eventually make Rust a mandatory dependency for our build process in Git 3.0. The availability of a Rust toolchain will be auto-detected by Meson at setup time. This behaviour can be tweaked via the `-Drust=` feature toggle. Next to the linkable Rust library, also wire up tests that can be executed via `meson test`. This allows us to use the native unit testing capabilities of Rust. Note that the Rust edition is currently set to 2018. This edition is supported by Rust 1.49, which is the target for the upcoming gcc-rs backend. For now we don't use any features of Rust that would require a newer version, so settling on this old version makes sense so that gcc-rs may become an alternative backend for compiling Git. If we _do_ want to introduce features that were added in more recent editions of Rust, though, we should reevaluate that choice. 
Inspired-by: Ezekiel Newren Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Cargo.toml | 9 +++++++++ meson.build | 10 +++++++++- meson_options.txt | 2 ++ src/cargo-meson.sh | 32 ++++++++++++++++++++++++++++++++ src/lib.rs | 0 src/meson.build | 40 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 92 insertions(+), 1 deletion(-) create mode 100644 Cargo.toml create mode 100755 src/cargo-meson.sh create mode 100644 src/lib.rs create mode 100644 src/meson.build diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000000..45c9b34981 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "gitcore" +version = "0.1.0" +edition = "2018" + +[lib] +crate-type = ["staticlib"] + +[dependencies] diff --git a/meson.build b/meson.build index e8ec0eca16..234a9e9d6f 100644 --- a/meson.build +++ b/meson.build @@ -220,7 +220,7 @@ project('git', 'c', # learned to define __STDC_VERSION__ with C11 and later. We thus require # GNU C99 and fall back to C11. Meson only learned to handle the fallback # with version 1.3.0, so on older versions we use GNU C99 unconditionally. - default_options: meson.version().version_compare('>=1.3.0') ? ['c_std=gnu99,c11'] : ['c_std=gnu99'], + default_options: meson.version().version_compare('>=1.3.0') ? 
['rust_std=2018', 'c_std=gnu99,c11'] : ['rust_std=2018', 'c_std=gnu99'], ) fs = import('fs') @@ -1702,6 +1702,13 @@ version_def_h = custom_target( ) libgit_sources += version_def_h +cargo = find_program('cargo', dirs: program_path, native: true, required: get_option('rust')) +rust_option = get_option('rust').disable_auto_if(not cargo.found()) +if rust_option.allowed() + subdir('src') + libgit_c_args += '-DWITH_RUST' +endif + libgit = declare_dependency( link_with: static_library('git', sources: libgit_sources, @@ -2239,6 +2246,7 @@ summary({ 'pcre2': pcre2, 'perl': perl_features_enabled, 'python': target_python.found(), + 'rust': rust_option.allowed(), }, section: 'Auto-detected features', bool_yn: true) summary({ diff --git a/meson_options.txt b/meson_options.txt index 1668f260a1..143dee9237 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -71,6 +71,8 @@ option('zlib_backend', type: 'combo', choices: ['auto', 'zlib', 'zlib-ng'], valu # Build tweaks. option('breaking_changes', type: 'boolean', value: false, description: 'Enable upcoming breaking changes.') +option('rust', type: 'feature', value: 'auto', + description: 'Enable building with Rust.') option('macos_use_homebrew_gettext', type: 'boolean', value: true, description: 'Use gettext from Homebrew instead of the slightly-broken system-provided one.') diff --git a/src/cargo-meson.sh b/src/cargo-meson.sh new file mode 100755 index 0000000000..99400986d9 --- /dev/null +++ b/src/cargo-meson.sh @@ -0,0 +1,32 @@ +#!/bin/sh + +if test "$#" -lt 2 +then + exit 1 +fi + +SOURCE_DIR="$1" +BUILD_DIR="$2" +BUILD_TYPE=debug + +shift 2 + +for arg +do + case "$arg" in + --release) + BUILD_TYPE=release;; + esac +done + +cargo build --lib --quiet --manifest-path="$SOURCE_DIR/Cargo.toml" --target-dir="$BUILD_DIR" "$@" +RET=$? +if test $RET -ne 0 +then + exit $RET +fi + +if ! 
cmp "$BUILD_DIR/$BUILD_TYPE/libgitcore.a" "$BUILD_DIR/libgitcore.a" >/dev/null 2>&1 +then + cp "$BUILD_DIR/$BUILD_TYPE/libgitcore.a" "$BUILD_DIR/libgitcore.a" +fi diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/meson.build b/src/meson.build new file mode 100644 index 0000000000..c8d874b210 --- /dev/null +++ b/src/meson.build @@ -0,0 +1,40 @@ +libgit_rs_sources = [ + 'lib.rs', +] + +# Unfortunately we must use a wrapper command to move the output file into the +# current build directory. This can be fixed once `cargo build --artifact-dir` +# stabilizes. See https://github.com/rust-lang/cargo/issues/6790 for that +# effort. +cargo_command = [ + shell, + meson.current_source_dir() / 'cargo-meson.sh', + meson.project_source_root(), + meson.current_build_dir(), +] +if get_option('buildtype') == 'release' + cargo_command += '--release' +endif + +libgit_rs = custom_target('git_rs', + input: libgit_rs_sources + [ + meson.project_source_root() / 'Cargo.toml', + ], + output: 'libgitcore.a', + command: cargo_command, +) +libgit_dependencies += declare_dependency(link_with: libgit_rs) + +if get_option('tests') + test('rust', cargo, + args: [ + 'test', + '--manifest-path', + meson.project_source_root() / 'Cargo.toml', + '--target-dir', + meson.current_build_dir() / 'target', + ], + timeout: 0, + protocol: 'rust', + ) +endif From f2301be0765ef1baad163edcae96df92c5e05074 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 2 Oct 2025 09:29:27 +0200 Subject: [PATCH 2/9] Makefile: reorder sources after includes In an upcoming change we'll make some of the sources compile conditionally based on whether or not `WITH_RUST` is defined. To let developers specify that flag in their "config.mak" we'll thus have to reorder our sources so that they come after the include of that file. Do so. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Makefile | 176 +++++++++++++++++++++++++++---------------------------- 1 file changed, 88 insertions(+), 88 deletions(-) diff --git a/Makefile b/Makefile index 555b7f4dc3..7e52625d75 100644 --- a/Makefile +++ b/Makefile @@ -919,6 +919,94 @@ LIB_FILE = libgit.a XDIFF_LIB = xdiff/lib.a REFTABLE_LIB = reftable/libreftable.a +# xdiff and reftable libs may in turn depend on what is in libgit.a +GITLIBS = common-main.o $(LIB_FILE) $(XDIFF_LIB) $(REFTABLE_LIB) $(LIB_FILE) +EXTLIBS = + +GIT_USER_AGENT = git/$(GIT_VERSION) + +ifeq ($(wildcard sha1collisiondetection/lib/sha1.h),sha1collisiondetection/lib/sha1.h) +DC_SHA1_SUBMODULE = auto +endif + +# Set CFLAGS, LDFLAGS and other *FLAGS variables. These might be +# tweaked by config.* below as well as the command-line, both of +# which'll override these defaults. +# Older versions of GCC may require adding "-std=gnu99" at the end. +CFLAGS = -g -O2 -Wall +LDFLAGS = +CC_LD_DYNPATH = -Wl,-rpath, +BASIC_CFLAGS = -I. +BASIC_LDFLAGS = + +# library flags +ARFLAGS = rcs +PTHREAD_CFLAGS = + +# For the 'sparse' target +SPARSE_FLAGS ?= -std=gnu99 -D__STDC_NO_VLA__ +SP_EXTRA_FLAGS = + +# For informing GIT-BUILD-OPTIONS of the SANITIZE=leak,address targets +SANITIZE_LEAK = +SANITIZE_ADDRESS = + +# For the 'coccicheck' target +SPATCH_INCLUDE_FLAGS = --all-includes +SPATCH_FLAGS = +SPATCH_TEST_FLAGS = + +# If *.o files are present, have "coccicheck" depend on them, with +# COMPUTE_HEADER_DEPENDENCIES this will speed up the common-case of +# only needing to re-generate coccicheck results for the users of a +# given API if it's changed, and not all files in the project. If +# COMPUTE_HEADER_DEPENDENCIES=no this will be unset too. +SPATCH_USE_O_DEPENDENCIES = YesPlease + +# Set SPATCH_CONCAT_COCCI to concatenate the contrib/cocci/*.cocci +# files into a single contrib/cocci/ALL.cocci before running +# "coccicheck". 
+# +# Pros: +# +# - Speeds up a one-shot run of "make coccicheck", as we won't have to +# parse *.[ch] files N times for the N *.cocci rules +# +# Cons: +# +# - Will make incremental development of *.cocci slower, as +# e.g. changing strbuf.cocci will re-run all *.cocci. +# +# - Makes error and performance analysis harder, as rules will be +# applied from a monolithic ALL.cocci, rather than +# e.g. strbuf.cocci. To work around this either undefine this, or +# generate a specific patch, e.g. this will always use strbuf.cocci, +# not ALL.cocci: +# +# make contrib/coccinelle/strbuf.cocci.patch +SPATCH_CONCAT_COCCI = YesPlease + +# Rebuild 'coccicheck' if $(SPATCH), its flags etc. change +TRACK_SPATCH_DEFINES = +TRACK_SPATCH_DEFINES += $(SPATCH) +TRACK_SPATCH_DEFINES += $(SPATCH_INCLUDE_FLAGS) +TRACK_SPATCH_DEFINES += $(SPATCH_FLAGS) +TRACK_SPATCH_DEFINES += $(SPATCH_TEST_FLAGS) +GIT-SPATCH-DEFINES: FORCE + @FLAGS='$(TRACK_SPATCH_DEFINES)'; \ + if test x"$$FLAGS" != x"`cat GIT-SPATCH-DEFINES 2>/dev/null`" ; then \ + echo >&2 " * new spatch flags"; \ + echo "$$FLAGS" >GIT-SPATCH-DEFINES; \ + fi + +include config.mak.uname +-include config.mak.autogen +-include config.mak + +ifdef DEVELOPER +include config.mak.dev +endif + GENERATED_H += command-list.h GENERATED_H += config-list.h GENERATED_H += hook-list.h @@ -1387,94 +1475,6 @@ CLAR_TEST_OBJS += $(UNIT_TEST_DIR)/unit-test.o UNIT_TEST_OBJS += $(UNIT_TEST_DIR)/test-lib.o -# xdiff and reftable libs may in turn depend on what is in libgit.a -GITLIBS = common-main.o $(LIB_FILE) $(XDIFF_LIB) $(REFTABLE_LIB) $(LIB_FILE) -EXTLIBS = - -GIT_USER_AGENT = git/$(GIT_VERSION) - -ifeq ($(wildcard sha1collisiondetection/lib/sha1.h),sha1collisiondetection/lib/sha1.h) -DC_SHA1_SUBMODULE = auto -endif - -# Set CFLAGS, LDFLAGS and other *FLAGS variables. These might be -# tweaked by config.* below as well as the command-line, both of -# which'll override these defaults. -# Older versions of GCC may require adding "-std=gnu99" at the end. 
-CFLAGS = -g -O2 -Wall -LDFLAGS = -CC_LD_DYNPATH = -Wl,-rpath, -BASIC_CFLAGS = -I. -BASIC_LDFLAGS = - -# library flags -ARFLAGS = rcs -PTHREAD_CFLAGS = - -# For the 'sparse' target -SPARSE_FLAGS ?= -std=gnu99 -D__STDC_NO_VLA__ -SP_EXTRA_FLAGS = - -# For informing GIT-BUILD-OPTIONS of the SANITIZE=leak,address targets -SANITIZE_LEAK = -SANITIZE_ADDRESS = - -# For the 'coccicheck' target -SPATCH_INCLUDE_FLAGS = --all-includes -SPATCH_FLAGS = -SPATCH_TEST_FLAGS = - -# If *.o files are present, have "coccicheck" depend on them, with -# COMPUTE_HEADER_DEPENDENCIES this will speed up the common-case of -# only needing to re-generate coccicheck results for the users of a -# given API if it's changed, and not all files in the project. If -# COMPUTE_HEADER_DEPENDENCIES=no this will be unset too. -SPATCH_USE_O_DEPENDENCIES = YesPlease - -# Set SPATCH_CONCAT_COCCI to concatenate the contrib/cocci/*.cocci -# files into a single contrib/cocci/ALL.cocci before running -# "coccicheck". -# -# Pros: -# -# - Speeds up a one-shot run of "make coccicheck", as we won't have to -# parse *.[ch] files N times for the N *.cocci rules -# -# Cons: -# -# - Will make incremental development of *.cocci slower, as -# e.g. changing strbuf.cocci will re-run all *.cocci. -# -# - Makes error and performance analysis harder, as rules will be -# applied from a monolithic ALL.cocci, rather than -# e.g. strbuf.cocci. To work around this either undefine this, or -# generate a specific patch, e.g. this will always use strbuf.cocci, -# not ALL.cocci: -# -# make contrib/coccinelle/strbuf.cocci.patch -SPATCH_CONCAT_COCCI = YesPlease - -# Rebuild 'coccicheck' if $(SPATCH), its flags etc. 
change -TRACK_SPATCH_DEFINES = -TRACK_SPATCH_DEFINES += $(SPATCH) -TRACK_SPATCH_DEFINES += $(SPATCH_INCLUDE_FLAGS) -TRACK_SPATCH_DEFINES += $(SPATCH_FLAGS) -TRACK_SPATCH_DEFINES += $(SPATCH_TEST_FLAGS) -GIT-SPATCH-DEFINES: FORCE - @FLAGS='$(TRACK_SPATCH_DEFINES)'; \ - if test x"$$FLAGS" != x"`cat GIT-SPATCH-DEFINES 2>/dev/null`" ; then \ - echo >&2 " * new spatch flags"; \ - echo "$$FLAGS" >GIT-SPATCH-DEFINES; \ - fi - -include config.mak.uname --include config.mak.autogen --include config.mak - -ifdef DEVELOPER -include config.mak.dev -endif - GIT-VERSION-FILE: FORCE @OLD=$$(cat $@ 2>/dev/null || :) && \ $(call version_gen,"$(shell pwd)",GIT-VERSION-FILE.in,$@) && \ From e30c081c6af4963418184dbcd5df37322032f9dc Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 2 Oct 2025 09:29:28 +0200 Subject: [PATCH 3/9] Makefile: introduce infrastructure to build internal Rust library Introduce infrastructure to build the internal Rust library. This mirrors the infrastructure we have added to Meson in the preceding commit. Developers can enable the infrastructure by passing the new `WITH_RUST` build toggle. Inspired-by: Ezekiel Newren Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- .gitignore | 2 ++ Makefile | 37 +++++++++++++++++++++++++++++++++++++ shared.mak | 1 + 3 files changed, 40 insertions(+) diff --git a/.gitignore b/.gitignore index 1803023427..0833453cf6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ /fuzz_corpora +/target/ +/Cargo.lock /GIT-BUILD-DIR /GIT-BUILD-OPTIONS /GIT-CFLAGS diff --git a/Makefile b/Makefile index 7e52625d75..31e79342e1 100644 --- a/Makefile +++ b/Makefile @@ -483,6 +483,14 @@ include shared.mak # Define LIBPCREDIR=/foo/bar if your PCRE header and library files are # in /foo/bar/include and /foo/bar/lib directories. # +# == Optional Rust support == +# +# Define WITH_RUST if you want to include features and subsystems written in +# Rust into Git. 
For now, Rust is still an optional feature of the build +# process. With Git 3.0 though, Rust will always be enabled. +# +# Building Rust code requires Cargo. +# # == SHA-1 and SHA-256 defines == # # === SHA-1 backend === @@ -683,6 +691,7 @@ OBJECTS = OTHER_PROGRAMS = PROGRAM_OBJS = PROGRAMS = +RUST_SOURCES = EXCLUDED_PROGRAMS = SCRIPT_PERL = SCRIPT_PYTHON = @@ -918,6 +927,11 @@ TEST_SHELL_PATH = $(SHELL_PATH) LIB_FILE = libgit.a XDIFF_LIB = xdiff/lib.a REFTABLE_LIB = reftable/libreftable.a +ifdef DEBUG +RUST_LIB = target/debug/libgitcore.a +else +RUST_LIB = target/release/libgitcore.a +endif # xdiff and reftable libs may in turn depend on what is in libgit.a GITLIBS = common-main.o $(LIB_FILE) $(XDIFF_LIB) $(REFTABLE_LIB) $(LIB_FILE) @@ -943,6 +957,15 @@ BASIC_LDFLAGS = ARFLAGS = rcs PTHREAD_CFLAGS = +# Rust flags +CARGO_ARGS = +ifndef V +CARGO_ARGS += --quiet +endif +ifndef DEBUG +CARGO_ARGS += --release +endif + # For the 'sparse' target SPARSE_FLAGS ?= -std=gnu99 -D__STDC_NO_VLA__ SP_EXTRA_FLAGS = @@ -1475,6 +1498,8 @@ CLAR_TEST_OBJS += $(UNIT_TEST_DIR)/unit-test.o UNIT_TEST_OBJS += $(UNIT_TEST_DIR)/test-lib.o +RUST_SOURCES += src/lib.rs + GIT-VERSION-FILE: FORCE @OLD=$$(cat $@ 2>/dev/null || :) && \ $(call version_gen,"$(shell pwd)",GIT-VERSION-FILE.in,$@) && \ @@ -1504,6 +1529,11 @@ endif ALL_CFLAGS = $(DEVELOPER_CFLAGS) $(CPPFLAGS) $(CFLAGS) $(CFLAGS_APPEND) ALL_LDFLAGS = $(LDFLAGS) $(LDFLAGS_APPEND) +ifdef WITH_RUST +BASIC_CFLAGS += -DWITH_RUST +GITLIBS += $(RUST_LIB) +endif + ifdef SANITIZE SANITIZERS := $(foreach flag,$(subst $(comma),$(space),$(SANITIZE)),$(flag)) BASIC_CFLAGS += -fsanitize=$(SANITIZE) -fno-sanitize-recover=$(SANITIZE) @@ -2918,6 +2948,12 @@ scalar$X: scalar.o GIT-LDFLAGS $(GITLIBS) $(LIB_FILE): $(LIB_OBJS) $(QUIET_AR)$(RM) $@ && $(AR) $(ARFLAGS) $@ $^ +$(RUST_LIB): Cargo.toml $(RUST_SOURCES) + $(QUIET_CARGO)cargo build $(CARGO_ARGS) + +.PHONY: rust +rust: $(RUST_LIB) + $(XDIFF_LIB): $(XDIFF_OBJS) $(QUIET_AR)$(RM) $@ && $(AR) $(ARFLAGS) 
$@ $^ @@ -3768,6 +3804,7 @@ clean: profile-clean coverage-clean cocciclean $(RM) $(FUZZ_PROGRAMS) $(RM) $(SP_OBJ) $(RM) $(HCC) + $(RM) -r Cargo.lock target/ $(RM) version-def.h $(RM) -r $(dep_dirs) $(compdb_dir) compile_commands.json $(RM) $(test_bindir_programs) diff --git a/shared.mak b/shared.mak index 5c7bc94785..0e7492076e 100644 --- a/shared.mak +++ b/shared.mak @@ -56,6 +56,7 @@ ifndef V QUIET_MKDIR_P_PARENT = @echo ' ' MKDIR -p $(@D); ## Used in "Makefile" + QUIET_CARGO = @echo ' ' CARGO $@; QUIET_CC = @echo ' ' CC $@; QUIET_AR = @echo ' ' AR $@; QUIET_LINK = @echo ' ' LINK $@; From cb2badb4db67bcd02cc99a336c7b6bb0281980a1 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 2 Oct 2025 09:29:29 +0200 Subject: [PATCH 4/9] help: report on whether or not Rust is enabled We're about to introduce support for Rust into the core of Git, where some (trivial) subsystems are converted to Rust. These subsystems will also retain a C implementation though as Rust is not yet mandatory. Consequently, it now becomes possible for a Git version to have bugs that are specific to whether or not it is built with Rust support overall. Expose information about whether or not Git was built with Rust via our build info. This means that both `git version --build-options`, but also `git bugreport` will now expose that bit of information. Hopefully, this should make it easier for us to discover any Rust-specific issues. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- help.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/help.c b/help.c index bb20498cfd..5854dd4a7e 100644 --- a/help.c +++ b/help.c @@ -791,6 +791,12 @@ void get_version_info(struct strbuf *buf, int show_build_options) strbuf_addf(buf, "shell-path: %s\n", SHELL_PATH); /* NEEDSWORK: also save and output GIT-BUILD_OPTIONS? 
*/ +#if defined WITH_RUST + strbuf_addstr(buf, "rust: enabled\n"); +#else + strbuf_addstr(buf, "rust: disabled\n"); +#endif + if (fsmonitor_ipc__is_supported()) strbuf_addstr(buf, "feature: fsmonitor--daemon\n"); #if defined LIBCURL_VERSION From f366bfe16b350240c70c487d180c76ddcb8a1b2d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 2 Oct 2025 09:29:30 +0200 Subject: [PATCH 5/9] varint: use explicit width for integers The varint subsystem currently uses implicit widths for integers. On the one hand we use `uintmax_t` for the actual value. On the other hand, we use `int` for the length of the encoded varint. Both of these have known maximum values, as we only support at most 16 bytes when encoding varints. Thus, we know that we won't ever exceed `uint64_t` for the actual value and `uint8_t` for the prefix length. Refactor the code to use explicit widths. Besides making the logic platform-independent, it also makes our life a bit easier in the next commit, where we reimplement "varint.c" in Rust. 
Suggested-by: Ezekiel Newren Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- dir.c | 18 ++++++++++-------- read-cache.c | 6 ++++-- varint.c | 6 +++--- varint.h | 4 ++-- 4 files changed, 19 insertions(+), 15 deletions(-) diff --git a/dir.c b/dir.c index 71108ac79b..0a67a99cb3 100644 --- a/dir.c +++ b/dir.c @@ -3579,7 +3579,8 @@ static void write_one_dir(struct untracked_cache_dir *untracked, struct stat_data stat_data; struct strbuf *out = &wd->out; unsigned char intbuf[16]; - unsigned int intlen, value; + unsigned int value; + uint8_t intlen; int i = wd->index++; /* @@ -3632,7 +3633,7 @@ void write_untracked_extension(struct strbuf *out, struct untracked_cache *untra struct ondisk_untracked_cache *ouc; struct write_data wd; unsigned char varbuf[16]; - int varint_len; + uint8_t varint_len; const unsigned hashsz = the_hash_algo->rawsz; CALLOC_ARRAY(ouc, 1); @@ -3738,7 +3739,7 @@ static int read_one_dir(struct untracked_cache_dir **untracked_, struct untracked_cache_dir ud, *untracked; const unsigned char *data = rd->data, *end = rd->end; const unsigned char *eos; - unsigned int value; + uint64_t value; int i; memset(&ud, 0, sizeof(ud)); @@ -3830,7 +3831,8 @@ struct untracked_cache *read_untracked_extension(const void *data, unsigned long struct read_data rd; const unsigned char *next = data, *end = (const unsigned char *)data + sz; const char *ident; - int ident_len; + uint64_t ident_len; + uint64_t varint_len; ssize_t len; const char *exclude_per_dir; const unsigned hashsz = the_hash_algo->rawsz; @@ -3867,8 +3869,8 @@ struct untracked_cache *read_untracked_extension(const void *data, unsigned long if (next >= end) goto done2; - len = decode_varint(&next); - if (next > end || len == 0) + varint_len = decode_varint(&next); + if (next > end || varint_len == 0) goto done2; rd.valid = ewah_new(); @@ -3877,9 +3879,9 @@ struct untracked_cache *read_untracked_extension(const void *data, unsigned long rd.data = next; rd.end = end; rd.index = 0; - 
ALLOC_ARRAY(rd.ucd, len); + ALLOC_ARRAY(rd.ucd, varint_len); - if (read_one_dir(&uc->root, &rd) || rd.index != len) + if (read_one_dir(&uc->root, &rd) || rd.index != varint_len) goto done; next = rd.data; diff --git a/read-cache.c b/read-cache.c index 06ad74db22..41b44148b1 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1807,7 +1807,7 @@ static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool, if (expand_name_field) { const unsigned char *cp = (const unsigned char *)name; - size_t strip_len, previous_len; + uint64_t strip_len, previous_len; /* If we're at the beginning of a block, ignore the previous name */ strip_len = decode_varint(&cp); @@ -2655,8 +2655,10 @@ static int ce_write_entry(struct hashfile *f, struct cache_entry *ce, hashwrite(f, ce->name, len); hashwrite(f, padding, align_padding_size(size, len)); } else { - int common, to_remove, prefix_size; + int common, to_remove; + uint8_t prefix_size; unsigned char to_remove_vi[16]; + for (common = 0; (common < previous_name->len && ce->name[common] && diff --git a/varint.c b/varint.c index 409c4977a1..03cd54416b 100644 --- a/varint.c +++ b/varint.c @@ -1,11 +1,11 @@ #include "git-compat-util.h" #include "varint.h" -uintmax_t decode_varint(const unsigned char **bufp) +uint64_t decode_varint(const unsigned char **bufp) { const unsigned char *buf = *bufp; unsigned char c = *buf++; - uintmax_t val = c & 127; + uint64_t val = c & 127; while (c & 128) { val += 1; if (!val || MSB(val, 7)) @@ -17,7 +17,7 @@ uintmax_t decode_varint(const unsigned char **bufp) return val; } -int encode_varint(uintmax_t value, unsigned char *buf) +uint8_t encode_varint(uint64_t value, unsigned char *buf) { unsigned char varint[16]; unsigned pos = sizeof(varint) - 1; diff --git a/varint.h b/varint.h index f78bb0ca52..eb401935bd 100644 --- a/varint.h +++ b/varint.h @@ -1,7 +1,7 @@ #ifndef VARINT_H #define VARINT_H -int encode_varint(uintmax_t, unsigned char *); -uintmax_t decode_varint(const unsigned char **); +uint8_t 
encode_varint(uint64_t, unsigned char *); +uint64_t decode_varint(const unsigned char **); #endif /* VARINT_H */ From 8832e728d362992a38eef89613b44d24f18e6c2a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 2 Oct 2025 09:29:31 +0200 Subject: [PATCH 6/9] varint: reimplement as test balloon for Rust Implement a trivial test balloon for our Rust build infrastructure by reimplementing the "varint.c" subsystem in Rust. This subsystem is chosen because it is trivial to convert and because it doesn't have any dependencies to other components of Git. If support for Rust is enabled, we stop compiling "varint.c" and instead compile and use "src/varint.rs". Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Makefile | 3 ++ meson.build | 5 ++- src/lib.rs | 1 + src/meson.build | 1 + src/varint.rs | 92 +++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 src/varint.rs diff --git a/Makefile b/Makefile index 31e79342e1..2a7fc5cb1f 100644 --- a/Makefile +++ b/Makefile @@ -1307,7 +1307,9 @@ LIB_OBJS += urlmatch.o LIB_OBJS += usage.o LIB_OBJS += userdiff.o LIB_OBJS += utf8.o +ifndef WITH_RUST LIB_OBJS += varint.o +endif LIB_OBJS += version.o LIB_OBJS += versioncmp.o LIB_OBJS += walker.o @@ -1499,6 +1501,7 @@ CLAR_TEST_OBJS += $(UNIT_TEST_DIR)/unit-test.o UNIT_TEST_OBJS += $(UNIT_TEST_DIR)/test-lib.o RUST_SOURCES += src/lib.rs +RUST_SOURCES += src/varint.rs GIT-VERSION-FILE: FORCE @OLD=$$(cat $@ 2>/dev/null || :) && \ diff --git a/meson.build b/meson.build index 234a9e9d6f..37dfa28601 100644 --- a/meson.build +++ b/meson.build @@ -522,7 +522,6 @@ libgit_sources = [ 'usage.c', 'userdiff.c', 'utf8.c', - 'varint.c', 'version.c', 'versioncmp.c', 'walker.c', @@ -1707,6 +1706,10 @@ rust_option = get_option('rust').disable_auto_if(not cargo.found()) if rust_option.allowed() subdir('src') libgit_c_args += '-DWITH_RUST' +else + libgit_sources += [ + 'varint.c', + ] endif libgit = declare_dependency( 
diff --git a/src/lib.rs b/src/lib.rs index e69de29bb2..9da70d8b57 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -0,0 +1 @@ +pub mod varint; diff --git a/src/meson.build b/src/meson.build index c8d874b210..25b9ad5a14 100644 --- a/src/meson.build +++ b/src/meson.build @@ -1,5 +1,6 @@ libgit_rs_sources = [ 'lib.rs', + 'varint.rs', ] # Unfortunately we must use a wrapper command to move the output file into the diff --git a/src/varint.rs b/src/varint.rs new file mode 100644 index 0000000000..6e610bdd8e --- /dev/null +++ b/src/varint.rs @@ -0,0 +1,92 @@ +#[no_mangle] +pub unsafe extern "C" fn decode_varint(bufp: *mut *const u8) -> u64 { + let mut buf = *bufp; + let mut c = *buf; + let mut val = u64::from(c & 127); + + buf = buf.add(1); + + while (c & 128) != 0 { + val = val.wrapping_add(1); + if val == 0 || val.leading_zeros() < 7 { + return 0; // overflow + } + + c = *buf; + buf = buf.add(1); + + val = (val << 7) + u64::from(c & 127); + } + + *bufp = buf; + val +} + +#[no_mangle] +pub unsafe extern "C" fn encode_varint(value: u64, buf: *mut u8) -> u8 { + let mut varint: [u8; 16] = [0; 16]; + let mut pos = varint.len() - 1; + + varint[pos] = (value & 127) as u8; + + let mut value = value >> 7; + while value != 0 { + pos -= 1; + value -= 1; + varint[pos] = 128 | (value & 127) as u8; + value >>= 7; + } + + if !buf.is_null() { + std::ptr::copy_nonoverlapping(varint.as_ptr().add(pos), buf, varint.len() - pos); + } + + (varint.len() - pos) as u8 +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_decode_varint() { + unsafe { + assert_eq!(decode_varint(&mut [0x00].as_slice().as_ptr()), 0); + assert_eq!(decode_varint(&mut [0x01].as_slice().as_ptr()), 1); + assert_eq!(decode_varint(&mut [0x7f].as_slice().as_ptr()), 127); + assert_eq!(decode_varint(&mut [0x80, 0x00].as_slice().as_ptr()), 128); + assert_eq!(decode_varint(&mut [0x80, 0x01].as_slice().as_ptr()), 129); + assert_eq!(decode_varint(&mut [0x80, 0x7f].as_slice().as_ptr()), 255); + + // Overflows are 
expected to return 0. + assert_eq!(decode_varint(&mut [0x88; 16].as_slice().as_ptr()), 0); + } + } + + #[test] + fn test_encode_varint() { + unsafe { + let mut varint: [u8; 16] = [0; 16]; + + assert_eq!(encode_varint(0, std::ptr::null_mut()), 1); + + assert_eq!(encode_varint(0, varint.as_mut_slice().as_mut_ptr()), 1); + assert_eq!(varint, [0; 16]); + + assert_eq!(encode_varint(10, varint.as_mut_slice().as_mut_ptr()), 1); + assert_eq!(varint, [10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + + assert_eq!(encode_varint(127, varint.as_mut_slice().as_mut_ptr()), 1); + assert_eq!(varint, [127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + + assert_eq!(encode_varint(128, varint.as_mut_slice().as_mut_ptr()), 2); + assert_eq!(varint, [128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + + assert_eq!(encode_varint(129, varint.as_mut_slice().as_mut_ptr()), 2); + assert_eq!(varint, [128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + + assert_eq!(encode_varint(255, varint.as_mut_slice().as_mut_ptr()), 2); + assert_eq!(varint, [128, 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + } + } +} From 8f5daaff927e868b0460dda40cdb0923b8a6ef35 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 2 Oct 2025 09:29:32 +0200 Subject: [PATCH 7/9] BreakingChanges: announce Rust becoming mandatory Over the last couple of years the appetite for bringing Rust into the codebase has grown significantly across the developer base. Introducing Rust is a major change though and has ramifications for the whole ecosystem: - Some platforms have a Rust toolchain available, but have not yet integrated it into their build infrastructure. - Some platforms don't have any support for Rust at all. - Some platforms may have to figure out how to fit Rust into their bootstrapping sequence. Due to this, and given that Git is a critical piece of infrastructure for the whole industry, we cannot just introduce such a heavyweight dependency without doing our due diligence. 
Instead, preceding commits have introduced a test balloon into our build infrastructure that converts one tiny subsystem to use Rust. For now, using Rust to build that subsystem is entirely optional -- if no Rust support is available, we continue to use the C implementation. This test balloon has the intention to give distributions time and let them ease into our adoption of Rust. Having multiple implementations of the same subsystem is not sustainable though, and the plan is to eventually be able to use Rust freely all across our codebase. As such, there is the intent to make Rust become a mandatory part of our build process. Add an announcement to our breaking changes that Rust will become mandatory in Git 3.0. A (very careful and non-binding) estimate might be that this major release might be released in the second half of next year, which should give distributors enough time to prepare for the change. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Documentation/BreakingChanges.adoc | 45 ++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/Documentation/BreakingChanges.adoc b/Documentation/BreakingChanges.adoc index f8d2eba061..c21f902134 100644 --- a/Documentation/BreakingChanges.adoc +++ b/Documentation/BreakingChanges.adoc @@ -165,6 +165,51 @@ A prerequisite for this change is that the ecosystem is ready to support the "reftable" format. Most importantly, alternative implementations of Git like JGit, libgit2 and Gitoxide need to support it. +* Git will require Rust as a mandatory part of the build process. While Git + already started to adopt Rust in Git 2.49, all parts written in Rust are + optional for the time being. This includes: ++ + ** The Rust wrapper around libgit.a that is part of "contrib/" and which has + been introduced in Git 2.49. + ** Subsystems that have an alternative implementation in Rust to test + interoperability between our C and Rust codebase. 
+ ** Newly written features that are not mission critical for a fully functional + Git client. ++ +These changes are meant as test balloons to allow distributors of Git to prepare +for Rust becoming a mandatory part of the build process. There will be multiple +milestones for the introduction of Rust: ++ +-- +1. Initially, with Git 2.52, support for Rust will be auto-detected by Meson and + disabled in our Makefile so that the project can sort out the initial + infrastructure. +2. In Git 2.53, both build systems will default-enable support for Rust. + Consequently, builds will break by default if Rust is not available on the + build host. The use of Rust can still be explicitly disabled via build + flags. +3. In Git 3.0, the build options will be removed and support for Rust is + mandatory. +-- ++ +You can explicitly ask both Meson and our Makefile-based system to enable Rust +by saying `meson configure -Drust=enabled` and `make WITH_RUST=YesPlease`, +respectively. ++ +The Git project will declare the last version before Git 3.0 to be a long-term +support release. This long-term release will receive important bug fixes for at +least four release cycles and security fixes for six release cycles. The Git +project will hand over maintainership of the long-term release to distributors +in case they need to extend the life of that long-term release even further. +Details of how this long-term release will be handed over to the community will +be discussed once the Git project decides to stop officially supporting it. ++ +We will evaluate the impact on downstream distributions before making Rust +mandatory in Git 3.0. If we see that the impact on downstream distributions +would be significant, we may decide to defer this change to a subsequent minor +release. This evaluation will also take into account our own experience with +how painful it is to keep Rust an optional component. + === Removals * Support for grafting commits has long been superseded by git-replace(1). 
From 6ab3977200fc6f69c1a01c0dbefabbbed6b45fb0 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 2 Oct 2025 09:29:33 +0200 Subject: [PATCH 8/9] ci: convert "pedantic" job into full build with breaking changes The "pedantic" CI job is building on Fedora with `DEVOPTS=pedantic`. This build flag doesn't do anything anymore starting with 6a8cbc41ba (developer: enable pedantic by default, 2021-09-03), where we have flipped the default so that developers have to opt-out of pedantic builds via the "no-pedantic" option. As such, all this job really does is to do a normal build on Fedora, which isn't all that interesting. Convert that job into a full build-and-test job that uses Meson with breaking changes enabled. This plugs two gaps: - We now test on another distro that we didn't run tests on beforehand. - We verify that breaking changes work as expected with Meson. Furthermore, in a subsequent commit we'll modify both jobs that use breaking changes to also enable Rust. By converting the Fedora job to use Meson, we ensure that we test our Rust build infrastructure for both build systems. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- .github/workflows/main.yml | 4 ++-- .gitlab-ci.yml | 4 ++-- ci/install-dependencies.sh | 6 +++++- ci/run-build-and-tests.sh | 29 ++++++++--------------------- 4 files changed, 17 insertions(+), 26 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d122e79415..393ea4d1cc 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -379,6 +379,8 @@ jobs: - jobname: linux-breaking-changes cc: gcc image: ubuntu:rolling + - jobname: fedora-breaking-changes-meson + image: fedora:latest - jobname: linux-leaks image: ubuntu:rolling cc: gcc @@ -396,8 +398,6 @@ jobs: # Supported until 2025-04-02. - jobname: linux32 image: i386/ubuntu:focal - - jobname: pedantic - image: fedora:latest # A RHEL 8 compatible distro. Supported until 2029-05-31. 
- jobname: almalinux-8 image: almalinux:8 diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index af10ebb59a..4248506909 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -45,6 +45,8 @@ test:linux: - jobname: linux-breaking-changes image: ubuntu:20.04 CC: gcc + - jobname: fedora-breaking-changes-meson + image: fedora:latest - jobname: linux-TEST-vars image: ubuntu:20.04 CC: gcc @@ -58,8 +60,6 @@ test:linux: - jobname: linux-asan-ubsan image: ubuntu:rolling CC: clang - - jobname: pedantic - image: fedora:latest - jobname: linux-musl-meson image: alpine:latest - jobname: linux32 diff --git a/ci/install-dependencies.sh b/ci/install-dependencies.sh index d061a47293..35bd05b85b 100755 --- a/ci/install-dependencies.sh +++ b/ci/install-dependencies.sh @@ -30,8 +30,12 @@ alpine-*) bash cvs gnupg perl-cgi perl-dbd-sqlite perl-io-tty >/dev/null ;; fedora-*|almalinux-*) + case "$jobname" in + *-meson) + MESON_DEPS="meson ninja";; + esac dnf -yq update >/dev/null && - dnf -yq install shadow-utils sudo make gcc findutils diffutils perl python3 gawk gettext zlib-devel expat-devel openssl-devel curl-devel pcre2-devel >/dev/null + dnf -yq install shadow-utils sudo make pkg-config gcc findutils diffutils perl python3 gawk gettext zlib-devel expat-devel openssl-devel curl-devel pcre2-devel $MESON_DEPS >/dev/null ;; ubuntu-*|i386/ubuntu-*|debian-*) # Required so that apt doesn't wait for user input on certain packages. diff --git a/ci/run-build-and-tests.sh b/ci/run-build-and-tests.sh index 01823fd0f1..3680446649 100755 --- a/ci/run-build-and-tests.sh +++ b/ci/run-build-and-tests.sh @@ -5,12 +5,11 @@ . 
${0%/*}/lib.sh -run_tests=t - case "$jobname" in -linux-breaking-changes) +fedora-breaking-changes-meson|linux-breaking-changes) export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main export WITH_BREAKING_CHANGES=YesPlease + MESONFLAGS="$MESONFLAGS -Dbreaking_changes=true" ;; linux-TEST-vars) export OPENSSL_SHA1_UNSAFE=YesPlease @@ -36,12 +35,6 @@ linux-sha256) linux-reftable|linux-reftable-leaks|osx-reftable) export GIT_TEST_DEFAULT_REF_FORMAT=reftable ;; -pedantic) - # Don't run the tests; we only care about whether Git can be - # built. - export DEVOPTS=pedantic - run_tests= - ;; esac case "$jobname" in @@ -54,21 +47,15 @@ case "$jobname" in -Dtest_output_directory="${TEST_OUTPUT_DIRECTORY:-$(pwd)/t}" \ $MESONFLAGS group "Build" meson compile -C build -- - if test -n "$run_tests" - then - group "Run tests" meson test -C build --print-errorlogs --test-args="$GIT_TEST_OPTS" || ( - ./t/aggregate-results.sh "${TEST_OUTPUT_DIRECTORY:-t}/test-results" - handle_failed_tests - ) - fi + group "Run tests" meson test -C build --print-errorlogs --test-args="$GIT_TEST_OPTS" || ( + ./t/aggregate-results.sh "${TEST_OUTPUT_DIRECTORY:-t}/test-results" + handle_failed_tests + ) ;; *) group Build make - if test -n "$run_tests" - then - group "Run tests" make test || - handle_failed_tests - fi + group "Run tests" make test || + handle_failed_tests ;; esac From e425c40aa00d2ae6b1bbc33cfa9fecd30a0a8ec6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 2 Oct 2025 09:29:34 +0200 Subject: [PATCH 9/9] ci: enable Rust for breaking-changes jobs Enable Rust for our breaking-changes jobs so that we can verify that the build infrastructure and the converted Rust subsystems work as expected. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- ci/install-dependencies.sh | 4 ++-- ci/run-build-and-tests.sh | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ci/install-dependencies.sh b/ci/install-dependencies.sh index 35bd05b85b..0d3aa496fc 100755 --- a/ci/install-dependencies.sh +++ b/ci/install-dependencies.sh @@ -35,7 +35,7 @@ fedora-*|almalinux-*) MESON_DEPS="meson ninja";; esac dnf -yq update >/dev/null && - dnf -yq install shadow-utils sudo make pkg-config gcc findutils diffutils perl python3 gawk gettext zlib-devel expat-devel openssl-devel curl-devel pcre2-devel $MESON_DEPS >/dev/null + dnf -yq install shadow-utils sudo make pkg-config gcc findutils diffutils perl python3 gawk gettext zlib-devel expat-devel openssl-devel curl-devel pcre2-devel $MESON_DEPS cargo >/dev/null ;; ubuntu-*|i386/ubuntu-*|debian-*) # Required so that apt doesn't wait for user input on certain packages. @@ -62,7 +62,7 @@ ubuntu-*|i386/ubuntu-*|debian-*) make libssl-dev libcurl4-openssl-dev libexpat-dev wget sudo default-jre \ tcl tk gettext zlib1g-dev perl-modules liberror-perl libauthen-sasl-perl \ libemail-valid-perl libio-pty-perl libio-socket-ssl-perl libnet-smtp-ssl-perl libdbd-sqlite3-perl libcgi-pm-perl \ - libsecret-1-dev libpcre2-dev meson ninja-build pkg-config \ + libsecret-1-dev libpcre2-dev meson ninja-build pkg-config cargo \ ${CC_PACKAGE:-${CC:-gcc}} $PYTHON_PACKAGE case "$distro" in diff --git a/ci/run-build-and-tests.sh b/ci/run-build-and-tests.sh index 3680446649..c718bd101a 100755 --- a/ci/run-build-and-tests.sh +++ b/ci/run-build-and-tests.sh @@ -9,7 +9,9 @@ case "$jobname" in fedora-breaking-changes-meson|linux-breaking-changes) export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main export WITH_BREAKING_CHANGES=YesPlease + export WITH_RUST=YesPlease MESONFLAGS="$MESONFLAGS -Dbreaking_changes=true" + MESONFLAGS="$MESONFLAGS -Drust=enabled" ;; linux-TEST-vars) export OPENSSL_SHA1_UNSAFE=YesPlease