Merge branch 'jt/de-global-bulk-checkin'

The bulk-checkin code used to depend on file-scope static
singleton state; it has been updated to pass an explicit
transaction instance throughout the callchain.

* jt/de-global-bulk-checkin:
  bulk-checkin: use repository variable from transaction
  bulk-checkin: require transaction for index_blob_bulk_checkin()
  bulk-checkin: remove global transaction state
  bulk-checkin: introduce object database transaction structure
Junio C Hamano 2025-09-15 08:52:05 -07:00
commit 7d00521d7b
9 changed files with 147 additions and 107 deletions
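
The net effect on callers is mechanical: everywhere the code previously
relied on the implicit global, it now obtains and threads an explicit
handle. A minimal sketch of the calling convention before and after this
series (repo stands for whatever struct repository * the caller already
holds):

	/* before: one implicit, file-scope transaction */
	begin_odb_transaction();
	/* ... index objects ... */
	end_odb_transaction();

	/* after: an explicit transaction per object database */
	struct odb_transaction *transaction =
		begin_odb_transaction(repo->objects);
	/* ... index objects ... */
	end_odb_transaction(transaction);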

builtin/add.c

@@ -389,6 +389,7 @@ int cmd_add(int argc,
 	char *seen = NULL;
 	char *ps_matched = NULL;
 	struct lock_file lock_file = LOCK_INIT;
+	struct odb_transaction *transaction;
 
 	repo_config(repo, add_config, NULL);
 
@@ -574,7 +575,7 @@ int cmd_add(int argc,
 		string_list_clear(&only_match_skip_worktree, 0);
 	}
 
-	begin_odb_transaction();
+	transaction = begin_odb_transaction(repo->objects);
 
 	ps_matched = xcalloc(pathspec.nr, 1);
 	if (add_renormalize)
@@ -593,7 +594,7 @@ int cmd_add(int argc,
 
 	if (chmod_arg && pathspec.nr)
 		exit_status |= chmod_pathspec(repo, &pathspec, chmod_arg[0], show_only);
-	end_odb_transaction();
+	end_odb_transaction(transaction);
 
 finish:
 	if (write_locked_index(repo->index, &lock_file,

builtin/unpack-objects.c

@@ -584,6 +584,7 @@ static void unpack_all(void)
 {
 	int i;
 	unsigned char *hdr = fill(sizeof(struct pack_header));
+	struct odb_transaction *transaction;
 
 	if (get_be32(hdr) != PACK_SIGNATURE)
 		die("bad pack file");
@@ -599,12 +600,12 @@
 	progress = start_progress(the_repository,
 				  _("Unpacking objects"), nr_objects);
 	CALLOC_ARRAY(obj_list, nr_objects);
-	begin_odb_transaction();
+	transaction = begin_odb_transaction(the_repository->objects);
 	for (i = 0; i < nr_objects; i++) {
 		unpack_one(i);
 		display_progress(progress, i + 1);
 	}
-	end_odb_transaction();
+	end_odb_transaction(transaction);
 	stop_progress(&progress);
 
 	if (delta_list)

builtin/update-index.c

@@ -77,7 +77,7 @@ static void report(const char *fmt, ...)
 	 * objects invisible while a transaction is active, so flush the
 	 * transaction here before reporting a change made by update-index.
 	 */
-	flush_odb_transaction();
+	flush_odb_transaction(the_repository->objects->transaction);
 	va_start(vp, fmt);
 	vprintf(fmt, vp);
 	putchar('\n');
@@ -940,6 +940,7 @@ int cmd_update_index(int argc,
 	strbuf_getline_fn getline_fn;
 	int parseopt_state = PARSE_OPT_UNKNOWN;
 	struct repository *r = the_repository;
+	struct odb_transaction *transaction;
 	struct option options[] = {
 		OPT_BIT('q', NULL, &refresh_args.flags,
 			N_("continue refresh even when index needs update"),
@@ -1130,7 +1131,7 @@ int cmd_update_index(int argc,
 	 * Allow the object layer to optimize adding multiple objects in
 	 * a batch.
 	 */
-	begin_odb_transaction();
+	transaction = begin_odb_transaction(the_repository->objects);
 	while (ctx.argc) {
 		if (parseopt_state != PARSE_OPT_DONE)
 			parseopt_state = parse_options_step(&ctx, options,
@@ -1213,7 +1214,7 @@ int cmd_update_index(int argc,
 	/*
 	 * By now we have added all of the new objects
 	 */
-	end_odb_transaction();
+	end_odb_transaction(transaction);
 
 	if (split_index > 0) {
 		if (repo_config_get_split_index(the_repository) == 0)

bulk-checkin.c

@@ -19,11 +19,7 @@
 #include "object-file.h"
 #include "odb.h"
 
-static int odb_transaction_nesting;
-
-static struct tmp_objdir *bulk_fsync_objdir;
-
-static struct bulk_checkin_packfile {
+struct bulk_checkin_packfile {
 	char *pack_tmp_name;
 	struct hashfile *f;
 	off_t offset;
@@ -32,27 +28,36 @@ static struct bulk_checkin_packfile {
 	struct pack_idx_entry **written;
 	uint32_t alloc_written;
 	uint32_t nr_written;
-} bulk_checkin_packfile;
+};
 
-static void finish_tmp_packfile(struct strbuf *basename,
-				const char *pack_tmp_name,
-				struct pack_idx_entry **written_list,
-				uint32_t nr_written,
-				struct pack_idx_option *pack_idx_opts,
+struct odb_transaction {
+	struct object_database *odb;
+
+	int nesting;
+	struct tmp_objdir *objdir;
+	struct bulk_checkin_packfile packfile;
+};
+
+static void finish_tmp_packfile(struct odb_transaction *transaction,
+				struct strbuf *basename,
 				unsigned char hash[])
 {
+	struct bulk_checkin_packfile *state = &transaction->packfile;
+	struct repository *repo = transaction->odb->repo;
 	char *idx_tmp_name = NULL;
 
-	stage_tmp_packfiles(the_repository, basename, pack_tmp_name,
-			    written_list, nr_written, NULL, pack_idx_opts, hash,
-			    &idx_tmp_name);
-	rename_tmp_packfile_idx(the_repository, basename, &idx_tmp_name);
+	stage_tmp_packfiles(repo, basename, state->pack_tmp_name,
+			    state->written, state->nr_written, NULL,
+			    &state->pack_idx_opts, hash, &idx_tmp_name);
+	rename_tmp_packfile_idx(repo, basename, &idx_tmp_name);
 
 	free(idx_tmp_name);
 }
 
-static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
+static void flush_bulk_checkin_packfile(struct odb_transaction *transaction)
 {
+	struct bulk_checkin_packfile *state = &transaction->packfile;
+	struct repository *repo = transaction->odb->repo;
 	unsigned char hash[GIT_MAX_RAWSZ];
 	struct strbuf packname = STRBUF_INIT;
 
@@ -69,17 +74,17 @@ static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
 				 CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
 	} else {
 		int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0);
-		fixup_pack_header_footer(the_hash_algo, fd, hash, state->pack_tmp_name,
+		fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name,
 					 state->nr_written, hash,
 					 state->offset);
 		close(fd);
 	}
 
-	strbuf_addf(&packname, "%s/pack/pack-%s.", repo_get_object_directory(the_repository),
-		    hash_to_hex(hash));
-	finish_tmp_packfile(&packname, state->pack_tmp_name,
-			    state->written, state->nr_written,
-			    &state->pack_idx_opts, hash);
+	strbuf_addf(&packname, "%s/pack/pack-%s.",
+		    repo_get_object_directory(transaction->odb->repo),
+		    hash_to_hex_algop(hash, repo->hash_algo));
+
+	finish_tmp_packfile(transaction, &packname, hash);
 	for (uint32_t i = 0; i < state->nr_written; i++)
 		free(state->written[i]);
 
@@ -90,18 +95,18 @@ clear_exit:
 
 	strbuf_release(&packname);
 	/* Make objects we just wrote available to ourselves */
-	reprepare_packed_git(the_repository);
+	reprepare_packed_git(repo);
 }
 
 /*
  * Cleanup after batch-mode fsync_object_files.
  */
-static void flush_batch_fsync(void)
+static void flush_batch_fsync(struct odb_transaction *transaction)
 {
 	struct strbuf temp_path = STRBUF_INIT;
 	struct tempfile *temp;
 
-	if (!bulk_fsync_objdir)
+	if (!transaction->objdir)
 		return;
 
 	/*
@@ -113,7 +118,8 @@ static void flush_batch_fsync(void)
 	 * to ensure that the data in each new object file is durable before
 	 * the final name is visible.
 	 */
-	strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", repo_get_object_directory(the_repository));
+	strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX",
+		    repo_get_object_directory(transaction->odb->repo));
 	temp = xmks_tempfile(temp_path.buf);
 	fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp));
 	delete_tempfile(&temp);
@@ -123,20 +129,21 @@
 	 * Make the object files visible in the primary ODB after their data is
 	 * fully durable.
	 */
-	tmp_objdir_migrate(bulk_fsync_objdir);
-	bulk_fsync_objdir = NULL;
+	tmp_objdir_migrate(transaction->objdir);
+	transaction->objdir = NULL;
 }
 
-static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid)
+static int already_written(struct odb_transaction *transaction,
+			   struct object_id *oid)
 {
 	/* The object may already exist in the repository */
-	if (odb_has_object(the_repository->objects, oid,
+	if (odb_has_object(transaction->odb, oid,
 			   HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR))
 		return 1;
 
 	/* Might want to keep the list sorted */
-	for (uint32_t i = 0; i < state->nr_written; i++)
-		if (oideq(&state->written[i]->oid, oid))
+	for (uint32_t i = 0; i < transaction->packfile.nr_written; i++)
+		if (oideq(&transaction->packfile.written[i]->oid, oid))
 			return 1;
 
 	/* This is a new object we need to keep */
@@ -235,13 +242,15 @@ static int stream_blob_to_pack(struct bulk_checkin_packfile *state,
 }
 
 /* Lazily create backing packfile for the state */
-static void prepare_to_stream(struct bulk_checkin_packfile *state,
+static void prepare_to_stream(struct odb_transaction *transaction,
 			      unsigned flags)
 {
+	struct bulk_checkin_packfile *state = &transaction->packfile;
 	if (!(flags & INDEX_WRITE_OBJECT) || state->f)
 		return;
 
-	state->f = create_tmp_packfile(the_repository, &state->pack_tmp_name);
+	state->f = create_tmp_packfile(transaction->odb->repo,
+				       &state->pack_tmp_name);
 	reset_pack_idx_option(&state->pack_idx_opts);
 
 	/* Pretend we are going to write only one object */
@@ -250,11 +259,11 @@ static void prepare_to_stream(struct bulk_checkin_packfile *state,
 		die_errno("unable to write pack header");
 }
 
-static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
-				struct object_id *result_oid,
-				int fd, size_t size,
-				const char *path, unsigned flags)
+int index_blob_bulk_checkin(struct odb_transaction *transaction,
+			    struct object_id *result_oid, int fd, size_t size,
+			    const char *path, unsigned flags)
 {
+	struct bulk_checkin_packfile *state = &transaction->packfile;
 	off_t seekback, already_hashed_to;
 	struct git_hash_ctx ctx;
 	unsigned char obuf[16384];
@@ -268,21 +277,21 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
 
 	header_len = format_object_header((char *)obuf, sizeof(obuf),
 					  OBJ_BLOB, size);
-	the_hash_algo->init_fn(&ctx);
+	transaction->odb->repo->hash_algo->init_fn(&ctx);
 	git_hash_update(&ctx, obuf, header_len);
 
 	/* Note: idx is non-NULL when we are writing */
 	if ((flags & INDEX_WRITE_OBJECT) != 0) {
 		CALLOC_ARRAY(idx, 1);
 
-		prepare_to_stream(state, flags);
+		prepare_to_stream(transaction, flags);
 		hashfile_checkpoint_init(state->f, &checkpoint);
 	}
 
 	already_hashed_to = 0;
 
 	while (1) {
-		prepare_to_stream(state, flags);
+		prepare_to_stream(transaction, flags);
 		if (idx) {
 			hashfile_checkpoint(state->f, &checkpoint);
 			idx->offset = state->offset;
@@ -300,7 +309,7 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
 			BUG("should not happen");
 		hashfile_truncate(state->f, &checkpoint);
 		state->offset = checkpoint.offset;
-		flush_bulk_checkin_packfile(state);
+		flush_bulk_checkin_packfile(transaction);
 		if (lseek(fd, seekback, SEEK_SET) == (off_t) -1)
 			return error("cannot seek back");
 	}
@@ -309,7 +318,7 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
 		return 0;
 
 	idx->crc32 = crc32_end(state->f);
-	if (already_written(state, result_oid)) {
+	if (already_written(transaction, result_oid)) {
 		hashfile_truncate(state->f, &checkpoint);
 		state->offset = checkpoint.offset;
 		free(idx);
@@ -323,7 +332,7 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
 	return 0;
 }
 
-void prepare_loose_object_bulk_checkin(void)
+void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction)
 {
 	/*
 	 * We lazily create the temporary object directory
@@ -331,15 +340,16 @@ void prepare_loose_object_bulk_checkin(void)
 	 * callers may not know whether any objects will be
 	 * added at the time they call begin_odb_transaction.
	 */
-	if (!odb_transaction_nesting || bulk_fsync_objdir)
+	if (!transaction || transaction->objdir)
 		return;
 
-	bulk_fsync_objdir = tmp_objdir_create(the_repository, "bulk-fsync");
-	if (bulk_fsync_objdir)
-		tmp_objdir_replace_primary_odb(bulk_fsync_objdir, 0);
+	transaction->objdir = tmp_objdir_create(transaction->odb->repo, "bulk-fsync");
+	if (transaction->objdir)
+		tmp_objdir_replace_primary_odb(transaction->objdir, 0);
 }
 
-void fsync_loose_object_bulk_checkin(int fd, const char *filename)
+void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
+				     int fd, const char *filename)
 {
 	/*
 	 * If we have an active ODB transaction, we issue a call that
@@ -348,7 +358,7 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename)
 	 * before renaming the objects to their final names as part of
 	 * flush_batch_fsync.
	 */
-	if (!bulk_fsync_objdir ||
+	if (!transaction || !transaction->objdir ||
 	    git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) {
 		if (errno == ENOSYS)
 			warning(_("core.fsyncMethod = batch is unsupported on this platform"));
@@ -356,36 +366,38 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename)
 	}
 }
 
-int index_blob_bulk_checkin(struct object_id *oid,
-			    int fd, size_t size,
-			    const char *path, unsigned flags)
+struct odb_transaction *begin_odb_transaction(struct object_database *odb)
 {
-	int status = deflate_blob_to_pack(&bulk_checkin_packfile, oid, fd, size,
-					  path, flags);
-	if (!odb_transaction_nesting)
-		flush_bulk_checkin_packfile(&bulk_checkin_packfile);
-	return status;
+	if (!odb->transaction) {
+		CALLOC_ARRAY(odb->transaction, 1);
+		odb->transaction->odb = odb;
+	}
+
+	odb->transaction->nesting += 1;
+
+	return odb->transaction;
 }
 
-void begin_odb_transaction(void)
+void flush_odb_transaction(struct odb_transaction *transaction)
 {
-	odb_transaction_nesting += 1;
-}
-
-void flush_odb_transaction(void)
-{
-	flush_batch_fsync();
-	flush_bulk_checkin_packfile(&bulk_checkin_packfile);
-}
-
-void end_odb_transaction(void)
-{
-	odb_transaction_nesting -= 1;
-	if (odb_transaction_nesting < 0)
-		BUG("Unbalanced ODB transaction nesting");
-
-	if (odb_transaction_nesting)
+	if (!transaction)
 		return;
 
-	flush_odb_transaction();
+	flush_batch_fsync(transaction);
+	flush_bulk_checkin_packfile(transaction);
+}
+
+void end_odb_transaction(struct odb_transaction *transaction)
+{
+	if (!transaction || transaction->nesting == 0)
+		BUG("Unbalanced ODB transaction nesting");
+
+	transaction->nesting -= 1;
+
+	if (transaction->nesting)
+		return;
+
+	flush_odb_transaction(transaction);
+	transaction->odb->transaction = NULL;
+	free(transaction);
 }
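
Note that the rewritten begin/end pair above preserves the old nesting
semantics: begin_odb_transaction() lazily allocates the single per-ODB
transaction and bumps its nesting count, and only the outermost
end_odb_transaction() flushes, clears odb->transaction, and frees the
handle. A small sketch of what that implies for nested callers
(illustrative only, not code from this series):

	struct odb_transaction *outer = begin_odb_transaction(repo->objects);
	struct odb_transaction *inner = begin_odb_transaction(repo->objects);
	/* outer == inner: both alias repo->objects->transaction */
	end_odb_transaction(inner);	/* nesting 2 -> 1, nothing flushed */
	end_odb_transaction(outer);	/* nesting 1 -> 0: flush, then free */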

bulk-checkin.h

@@ -5,13 +5,20 @@
 #define BULK_CHECKIN_H
 
 #include "object.h"
+#include "odb.h"
 
-void prepare_loose_object_bulk_checkin(void);
-void fsync_loose_object_bulk_checkin(int fd, const char *filename);
+struct odb_transaction;
+
+void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction);
+void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
+				     int fd, const char *filename);
 
 /*
- * This creates one packfile per large blob unless bulk-checkin
- * machinery is "plugged".
+ * This writes the specified object to a packfile. Objects written here
+ * during the same transaction are written to the same packfile. The
+ * packfile is not flushed until the transaction is flushed. The caller
+ * is expected to ensure a valid transaction is setup for objects to be
+ * recorded to.
  *
  * This also bypasses the usual "convert-to-git" dance, and that is on
  * purpose. We could write a streaming version of the converting
@@ -24,8 +31,8 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename);
 * binary blobs, they generally do not want to get any conversion, and
 * callers should avoid this code path when filters are requested.
 */
-int index_blob_bulk_checkin(struct object_id *oid,
-			    int fd, size_t size,
+int index_blob_bulk_checkin(struct odb_transaction *transaction,
+			    struct object_id *oid, int fd, size_t size,
 			    const char *path, unsigned flags);
 
 /*
@@ -35,20 +42,20 @@ int index_blob_bulk_checkin(struct object_id *oid,
 * and objects are only visible after the outermost transaction
 * is complete or the transaction is flushed.
 */
-void begin_odb_transaction(void);
+struct odb_transaction *begin_odb_transaction(struct object_database *odb);
 
 /*
 * Make any objects that are currently part of a pending object
 * database transaction visible. It is valid to call this function
 * even if no transaction is active.
 */
-void flush_odb_transaction(void);
+void flush_odb_transaction(struct odb_transaction *transaction);
 
 /*
 * Tell the object database to make any objects from the
 * current transaction visible if this is the final nested
 * transaction.
 */
-void end_odb_transaction(void);
+void end_odb_transaction(struct odb_transaction *transaction);
 
 #endif
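
Since index_blob_bulk_checkin() no longer creates an implicit
transaction, a caller that has none open is expected to bracket the call
itself, as index_fd() does in object-file.c below. A minimal sketch,
assuming fd, size, and path were already prepared by the caller and that
reporting the failure with error() is enough for its purposes:

	struct odb_transaction *transaction;
	struct object_id oid;

	transaction = begin_odb_transaction(repo->objects);
	if (index_blob_bulk_checkin(transaction, &oid, fd, size,
				    path, INDEX_WRITE_OBJECT) < 0)
		error("unable to index blob");
	end_odb_transaction(transaction);	/* outermost end flushes the pack */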

cache-tree.c

@@ -474,6 +474,7 @@ static int update_one(struct cache_tree *it,
 
 int cache_tree_update(struct index_state *istate, int flags)
 {
+	struct odb_transaction *transaction;
 	int skip, i;
 
 	i = verify_cache(istate, flags);
@@ -489,10 +490,10 @@ int cache_tree_update(struct index_state *istate, int flags)
 
 	trace_performance_enter();
 	trace2_region_enter("cache_tree", "update", the_repository);
-	begin_odb_transaction();
+	transaction = begin_odb_transaction(the_repository->objects);
 	i = update_one(istate->cache_tree, istate->cache, istate->cache_nr,
 		       "", 0, &skip, flags);
-	end_odb_transaction();
+	end_odb_transaction(transaction);
 	trace2_region_leave("cache_tree", "update", the_repository);
 	trace_performance_leave("cache_tree_update");
 	if (i < 0)

object-file.c

@@ -674,7 +674,7 @@ static void close_loose_object(struct odb_source *source,
 		goto out;
 
 	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
-		fsync_loose_object_bulk_checkin(fd, filename);
+		fsync_loose_object_bulk_checkin(source->odb->transaction, fd, filename);
 	else if (fsync_object_files > 0)
 		fsync_or_die(fd, filename);
 	else
@@ -852,7 +852,7 @@ static int write_loose_object(struct odb_source *source,
 	static struct strbuf filename = STRBUF_INIT;
 
 	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
-		prepare_loose_object_bulk_checkin();
+		prepare_loose_object_bulk_checkin(source->odb->transaction);
 
 	odb_loose_path(source, &filename, oid);
 
@@ -941,7 +941,7 @@ int stream_loose_object(struct odb_source *source,
 	int hdrlen;
 
 	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
-		prepare_loose_object_bulk_checkin();
+		prepare_loose_object_bulk_checkin(source->odb->transaction);
 
 	/* Since oid is not determined, save tmp file to odb path. */
 	strbuf_addf(&filename, "%s/", source->path);
@@ -1253,18 +1253,26 @@ int index_fd(struct index_state *istate, struct object_id *oid,
 	 * Call xsize_t() only when needed to avoid potentially unnecessary
 	 * die() for large files.
	 */
-	if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path))
+	if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path)) {
 		ret = index_stream_convert_blob(istate, oid, fd, path, flags);
-	else if (!S_ISREG(st->st_mode))
+	} else if (!S_ISREG(st->st_mode)) {
 		ret = index_pipe(istate, oid, fd, type, path, flags);
-	else if ((st->st_size >= 0 && (size_t) st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) ||
-		 type != OBJ_BLOB ||
-		 (path && would_convert_to_git(istate, path)))
+	} else if ((st->st_size >= 0 &&
+		    (size_t)st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) ||
+		   type != OBJ_BLOB ||
+		   (path && would_convert_to_git(istate, path))) {
 		ret = index_core(istate, oid, fd, xsize_t(st->st_size),
 				 type, path, flags);
-	else
-		ret = index_blob_bulk_checkin(oid, fd, xsize_t(st->st_size), path,
-					      flags);
+	} else {
+		struct odb_transaction *transaction;
+
+		transaction = begin_odb_transaction(the_repository->objects);
+		ret = index_blob_bulk_checkin(transaction,
+					      oid, fd, xsize_t(st->st_size),
+					      path, flags);
+		end_odb_transaction(transaction);
+	}
 
 	close(fd);
 	return ret;
 }

odb.h

@@ -92,6 +92,7 @@ struct odb_source {
 
 struct packed_git;
 struct cached_object_entry;
+struct odb_transaction;
 
 /*
  * The object database encapsulates access to objects in a repository. It
@@ -102,6 +103,13 @@ struct object_database {
 	/* Repository that owns this database. */
 	struct repository *repo;
 
+	/*
+	 * State of the current object database transaction. Only one
+	 * transaction may be pending at a time. Is NULL when no transaction is
+	 * configured.
+	 */
+	struct odb_transaction *transaction;
+
 	/*
 	 * Set of all object directories; the main directory is first (and
 	 * cannot be NULL after initialization). Subsequent directories are

read-cache.c

@@ -3947,6 +3947,7 @@ int add_files_to_cache(struct repository *repo, const char *prefix,
 		       const struct pathspec *pathspec, char *ps_matched,
 		       int include_sparse, int flags)
 {
+	struct odb_transaction *transaction;
 	struct update_callback_data data;
 	struct rev_info rev;
 
@@ -3972,9 +3973,9 @@ int add_files_to_cache(struct repository *repo, const char *prefix,
 	 * This function is invoked from commands other than 'add', which
 	 * may not have their own transaction active.
	 */
-	begin_odb_transaction();
+	transaction = begin_odb_transaction(repo->objects);
 	run_diff_files(&rev, DIFF_RACY_IS_MODIFIED);
-	end_odb_transaction();
+	end_odb_transaction(transaction);
 
 	release_revisions(&rev);
 	return !!data.add_errors;