Merge branch 'kn/fetch-push-bulk-ref-update'

"git push" and "git fetch" are taught to update refs in batches to
gain performance.

* kn/fetch-push-bulk-ref-update:
  receive-pack: handle reference deletions separately
  refs/files: skip updates with errors in batched updates
  receive-pack: use batched reference updates
  send-pack: fix memory leak around duplicate refs
  fetch: use batched reference updates
  refs: add function to translate errors to strings
maint
Junio C Hamano 2025-07-08 15:49:19 -07:00
commit cdb7872247
11 changed files with 265 additions and 103 deletions

View File

@ -640,9 +640,6 @@ static struct ref *get_ref_map(struct remote *remote,
return ref_map;
}

#define STORE_REF_ERROR_OTHER 1
#define STORE_REF_ERROR_DF_CONFLICT 2

static int s_update_ref(const char *action,
struct ref *ref,
struct ref_transaction *transaction,
@ -650,7 +647,6 @@ static int s_update_ref(const char *action,
{
char *msg;
char *rla = getenv("GIT_REFLOG_ACTION");
struct ref_transaction *our_transaction = NULL;
struct strbuf err = STRBUF_INIT;
int ret;

@ -660,43 +656,10 @@ static int s_update_ref(const char *action,
rla = default_rla.buf;
msg = xstrfmt("%s: %s", rla, action);

/*
* If no transaction was passed to us, we manage the transaction
* ourselves. Otherwise, we trust the caller to handle the transaction
* lifecycle.
*/
if (!transaction) {
transaction = our_transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
0, &err);
if (!transaction) {
ret = STORE_REF_ERROR_OTHER;
goto out;
}
}

ret = ref_transaction_update(transaction, ref->name, &ref->new_oid,
check_old ? &ref->old_oid : NULL,
NULL, NULL, 0, msg, &err);
if (ret) {
ret = STORE_REF_ERROR_OTHER;
goto out;
}

if (our_transaction) {
switch (ref_transaction_commit(our_transaction, &err)) {
case 0:
break;
case REF_TRANSACTION_ERROR_NAME_CONFLICT:
ret = STORE_REF_ERROR_DF_CONFLICT;
goto out;
default:
ret = STORE_REF_ERROR_OTHER;
goto out;
}
}

out:
ref_transaction_free(our_transaction);
if (ret)
error("%s", err.buf);
strbuf_release(&err);
@ -1139,7 +1102,6 @@ N_("it took %.2f seconds to check forced updates; you can use\n"
"to avoid this check\n");

static int store_updated_refs(struct display_state *display_state,
const char *remote_name,
int connectivity_checked,
struct ref_transaction *transaction, struct ref *ref_map,
struct fetch_head *fetch_head,
@ -1277,11 +1239,6 @@ static int store_updated_refs(struct display_state *display_state,
}
}

if (rc & STORE_REF_ERROR_DF_CONFLICT)
error(_("some local refs could not be updated; try running\n"
" 'git remote prune %s' to remove any old, conflicting "
"branches"), remote_name);

if (advice_enabled(ADVICE_FETCH_SHOW_FORCED_UPDATES)) {
if (!config->show_forced_updates) {
warning(_(warn_show_forced_updates));
@ -1365,9 +1322,8 @@ static int fetch_and_consume_refs(struct display_state *display_state,
}

trace2_region_enter("fetch", "consume_refs", the_repository);
ret = store_updated_refs(display_state, transport->remote->name,
connectivity_checked, transaction, ref_map,
fetch_head, config);
ret = store_updated_refs(display_state, connectivity_checked,
transaction, ref_map, fetch_head, config);
trace2_region_leave("fetch", "consume_refs", the_repository);

out:
@ -1687,6 +1643,36 @@ cleanup:
return result;
}

struct ref_rejection_data {
int *retcode;
int conflict_msg_shown;
const char *remote_name;
};

static void ref_transaction_rejection_handler(const char *refname,
const struct object_id *old_oid UNUSED,
const struct object_id *new_oid UNUSED,
const char *old_target UNUSED,
const char *new_target UNUSED,
enum ref_transaction_error err,
void *cb_data)
{
struct ref_rejection_data *data = cb_data;

if (err == REF_TRANSACTION_ERROR_NAME_CONFLICT && !data->conflict_msg_shown) {
error(_("some local refs could not be updated; try running\n"
" 'git remote prune %s' to remove any old, conflicting "
"branches"), data->remote_name);
data->conflict_msg_shown = 1;
} else {
const char *reason = ref_transaction_error_msg(err);

error(_("fetching ref %s failed: %s"), refname, reason);
}

*data->retcode = 1;
}

static int do_fetch(struct transport *transport,
struct refspec *rs,
const struct fetch_config *config)
@ -1807,6 +1793,24 @@ static int do_fetch(struct transport *transport,
retcode = 1;
}

/*
* If not atomic, we can still use batched updates, which would be much
* more performant. We don't initiate the transaction before pruning,
* since pruning must be an independent step, to avoid F/D conflicts.
*
* TODO: if reference transactions gain logical conflict resolution, we
* can delete and create refs (with F/D conflicts) in the same transaction
* and this can be moved above the 'prune_refs()' block.
*/
if (!transaction) {
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
REF_TRANSACTION_ALLOW_FAILURE, &err);
if (!transaction) {
retcode = -1;
goto cleanup;
}
}

if (fetch_and_consume_refs(&display_state, transport, transaction, ref_map,
&fetch_head, config)) {
retcode = 1;
@ -1838,7 +1842,6 @@ static int do_fetch(struct transport *transport,
free_refs(tags_ref_map);
}

if (transaction) {
if (retcode)
goto cleanup;

@ -1852,6 +1855,22 @@ static int do_fetch(struct transport *transport,
transaction = NULL;
goto cleanup;
}

if (!atomic_fetch) {
struct ref_rejection_data data = {
.retcode = &retcode,
.conflict_msg_shown = 0,
.remote_name = transport->remote->name,
};

ref_transaction_for_each_rejected_update(transaction,
ref_transaction_rejection_handler,
&data);
if (retcode) {
ref_transaction_free(transaction);
transaction = NULL;
goto cleanup;
}
}

commit_fetch_head(&fetch_head);

View File

@ -1846,36 +1846,102 @@ static void BUG_if_skipped_connectivity_check(struct command *commands,
BUG_if_bug("connectivity check skipped???");
}

static void ref_transaction_rejection_handler(const char *refname,
const struct object_id *old_oid UNUSED,
const struct object_id *new_oid UNUSED,
const char *old_target UNUSED,
const char *new_target UNUSED,
enum ref_transaction_error err,
void *cb_data)
{
struct strmap *failed_refs = cb_data;

strmap_put(failed_refs, refname, (char *)ref_transaction_error_msg(err));
}

static void execute_commands_non_atomic(struct command *commands,
struct shallow_info *si)
{
struct command *cmd;
struct strbuf err = STRBUF_INIT;
const char *reported_error = NULL;
struct strmap failed_refs = STRMAP_INIT;

/*
* Reference updates, where D/F conflicts shouldn't arise due to
* one reference being deleted, while the other being created
* are treated as conflicts in batched updates. This is because
* we don't do conflict resolution inside a transaction. To
* mitigate this, delete references in a separate batch.
*
* NEEDSWORK: Add conflict resolution between deletion and creation
* of reference updates within a transaction. With that, we can
* combine the two phases.
*/
enum processing_phase {
PHASE_DELETIONS,
PHASE_OTHERS
};

for (enum processing_phase phase = PHASE_DELETIONS; phase <= PHASE_OTHERS; phase++) {
for (cmd = commands; cmd; cmd = cmd->next) {
if (!should_process_cmd(cmd) || cmd->run_proc_receive)
continue;

if (phase == PHASE_DELETIONS && !is_null_oid(&cmd->new_oid))
continue;
else if (phase == PHASE_OTHERS && is_null_oid(&cmd->new_oid))
continue;

/*
* Lazily create a transaction only when we know there are
* updates to be added.
*/
if (!transaction) {
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
0, &err);
REF_TRANSACTION_ALLOW_FAILURE, &err);
if (!transaction) {
rp_error("%s", err.buf);
strbuf_reset(&err);
cmd->error_string = "transaction failed to start";
continue;
reported_error = "transaction failed to start";
goto failure;
}
}

cmd->error_string = update(cmd, si);
}

if (!cmd->error_string
&& ref_transaction_commit(transaction, &err)) {
/* No transaction, so nothing to commit */
if (!transaction)
goto cleanup;

if (ref_transaction_commit(transaction, &err)) {
rp_error("%s", err.buf);
strbuf_reset(&err);
cmd->error_string = "failed to update ref";
reported_error = "failed to update refs";
goto failure;
}

ref_transaction_for_each_rejected_update(transaction,
ref_transaction_rejection_handler,
&failed_refs);

if (strmap_empty(&failed_refs))
goto cleanup;

failure:
for (cmd = commands; cmd; cmd = cmd->next) {
if (reported_error)
cmd->error_string = reported_error;
else if (strmap_contains(&failed_refs, cmd->ref_name))
cmd->error_string = strmap_get(&failed_refs, cmd->ref_name);
}

cleanup:
ref_transaction_free(transaction);
}
transaction = NULL;
strmap_clear(&failed_refs, 0);
strbuf_release(&err);
}
}

static void execute_commands_atomic(struct command *commands,

View File

@ -575,30 +575,7 @@ static void print_rejected_refs(const char *refname,
void *cb_data UNUSED)
{
struct strbuf sb = STRBUF_INIT;
const char *reason = "";

switch (err) {
case REF_TRANSACTION_ERROR_NAME_CONFLICT:
reason = "refname conflict";
break;
case REF_TRANSACTION_ERROR_CREATE_EXISTS:
reason = "reference already exists";
break;
case REF_TRANSACTION_ERROR_NONEXISTENT_REF:
reason = "reference does not exist";
break;
case REF_TRANSACTION_ERROR_INCORRECT_OLD_VALUE:
reason = "incorrect old value provided";
break;
case REF_TRANSACTION_ERROR_INVALID_NEW_VALUE:
reason = "invalid new value provided";
break;
case REF_TRANSACTION_ERROR_EXPECTED_SYMREF:
reason = "expected symref but found regular ref";
break;
default:
reason = "unkown failure";
}
const char *reason = ref_transaction_error_msg(err);

strbuf_addf(&sb, "rejected %s %s %s %s\n", refname,
new_oid ? oid_to_hex(new_oid) : new_target,

20
refs.c
View File

@ -3314,3 +3314,23 @@ int ref_update_expects_existing_old_ref(struct ref_update *update)
return (update->flags & REF_HAVE_OLD) &&
(!is_null_oid(&update->old_oid) || update->old_target);
}

const char *ref_transaction_error_msg(enum ref_transaction_error err)
{
switch (err) {
case REF_TRANSACTION_ERROR_NAME_CONFLICT:
return "refname conflict";
case REF_TRANSACTION_ERROR_CREATE_EXISTS:
return "reference already exists";
case REF_TRANSACTION_ERROR_NONEXISTENT_REF:
return "reference does not exist";
case REF_TRANSACTION_ERROR_INCORRECT_OLD_VALUE:
return "incorrect old value provided";
case REF_TRANSACTION_ERROR_INVALID_NEW_VALUE:
return "invalid new value provided";
case REF_TRANSACTION_ERROR_EXPECTED_SYMREF:
return "expected symref but found regular ref";
default:
return "unknown failure";
}
}

5
refs.h
View File

@ -907,6 +907,11 @@ void ref_transaction_for_each_rejected_update(struct ref_transaction *transactio
ref_transaction_for_each_rejected_update_fn cb,
void *cb_data);

/*
* Translate errors to human readable error messages.
*/
const char *ref_transaction_error_msg(enum ref_transaction_error err);

/*
* Free `*transaction` and all associated data.
*/

View File

@ -3208,6 +3208,10 @@ static int files_transaction_finish(struct ref_store *ref_store,
*/
for (i = 0; i < transaction->nr; i++) {
struct ref_update *update = transaction->updates[i];

if (update->rejection_err)
continue;

if (update->flags & REF_DELETING &&
!(update->flags & REF_LOG_ONLY) &&
!(update->flags & REF_IS_PRUNING)) {
@ -3239,6 +3243,9 @@ static int files_transaction_finish(struct ref_store *ref_store,
struct ref_update *update = transaction->updates[i];
struct ref_lock *lock = update->backend_data;

if (update->rejection_err)
continue;

if (update->flags & REF_DELETING &&
!(update->flags & REF_LOG_ONLY)) {
update->flags |= REF_DELETED_RMDIR;

View File

@ -257,6 +257,13 @@ static int receive_status(struct repository *r,
refname);
continue;
}

/*
* Clients sending duplicate refs can cause the same value
* to be overridden, causing a memory leak.
*/
free(hint->remote_status);

if (!strcmp(head, "ng")) {
hint->status = REF_STATUS_REMOTE_REJECT;
if (p)

View File

@ -2293,6 +2293,51 @@ do
test_grep -q "refname conflict" stdout
)
'

test_expect_success "stdin $type batch-updates delete incorrect symbolic ref" '
git init repo &&
test_when_finished "rm -fr repo" &&
(
cd repo &&
test_commit c1 &&
head=$(git rev-parse HEAD) &&
git symbolic-ref refs/heads/symbolic refs/heads/non-existent &&

format_command $type "delete refs/heads/symbolic" "$head" >stdin &&
git update-ref $type --stdin --batch-updates <stdin >stdout &&
test_grep "reference does not exist" stdout
)
'

test_expect_success "stdin $type batch-updates delete with incorrect old_oid" '
git init repo &&
test_when_finished "rm -fr repo" &&
(
cd repo &&
test_commit c1 &&
git branch new-branch &&
test_commit c2 &&
head=$(git rev-parse HEAD) &&

format_command $type "delete refs/heads/new-branch" "$head" >stdin &&
git update-ref $type --stdin --batch-updates <stdin >stdout &&
test_grep "incorrect old value provided" stdout
)
'

test_expect_success "stdin $type batch-updates delete non-existent ref" '
git init repo &&
test_when_finished "rm -fr repo" &&
(
cd repo &&
test_commit commit &&
head=$(git rev-parse HEAD) &&

format_command $type "delete refs/heads/non-existent" "$head" >stdin &&
git update-ref $type --stdin --batch-updates <stdin >stdout &&
test_grep "reference does not exist" stdout
)
'
done

test_expect_success 'update-ref should also create reflog for HEAD' '

View File

@ -120,8 +120,6 @@ test_expect_success 'interleaving hook calls succeed' '

cat >expect <<-EOF &&
hooks/update refs/tags/PRE $ZERO_OID $PRE_OID
hooks/reference-transaction prepared
hooks/reference-transaction committed
hooks/update refs/tags/POST $ZERO_OID $POST_OID
hooks/reference-transaction prepared
hooks/reference-transaction committed

View File

@ -69,15 +69,24 @@ test_expect_success 'stdin mixed with cmdline' '

test_expect_success 'cmdline refs written in order' '
clear_remote &&
test_must_fail git send-pack remote.git A:foo B:foo &&
verify_push A foo
test_must_fail git send-pack remote.git A:foo B:foo 2>err &&
test_grep "multiple updates for ref ${SQ}refs/heads/foo${SQ} not allowed" err &&
test_must_fail git --git-dir=remote.git rev-parse foo
'

test_expect_success 'cmdline refs with multiple duplicates' '
clear_remote &&
test_must_fail git send-pack remote.git A:foo B:foo C:foo 2>err &&
test_grep "multiple updates for ref ${SQ}refs/heads/foo${SQ} not allowed" err &&
test_must_fail git --git-dir=remote.git rev-parse foo
'

test_expect_success '--stdin refs come after cmdline' '
clear_remote &&
echo A:foo >input &&
test_must_fail git send-pack remote.git --stdin B:foo <input &&
verify_push B foo
test_grep "multiple updates for ref ${SQ}refs/heads/foo${SQ} not allowed" err &&
test_must_fail git --git-dir=remote.git rev-parse foo
'

test_expect_success 'refspecs and --mirror do not mix (cmdline)' '

View File

@ -744,8 +744,8 @@ test_expect_success 'pushing valid refs triggers post-receive and post-update ho
EOF

cat >update.expect <<-EOF &&
refs/heads/main $orgmain $newmain
refs/heads/next $orgnext $newnext
refs/heads/main $orgmain $newmain
EOF

cat >post-receive.expect <<-EOF &&
@ -808,8 +808,8 @@ test_expect_success 'deletion of a non-existent ref is not fed to post-receive a
EOF

cat >update.expect <<-EOF &&
refs/heads/main $orgmain $newmain
refs/heads/nonexistent $ZERO_OID $ZERO_OID
refs/heads/main $orgmain $newmain
EOF

cat >post-receive.expect <<-EOF &&
@ -868,10 +868,10 @@ test_expect_success 'mixed ref updates, deletes, invalid deletes trigger hooks w
EOF

cat >update.expect <<-EOF &&
refs/heads/main $orgmain $newmain
refs/heads/next $orgnext $newnext
refs/heads/seen $orgseen $newseen
refs/heads/nonexistent $ZERO_OID $ZERO_OID
refs/heads/main $orgmain $newmain
refs/heads/seen $orgseen $newseen
EOF

cat >post-receive.expect <<-EOF &&
@ -1919,4 +1919,13 @@ test_expect_success 'push with config pack.usePathWalk=true' '
test_region pack-objects path-walk path-walk.txt
'

test_expect_success 'push with F/D conflict with deletion and creation' '
test_when_finished "git branch -D branch" &&
git branch branch/conflict &&
mk_test testrepo heads/branch/conflict &&
git branch -D branch/conflict &&
git branch branch &&
git push testrepo :refs/heads/branch/conflict refs/heads/branch
'

test_done