Merge branch 'cc/signed-fast-export-import'

"git fast-export | git fast-import" learns to deal with commit and
tag objects with embedded signatures a bit better.

* cc/signed-fast-export-import:
  fast-export, fast-import: add support for signed-commits
  fast-export: do not modify memory from get_commit_buffer
  git-fast-export.adoc: clarify why 'verbatim' may not be a good idea
  fast-export: rename --signed-tags='warn' to 'warn-verbatim'
  fast-export: fix missing whitespace after switch
  git-fast-import.adoc: add missing LF in the BNF
maint
Junio C Hamano 2025-03-29 16:39:06 +09:00
commit 01d17c0530
5 changed files with 317 additions and 56 deletions

View File

@ -27,17 +27,33 @@ OPTIONS
Insert 'progress' statements every <n> objects, to be shown by
'git fast-import' during import.

--signed-tags=(verbatim|warn|warn-strip|strip|abort)::
--signed-tags=(verbatim|warn-verbatim|warn-strip|strip|abort)::
Specify how to handle signed tags. Since any transformation
after the export can change the tag names (which can also happen
when excluding revisions) the signatures will not match.
after the export (or during the export, such as excluding
revisions) can change the hashes being signed, the signatures
may become invalid.
+
When asking to 'abort' (which is the default), this program will die
when encountering a signed tag. With 'strip', the tags will silently
be made unsigned, with 'warn-strip' they will be made unsigned but a
warning will be displayed, with 'verbatim', they will be silently
exported and with 'warn', they will be exported, but you will see a
warning.
exported and with 'warn-verbatim' (or 'warn', a deprecated synonym),
they will be exported, but you will see a warning. 'verbatim' and
'warn-verbatim' should only be used if you know that no transformation
affecting tags or any commit in their history will be performed by you
or by fast-export or fast-import, or if you do not care that the
resulting tag will have an invalid signature.

--signed-commits=(verbatim|warn-verbatim|warn-strip|strip|abort)::
Specify how to handle signed commits. Behaves exactly as
'--signed-tags', but for commits. Default is 'abort'.
+
Earlier versions of this command that did not have '--signed-commits'
behaved as if '--signed-commits=strip'. As an escape hatch for users
of tools that call 'git fast-export' but do not yet support
'--signed-commits', you may set the environment variable
'FAST_EXPORT_SIGNED_COMMITS_NOABORT=1' in order to change the default
from 'abort' to 'warn-strip'.

--tag-of-filtered-object=(abort|drop|rewrite)::
Specify how to handle tags whose tagged object is filtered out.

View File

@ -431,13 +431,22 @@ and control the current import process. More detailed discussion
Create or update a branch with a new commit, recording one logical
change to the project.

////
Yes, it's intentional that the 'gpgsig' line doesn't have a trailing
`LF`; the definition of `data` has a byte-count prefix, so it
doesn't need an `LF` to act as a terminator (and `data` also already
includes an optional trailing `LF?` just in case you want to include
one).
////

....
'commit' SP <ref> LF
mark?
original-oid?
('author' (SP <name>)? SP LT <email> GT SP <when> LF)?
'committer' (SP <name>)? SP LT <email> GT SP <when> LF
('encoding' SP <encoding>)?
('gpgsig' SP <alg> LF data)?
('encoding' SP <encoding> LF)?
data
('from' SP <commit-ish> LF)?
('merge' SP <commit-ish> LF)*
@ -505,6 +514,15 @@ that was selected by the --date-format=<fmt> command-line option.
See ``Date Formats'' above for the set of supported formats, and
their syntax.

`gpgsig`
^^^^^^^^

The optional `gpgsig` command is used to include a PGP/GPG signature
that signs the commit data.

Here <alg> specifies which hashing algorithm is used for this
signature, either `sha1` or `sha256`.

`encoding`
^^^^^^^^^^
The optional `encoding` command indicates the encoding of the commit

View File

@ -35,8 +35,11 @@ static const char *fast_export_usage[] = {
NULL
};

enum sign_mode { SIGN_ABORT, SIGN_VERBATIM, SIGN_STRIP, SIGN_WARN_VERBATIM, SIGN_WARN_STRIP };

static int progress;
static enum signed_tag_mode { SIGNED_TAG_ABORT, VERBATIM, WARN, WARN_STRIP, STRIP } signed_tag_mode = SIGNED_TAG_ABORT;
static enum sign_mode signed_tag_mode = SIGN_ABORT;
static enum sign_mode signed_commit_mode = SIGN_ABORT;
static enum tag_of_filtered_mode { TAG_FILTERING_ABORT, DROP, REWRITE } tag_of_filtered_mode = TAG_FILTERING_ABORT;
static enum reencode_mode { REENCODE_ABORT, REENCODE_YES, REENCODE_NO } reencode_mode = REENCODE_ABORT;
static int fake_missing_tagger;
@ -53,23 +56,24 @@ static int anonymize;
static struct hashmap anonymized_seeds;
static struct revision_sources revision_sources;

static int parse_opt_signed_tag_mode(const struct option *opt,
static int parse_opt_sign_mode(const struct option *opt,
const char *arg, int unset)
{
enum signed_tag_mode *val = opt->value;

if (unset || !strcmp(arg, "abort"))
*val = SIGNED_TAG_ABORT;
enum sign_mode *val = opt->value;
if (unset)
return 0;
else if (!strcmp(arg, "abort"))
*val = SIGN_ABORT;
else if (!strcmp(arg, "verbatim") || !strcmp(arg, "ignore"))
*val = VERBATIM;
else if (!strcmp(arg, "warn"))
*val = WARN;
*val = SIGN_VERBATIM;
else if (!strcmp(arg, "warn-verbatim") || !strcmp(arg, "warn"))
*val = SIGN_WARN_VERBATIM;
else if (!strcmp(arg, "warn-strip"))
*val = WARN_STRIP;
*val = SIGN_WARN_STRIP;
else if (!strcmp(arg, "strip"))
*val = STRIP;
*val = SIGN_STRIP;
else
return error("Unknown signed-tags mode: %s", arg);
return error("Unknown %s mode: %s", opt->long_name, arg);
return 0;
}

@ -510,21 +514,6 @@ static void show_filemodify(struct diff_queue_struct *q,
}
}

/*
 * Locate the value of the first "encoding" header line inside a commit
 * buffer.
 *
 * The search covers the bytes from 'begin' up to (not including) 'end';
 * when 'end' is NULL the NUL-terminated string starting at 'begin' is
 * searched instead.
 *
 * Returns a pointer to the first byte after "\nencoding ", or NULL when
 * no such line exists.
 *
 * NOTE: although 'begin' is declared const, this function overwrites
 * the byte that ends the encoding line with '\0', so the caller's
 * buffer is modified in place.
 */
static const char *find_encoding(const char *begin, const char *end)
{
/* The leading '\n' anchors the match to the start of a line. */
const char *needle = "\nencoding ";
char *bol, *eol;

bol = memmem(begin, end ? end - begin : strlen(begin),
needle, strlen(needle));
if (!bol)
return NULL;
/* Skip over the key so that bol points at the value itself. */
bol += strlen(needle);
/* strchrnul() yields either the '\n' ending the line or the final NUL. */
eol = strchrnul(bol, '\n');
/* Terminate the value in place; this writes into the searched buffer. */
*eol = '\0';
return bol;
}

static char *anonymize_ref_component(void)
{
static int counter;
@ -626,13 +615,53 @@ static void anonymize_ident_line(const char **beg, const char **end)
*end = out->buf + out->len;
}

/*
* find_commit_multiline_header is similar to find_commit_header,
* except that it handles multi-line headers, rather than simply
* returning the first line of the header.
*
* The returned string has had the ' ' line continuation markers
* removed, and points to allocated memory that must be free()d (not
* to memory within 'msg').
*
* If the header is found, then *end is set to point at the '\n' in
* msg that immediately follows the header value.
*/
static const char *find_commit_multiline_header(const char *msg,
const char *key,
const char **end)
{
struct strbuf val = STRBUF_INIT;
const char *bol, *eol;
size_t len;

bol = find_commit_header(msg, key, &len);
if (!bol)
return NULL;
eol = bol + len;
strbuf_add(&val, bol, len);

while (eol[0] == '\n' && eol[1] == ' ') {
bol = eol + 2;
eol = strchrnul(bol, '\n');
strbuf_addch(&val, '\n');
strbuf_add(&val, bol, eol - bol);
}

*end = eol;
return strbuf_detach(&val, NULL);
}

static void handle_commit(struct commit *commit, struct rev_info *rev,
struct string_list *paths_of_changed_objects)
{
int saved_output_format = rev->diffopt.output_format;
const char *commit_buffer;
const char *commit_buffer, *commit_buffer_cursor;
const char *author, *author_end, *committer, *committer_end;
const char *encoding, *message;
const char *encoding = NULL;
size_t encoding_len;
const char *signature_alg = NULL, *signature = NULL;
const char *message;
char *reencoded = NULL;
struct commit_list *p;
const char *refname;
@ -641,21 +670,43 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
rev->diffopt.output_format = DIFF_FORMAT_CALLBACK;

parse_commit_or_die(commit);
commit_buffer = repo_get_commit_buffer(the_repository, commit, NULL);
author = strstr(commit_buffer, "\nauthor ");
commit_buffer_cursor = commit_buffer = repo_get_commit_buffer(the_repository, commit, NULL);

author = strstr(commit_buffer_cursor, "\nauthor ");
if (!author)
die("could not find author in commit %s",
oid_to_hex(&commit->object.oid));
author++;
author_end = strchrnul(author, '\n');
committer = strstr(author_end, "\ncommitter ");
commit_buffer_cursor = author_end = strchrnul(author, '\n');

committer = strstr(commit_buffer_cursor, "\ncommitter ");
if (!committer)
die("could not find committer in commit %s",
oid_to_hex(&commit->object.oid));
committer++;
committer_end = strchrnul(committer, '\n');
message = strstr(committer_end, "\n\n");
encoding = find_encoding(committer_end, message);
commit_buffer_cursor = committer_end = strchrnul(committer, '\n');

/*
* find_commit_header() and find_commit_multiline_header() get
* a `+ 1` because commit_buffer_cursor points at the trailing
* "\n" at the end of the previous line, but they want a
* pointer to the beginning of the next line.
*/

if (*commit_buffer_cursor == '\n') {
encoding = find_commit_header(commit_buffer_cursor + 1, "encoding", &encoding_len);
if (encoding)
commit_buffer_cursor = encoding + encoding_len;
}

if (*commit_buffer_cursor == '\n') {
if ((signature = find_commit_multiline_header(commit_buffer_cursor + 1, "gpgsig", &commit_buffer_cursor)))
signature_alg = "sha1";
else if ((signature = find_commit_multiline_header(commit_buffer_cursor + 1, "gpgsig-sha256", &commit_buffer_cursor)))
signature_alg = "sha256";
}

message = strstr(commit_buffer_cursor, "\n\n");
if (message)
message += 2;

@ -694,16 +745,20 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
if (anonymize) {
reencoded = anonymize_commit_message();
} else if (encoding) {
switch(reencode_mode) {
char *buf;
switch (reencode_mode) {
case REENCODE_YES:
reencoded = reencode_string(message, "UTF-8", encoding);
buf = xstrfmt("%.*s", (int)encoding_len, encoding);
reencoded = reencode_string(message, "UTF-8", buf);
free(buf);
break;
case REENCODE_NO:
break;
case REENCODE_ABORT:
die("Encountered commit-specific encoding %s in commit "
die("Encountered commit-specific encoding %.*s in commit "
"%s; use --reencode=[yes|no] to handle it",
encoding, oid_to_hex(&commit->object.oid));
(int)encoding_len, encoding,
oid_to_hex(&commit->object.oid));
}
}
if (!commit->parents)
@ -714,8 +769,33 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
printf("%.*s\n%.*s\n",
(int)(author_end - author), author,
(int)(committer_end - committer), committer);
if (signature) {
switch (signed_commit_mode) {
case SIGN_ABORT:
die("encountered signed commit %s; use "
"--signed-commits=<mode> to handle it",
oid_to_hex(&commit->object.oid));
case SIGN_WARN_VERBATIM:
warning("exporting signed commit %s",
oid_to_hex(&commit->object.oid));
/* fallthru */
case SIGN_VERBATIM:
printf("gpgsig %s\ndata %u\n%s",
signature_alg,
(unsigned)strlen(signature),
signature);
break;
case SIGN_WARN_STRIP:
warning("stripping signature from commit %s",
oid_to_hex(&commit->object.oid));
/* fallthru */
case SIGN_STRIP:
break;
}
free((char *)signature);
}
if (!reencoded && encoding)
printf("encoding %s\n", encoding);
printf("encoding %.*s\n", (int)encoding_len, encoding);
printf("data %u\n%s",
(unsigned)(reencoded
? strlen(reencoded) : message
@ -828,22 +908,22 @@ static void handle_tag(const char *name, struct tag *tag)
const char *signature = strstr(message,
"\n-----BEGIN PGP SIGNATURE-----\n");
if (signature)
switch(signed_tag_mode) {
case SIGNED_TAG_ABORT:
switch (signed_tag_mode) {
case SIGN_ABORT:
die("encountered signed tag %s; use "
"--signed-tags=<mode> to handle it",
oid_to_hex(&tag->object.oid));
case WARN:
case SIGN_WARN_VERBATIM:
warning("exporting signed tag %s",
oid_to_hex(&tag->object.oid));
/* fallthru */
case VERBATIM:
case SIGN_VERBATIM:
break;
case WARN_STRIP:
case SIGN_WARN_STRIP:
warning("stripping signature from tag %s",
oid_to_hex(&tag->object.oid));
/* fallthru */
case STRIP:
case SIGN_STRIP:
message_size = signature + 1 - message;
break;
}
@ -853,7 +933,7 @@ static void handle_tag(const char *name, struct tag *tag)
tagged = tag->tagged;
tagged_mark = get_object_mark(tagged);
if (!tagged_mark) {
switch(tag_of_filtered_mode) {
switch (tag_of_filtered_mode) {
case TAG_FILTERING_ABORT:
die("tag %s tags unexported object; use "
"--tag-of-filtered-object=<mode> to handle it",
@ -965,7 +1045,7 @@ static void get_tags_and_duplicates(struct rev_cmdline_info *info)
continue;
}

switch(commit->object.type) {
switch (commit->object.type) {
case OBJ_COMMIT:
break;
case OBJ_BLOB:
@ -1189,6 +1269,7 @@ int cmd_fast_export(int argc,
const char *prefix,
struct repository *repo UNUSED)
{
const char *env_signed_commits_noabort;
struct rev_info revs;
struct commit *commit;
char *export_filename = NULL,
@ -1202,7 +1283,10 @@ int cmd_fast_export(int argc,
N_("show progress after <n> objects")),
OPT_CALLBACK(0, "signed-tags", &signed_tag_mode, N_("mode"),
N_("select handling of signed tags"),
parse_opt_signed_tag_mode),
parse_opt_sign_mode),
OPT_CALLBACK(0, "signed-commits", &signed_commit_mode, N_("mode"),
N_("select handling of signed commits"),
parse_opt_sign_mode),
OPT_CALLBACK(0, "tag-of-filtered-object", &tag_of_filtered_mode, N_("mode"),
N_("select handling of tags that tag filtered objects"),
parse_opt_tag_of_filtered_mode),
@ -1243,6 +1327,10 @@ int cmd_fast_export(int argc,
if (argc == 1)
usage_with_options (fast_export_usage, options);

env_signed_commits_noabort = getenv("FAST_EXPORT_SIGNED_COMMITS_NOABORT");
if (env_signed_commits_noabort && *env_signed_commits_noabort)
signed_commit_mode = SIGN_WARN_STRIP;

/* we handle encodings */
git_config(git_default_config, NULL);


View File

@ -2719,10 +2719,13 @@ static struct hash_list *parse_merge(unsigned int *count)

static void parse_new_commit(const char *arg)
{
static struct strbuf sig = STRBUF_INIT;
static struct strbuf msg = STRBUF_INIT;
struct string_list siglines = STRING_LIST_INIT_NODUP;
struct branch *b;
char *author = NULL;
char *committer = NULL;
char *sig_alg = NULL;
char *encoding = NULL;
struct hash_list *merge_list = NULL;
unsigned int merge_count;
@ -2746,6 +2749,13 @@ static void parse_new_commit(const char *arg)
}
if (!committer)
die("Expected committer but didn't get one");
if (skip_prefix(command_buf.buf, "gpgsig ", &v)) {
sig_alg = xstrdup(v);
read_next_command();
parse_data(&sig, 0, NULL);
read_next_command();
} else
strbuf_setlen(&sig, 0);
if (skip_prefix(command_buf.buf, "encoding ", &v)) {
encoding = xstrdup(v);
read_next_command();
@ -2819,10 +2829,23 @@ static void parse_new_commit(const char *arg)
strbuf_addf(&new_data,
"encoding %s\n",
encoding);
if (sig_alg) {
if (!strcmp(sig_alg, "sha1"))
strbuf_addstr(&new_data, "gpgsig ");
else if (!strcmp(sig_alg, "sha256"))
strbuf_addstr(&new_data, "gpgsig-sha256 ");
else
die("Expected gpgsig algorithm sha1 or sha256, got %s", sig_alg);
string_list_split_in_place(&siglines, sig.buf, "\n", -1);
strbuf_add_separated_string_list(&new_data, "\n ", &siglines);
strbuf_addch(&new_data, '\n');
}
strbuf_addch(&new_data, '\n');
strbuf_addbuf(&new_data, &msg);
string_list_clear(&siglines, 1);
free(author);
free(committer);
free(sig_alg);
free(encoding);

if (!store_object(OBJ_COMMIT, &new_data, NULL, &b->oid, next_mark))

View File

@ -8,6 +8,7 @@ GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME

. ./test-lib.sh
. "$TEST_DIRECTORY/lib-gpg.sh"

test_expect_success 'setup' '

@ -253,6 +254,24 @@ test_expect_success 'signed-tags=verbatim' '

'

# With --signed-tags=warn-verbatim the exported stream must still
# contain the PGP signature text, and something must have been written
# to stderr (the warning).
test_expect_success 'signed-tags=warn-verbatim' '

git fast-export --signed-tags=warn-verbatim sign-your-name >output 2>err &&
grep PGP output &&
test -s err

'

# 'warn' is a backward-compatibility alias for 'warn-verbatim'; test
# that it keeps working: the PGP signature text must still be present
# in the exported stream and stderr must not be empty.
test_expect_success 'signed-tags=warn' '

git fast-export --signed-tags=warn sign-your-name >output 2>err &&
grep PGP output &&
test -s err

'

test_expect_success 'signed-tags=strip' '

git fast-export --signed-tags=strip sign-your-name > output &&
@ -266,10 +285,107 @@ test_expect_success 'signed-tags=warn-strip' '
test -s err
'

# Create the branch commit-signing from main, add one commit signed
# with "git commit -S" while i18n.commitEncoding is ISO-8859-1, and
# record its object id in COMMIT_SIGNING for the tests that follow.
test_expect_success GPG 'set up signed commit' '

# Generate a commit with both "gpgsig" and "encoding" set, so
# that we can test that fast-import gets the ordering correct
# between the two.
test_config i18n.commitEncoding ISO-8859-1 &&
git checkout -f -b commit-signing main &&
echo Sign your name >file-sign &&
git add file-sign &&
git commit -S -m "signed commit" &&
COMMIT_SIGNING=$(git rev-parse --verify commit-signing)

'

# Default behaviour, i.e. no --signed-commits option given:
#  - with FAST_EXPORT_SIGNED_COMMITS_NOABORT unset, exporting the
#    signed commit must fail;
#  - with FAST_EXPORT_SIGNED_COMMITS_NOABORT=1, the export must succeed
#    without a "gpgsig" line, keep the "encoding" line, and write
#    something to stderr.
# The stream is then imported under the renamed branch
# commit-strip-signing inside the directory "new" (presumably a
# repository created by an earlier test -- confirm), and the imported
# commit id must differ from COMMIT_SIGNING.
test_expect_success GPG 'signed-commits default' '

sane_unset FAST_EXPORT_SIGNED_COMMITS_NOABORT &&
test_must_fail git fast-export --reencode=no commit-signing &&

FAST_EXPORT_SIGNED_COMMITS_NOABORT=1 git fast-export --reencode=no commit-signing >output 2>err &&
! grep ^gpgsig output &&
grep "^encoding ISO-8859-1" output &&
test -s err &&
sed "s/commit-signing/commit-strip-signing/" output | (
cd new &&
git fast-import &&
STRIPPED=$(git rev-parse --verify refs/heads/commit-strip-signing) &&
test $COMMIT_SIGNING != $STRIPPED
)

'

# Asking for --signed-commits=abort explicitly must make the export of
# the signed commit fail.
test_expect_success GPG 'signed-commits=abort' '

test_must_fail git fast-export --signed-commits=abort commit-signing

'

# With --signed-commits=verbatim the stream must carry a "gpgsig sha..."
# line as well as the encoding. Importing it unchanged inside "new"
# must recreate a commit with exactly the same id as the original.
# NOTE: despite its name, STRIPPED holds the id of the imported commit
# here, which is expected to equal COMMIT_SIGNING.
test_expect_success GPG 'signed-commits=verbatim' '

git fast-export --signed-commits=verbatim --reencode=no commit-signing >output &&
grep "^gpgsig sha" output &&
grep "encoding ISO-8859-1" output &&
(
cd new &&
git fast-import &&
STRIPPED=$(git rev-parse --verify refs/heads/commit-signing) &&
test $COMMIT_SIGNING = $STRIPPED
) <output

'

# Same checks as for 'verbatim' (signature and encoding exported, the
# imported commit id equals COMMIT_SIGNING), plus stderr must not be
# empty.
# NOTE: despite its name, STRIPPED holds the id of the imported commit
# here, which is expected to equal COMMIT_SIGNING.
test_expect_success GPG 'signed-commits=warn-verbatim' '

git fast-export --signed-commits=warn-verbatim --reencode=no commit-signing >output 2>err &&
grep "^gpgsig sha" output &&
grep "encoding ISO-8859-1" output &&
test -s err &&
(
cd new &&
git fast-import &&
STRIPPED=$(git rev-parse --verify refs/heads/commit-signing) &&
test $COMMIT_SIGNING = $STRIPPED
) <output

'

# With --signed-commits=strip the stream must contain no "gpgsig" line
# but must keep the "encoding" line. After renaming the branch to
# commit-strip-signing and importing inside "new", the resulting commit
# id must differ from COMMIT_SIGNING.
test_expect_success GPG 'signed-commits=strip' '

git fast-export --signed-commits=strip --reencode=no commit-signing >output &&
! grep ^gpgsig output &&
grep "^encoding ISO-8859-1" output &&
sed "s/commit-signing/commit-strip-signing/" output | (
cd new &&
git fast-import &&
STRIPPED=$(git rev-parse --verify refs/heads/commit-strip-signing) &&
test $COMMIT_SIGNING != $STRIPPED
)

'

# Same checks as for 'strip' (no "gpgsig" line, "encoding" line kept,
# imported commit id differs from COMMIT_SIGNING), plus stderr must not
# be empty.
test_expect_success GPG 'signed-commits=warn-strip' '

git fast-export --signed-commits=warn-strip --reencode=no commit-signing >output 2>err &&
! grep ^gpgsig output &&
grep "^encoding ISO-8859-1" output &&
test -s err &&
sed "s/commit-signing/commit-strip-signing/" output | (
cd new &&
git fast-import &&
STRIPPED=$(git rev-parse --verify refs/heads/commit-strip-signing) &&
test $COMMIT_SIGNING != $STRIPPED
)

'

test_expect_success 'setup submodule' '

test_config_global protocol.file.allow always &&
git checkout -f main &&
test_might_fail git update-ref -d refs/heads/commit-signing &&
mkdir sub &&
(
cd sub &&