rev-list: support delimiting objects with NUL bytes

When walking objects, git-rev-list(1) prints each object entry on a
separate line. Some options, such as `--objects`, may print additional
information about tree and blob objects on the same line in the form:

        $ git rev-list --objects <rev>
        <tree/blob oid> SP [<path>] LF

Note that in this form the SP is appended regardless of whether the tree
or blob object has path information available. Paths containing a
newline are also truncated at the newline.

Introduce the `-z` option for git-rev-list(1) which reformats the output
to use NUL-delimiters between objects and associated info in the
following form:

        $ git rev-list -z --objects <rev>
        <oid> NUL [path=<path> NUL]

In this form, the start of each record is signaled by an OID entry that
is all hexadecimal and does not contain any '='. Additional path info
from `--objects` is appended to the record as a token/value pair
`path=<path>` as-is without any truncation.

For now, the `--objects` flag is the only option that can be used in
combination with `-z`. In a subsequent commit, NUL-delimited support for
other options is added. Other options that do not make sense when used
in combination with `-z` are rejected.

Signed-off-by: Justin Tobler <jltobler@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
maint
Justin Tobler 2025-03-19 13:34:08 -05:00 committed by Junio C Hamano
parent c9907a1916
commit c3d59c2e70
3 changed files with 86 additions and 5 deletions

View File

@ -361,6 +361,27 @@ ifdef::git-rev-list[]
--progress=<header>::
Show progress reports on stderr as objects are considered. The
`<header>` text will be printed with each progress update.

-z::
Instead of being newline-delimited, each outputted object and its
accompanying metadata is delimited using NUL bytes. Output is printed
in the following form:
+
-----------------------------------------------------------------------
<OID> NUL [<token>=<value> NUL]...
-----------------------------------------------------------------------
+
Additional object metadata, such as object paths, is printed using the
`<token>=<value>` form. Token values are printed as-is without any
encoding/truncation. An OID entry never contains an '=' character and thus
is used to signal the start of a new object record. Examples:
+
-----------------------------------------------------------------------
<OID> NUL
<OID> NUL path=<path> NUL
-----------------------------------------------------------------------
+
This mode is only compatible with the `--objects` output option.
endif::git-rev-list[]

History Simplification

View File

@ -65,6 +65,7 @@ static const char rev_list_usage[] =
" --abbrev-commit\n"
" --left-right\n"
" --count\n"
" -z\n"
" special purpose:\n"
" --bisect\n"
" --bisect-vars\n"
@ -97,6 +98,9 @@ static int arg_show_object_names = 1;

#define DEFAULT_OIDSET_SIZE (16*1024)

static char line_term = '\n';
static char info_term = ' ';

static int show_disk_usage;
static off_t total_disk_usage;
static int human_readable;
@ -264,7 +268,7 @@ static void show_commit(struct commit *commit, void *data)
if (revs->commit_format == CMIT_FMT_ONELINE)
putchar(' ');
else if (revs->include_header)
putchar('\n');
putchar(line_term);

if (revs->verbose_header) {
struct strbuf buf = STRBUF_INIT;
@ -361,12 +365,16 @@ static void show_object(struct object *obj, const char *name, void *cb_data)
printf("%s", oid_to_hex(&obj->oid));

if (arg_show_object_names) {
putchar(' ');
for (const char *p = name; *p && *p != '\n'; p++)
putchar(*p);
if (line_term) {
putchar(info_term);
for (const char *p = name; *p && *p != '\n'; p++)
putchar(*p);
} else if (*name) {
printf("%cpath=%s", info_term, name);
}
}

putchar('\n');
putchar(line_term);
}

static void show_edge(struct commit *commit)
@ -642,6 +650,9 @@ int cmd_rev_list(int argc,
revs.exclude_promisor_objects = 1;
} else if (skip_prefix(arg, "--missing=", &arg)) {
parse_missing_action_value(arg);
} else if (!strcmp(arg, "-z")) {
line_term = '\0';
info_term = '\0';
}
}

@ -757,6 +768,20 @@ int cmd_rev_list(int argc,
usage(rev_list_usage);

}

/*
* Reject options currently incompatible with -z. For some options, this
* is not an inherent limitation and support may be implemented in the
* future.
*/
if (!line_term) {
if (revs.graph || revs.verbose_header || show_disk_usage ||
info.show_timestamp || info.header_prefix || bisect_list ||
use_bitmap_index || revs.edge_hint || revs.left_right ||
revs.cherry_mark || arg_missing_action || revs.boundary)
die(_("-z option used with unsupported option"));
}

if (revs.commit_format != CMIT_FMT_USERFORMAT)
revs.include_header = 1;
if (revs.commit_format != CMIT_FMT_UNSPECIFIED) {

View File

@ -182,4 +182,39 @@ test_expect_success 'rev-list --unpacked' '
test_cmp expect actual
'

# Check that `rev-list -z HEAD` on a two-commit repository prints the OID of
# HEAD followed by the OID of HEAD~, each terminated by a NUL byte.
test_expect_success 'rev-list -z' '
test_when_finished rm -rf repo &&

git init repo &&
test_commit -C repo 1 &&
test_commit -C repo 2 &&

oid1=$(git -C repo rev-parse HEAD~) &&
oid2=$(git -C repo rev-parse HEAD) &&

printf "%s\0%s\0" "$oid2" "$oid1" >expect &&
git -C repo rev-list -z HEAD >actual &&

test_cmp expect actual
'

# Check that `rev-list -z --objects` given the blobs HEAD:1.t and HEAD:2.t
# prints, for each blob, its OID and a `path=<path>` entry, each terminated
# by a NUL byte.
test_expect_success 'rev-list -z --objects' '
test_when_finished rm -rf repo &&

git init repo &&
test_commit -C repo 1 &&
test_commit -C repo 2 &&

oid1=$(git -C repo rev-parse HEAD:1.t) &&
oid2=$(git -C repo rev-parse HEAD:2.t) &&
path1=1.t &&
path2=2.t &&

printf "%s\0path=%s\0%s\0path=%s\0" "$oid1" "$path1" "$oid2" "$path2" \
>expect &&
git -C repo rev-list -z --objects HEAD:1.t HEAD:2.t >actual &&

test_cmp expect actual
'

test_done