Merge branch 'jt/diff-pairs'

A post-processing filter for "diff --raw" output has been
introduced.

* jt/diff-pairs:
  builtin/diff-pairs: allow explicit diff queue flush
  builtin: introduce diff-pairs command
  diff: add option to skip resolving diff statuses
  diff: return diff_filepair from diff queue helpers
maint
Junio C Hamano 2025-03-26 16:26:09 +09:00
commit f50df872a4
13 changed files with 449 additions and 21 deletions

1
.gitignore vendored
View File

@ -55,6 +55,7 @@
/git-diff
/git-diff-files
/git-diff-index
/git-diff-pairs
/git-diff-tree
/git-difftool
/git-difftool--helper

View File

@ -0,0 +1,60 @@
git-diff-pairs(1)
=================

NAME
----
git-diff-pairs - Compare the content and mode of provided blob pairs

SYNOPSIS
--------
[synopsis]
git diff-pairs -z [<diff-options>]

DESCRIPTION
-----------
Show changes for file pairs provided on stdin. Input for this command must be
in the NUL-terminated raw output format as generated by commands such as `git
diff-tree -z -r --raw`. By default, the outputted diffs are computed and shown
in the patch format when stdin closes.

A single NUL byte may be written to stdin between raw input lines to compute
the file pair diffs queued up to that point instead of waiting for stdin to
close. A NUL byte is also written to the output to delimit these batches of
diffs.

Usage of this command enables the traditional diff pipeline to be broken up
into separate stages where `diff-pairs` acts as the output phase. Other
commands, such as `diff-tree`, may serve as a frontend to compute the raw
diff format used as input.

Instead of computing diffs via `git diff-tree -p -M` in one step, `diff-tree`
can compute the file pairs and rename information without the blob diffs. This
output can be fed to `diff-pairs` to generate the underlying blob diffs as done
in the following example:

-----------------------------
git diff-tree -z -r -M $a $b |
git diff-pairs -z
-----------------------------

Computing the tree diff upfront with rename information allows patch output
from `diff-pairs` to be progressively computed over the course of potentially
multiple invocations.

Pathspecs are not currently supported by `diff-pairs`. Pathspec limiting should
be performed by the upstream command generating the raw diffs used as input.

Tree objects are not currently supported as input and are rejected.

Abbreviated object IDs in the `diff-pairs` input are not supported. Outputted
object IDs can be abbreviated using the `--abbrev` option.

OPTIONS
-------

include::diff-options.adoc[]

include::diff-generate-patch.adoc[]

GIT
---
Part of the linkgit:git[1] suite

View File

@ -42,6 +42,7 @@ manpages = {
'git-diagnose.adoc' : 1,
'git-diff-files.adoc' : 1,
'git-diff-index.adoc' : 1,
'git-diff-pairs.adoc' : 1,
'git-difftool.adoc' : 1,
'git-diff-tree.adoc' : 1,
'git-diff.adoc' : 1,

View File

@ -1242,6 +1242,7 @@ BUILTIN_OBJS += builtin/describe.o
BUILTIN_OBJS += builtin/diagnose.o
BUILTIN_OBJS += builtin/diff-files.o
BUILTIN_OBJS += builtin/diff-index.o
BUILTIN_OBJS += builtin/diff-pairs.o
BUILTIN_OBJS += builtin/diff-tree.o
BUILTIN_OBJS += builtin/diff.o
BUILTIN_OBJS += builtin/difftool.o

View File

@ -153,6 +153,7 @@ int cmd_diagnose(int argc, const char **argv, const char *prefix, struct reposit
int cmd_diff_files(int argc, const char **argv, const char *prefix, struct repository *repo);
int cmd_diff_index(int argc, const char **argv, const char *prefix, struct repository *repo);
int cmd_diff(int argc, const char **argv, const char *prefix, struct repository *repo);
int cmd_diff_pairs(int argc, const char **argv, const char *prefix, struct repository *repo);
int cmd_diff_tree(int argc, const char **argv, const char *prefix, struct repository *repo);
int cmd_difftool(int argc, const char **argv, const char *prefix, struct repository *repo);
int cmd_env__helper(int argc, const char **argv, const char *prefix, struct repository *repo);

207
builtin/diff-pairs.c Normal file
View File

@ -0,0 +1,207 @@
#include "builtin.h"
#include "config.h"
#include "diff.h"
#include "diffcore.h"
#include "gettext.h"
#include "hash.h"
#include "hex.h"
#include "object.h"
#include "parse-options.h"
#include "revision.h"
#include "strbuf.h"

static unsigned parse_mode_or_die(const char *mode, const char **end)
{
uint16_t ret;

*end = parse_mode(mode, &ret);
if (!*end)
die(_("unable to parse mode: %s"), mode);
return ret;
}

/*
 * Parse a hexadecimal object ID for hash algorithm `algop` from `hex` into
 * `oid`, and require that it is followed by a single space.
 *
 * On success `*end` points just past that space. If the object ID cannot be
 * parsed, or the byte after it is not a space, the command dies and quotes
 * the offending input.
 */
static void parse_oid_or_die(const char *hex, struct object_id *oid,
			     const char **end, const struct git_hash_algo *algop)
{
	int bad = parse_oid_hex_algop(hex, oid, end, algop);

	if (!bad) {
		/* Consume the separator unconditionally, then judge it. */
		char sep = **end;

		(*end)++;
		bad = (sep != ' ');
	}

	if (bad)
		die(_("unable to parse object id: %s"), hex);
}

/*
 * Entry point for "git diff-pairs".
 *
 * Reads NUL-terminated raw diff records from stdin, queues one file pair per
 * record on diff_queued_diff, and emits the resulting diffs in the requested
 * output format (patch format when none was requested).
 *
 * An empty record (a lone NUL byte between records) flushes everything
 * queued so far and writes a NUL byte to the output to separate batches.
 * Whatever is still queued when stdin reaches EOF is flushed at the end.
 *
 * Returns the value of diff_result_code() after the final flush. Malformed
 * input terminates the command through die(); unsupported command-line
 * usage terminates it through usage()/usagef().
 */
int cmd_diff_pairs(int argc, const char **argv, const char *prefix,
		   struct repository *repo)
{
	struct strbuf path_dst = STRBUF_INIT;
	struct strbuf path = STRBUF_INIT;
	struct strbuf meta = STRBUF_INIT;
	struct option *parseopts;
	struct rev_info revs;
	int line_term = '\0';
	int ret;

	const char * const builtin_diff_pairs_usage[] = {
		N_("git diff-pairs -z [<diff-options>]"),
		NULL
	};
	struct option builtin_diff_pairs_options[] = {
		OPT_END()
	};

	repo_init_revisions(repo, &revs, prefix);

	/*
	 * Diff options are usually parsed implicitly as part of
	 * setup_revisions(). Explicitly handle parsing to ensure options are
	 * printed in the usage message.
	 */
	parseopts = add_diff_options(builtin_diff_pairs_options, &revs.diffopt);
	show_usage_with_options_if_asked(argc, argv, builtin_diff_pairs_usage, parseopts);

	repo_config(repo, git_diff_basic_config, NULL);
	/*
	 * NOTE(review): no_free presumably keeps the diff options usable
	 * across the explicit mid-stream flushes below; it is cleared again
	 * right before the final flush.
	 */
	revs.diffopt.no_free = 1;
	revs.disable_stdin = 1;
	revs.abbrev = 0;
	revs.diff = 1;

	argc = parse_options(argc, argv, prefix, parseopts, builtin_diff_pairs_usage,
			     PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_DASHDASH);

	/*
	 * PARSE_OPT_KEEP_ARGV0 leaves the command name in argv[0], and
	 * setup_revisions() collects arguments it did not consume starting at
	 * argv[1]. A return value above 1 therefore means argv[1] is the
	 * first unrecognized argument; report that one, not the command name.
	 */
	if (setup_revisions(argc, argv, &revs, NULL) > 1)
		usagef(_("unrecognized argument: %s"), argv[1]);

	/*
	 * With the -z option, both command input and raw output are
	 * NUL-delimited (this mode does not affect patch output). At present
	 * only NUL-delimited raw diff formatted input is supported.
	 */
	if (revs.diffopt.line_termination)
		usage(_("working without -z is not supported"));

	if (revs.prune_data.nr)
		usage(_("pathspec arguments not supported"));

	if (revs.pending.nr || revs.max_count != -1 ||
	    revs.min_age != (timestamp_t)-1 ||
	    revs.max_age != (timestamp_t)-1)
		usage(_("revision arguments not allowed"));

	if (!revs.diffopt.output_format)
		revs.diffopt.output_format = DIFF_FORMAT_PATCH;

	/*
	 * If rename detection is not requested, use rename information from the
	 * raw diff formatted input. Setting skip_resolving_statuses ensures
	 * diffcore_std() does not mess with rename information already present
	 * in queued filepairs.
	 */
	if (!revs.diffopt.detect_rename)
		revs.diffopt.skip_resolving_statuses = 1;

	while (1) {
		struct object_id oid_a, oid_b;
		struct diff_filepair *pair;
		unsigned mode_a, mode_b;
		const char *p;
		char status;

		/* Each record starts with a ":<modes> <oids> <status>" field. */
		if (strbuf_getwholeline(&meta, stdin, line_term) == EOF)
			break;

		p = meta.buf;
		if (!*p) {
			/* Empty field: the caller asked for an explicit flush. */
			diffcore_std(&revs.diffopt);
			diff_flush(&revs.diffopt);
			/*
			 * When the diff queue is explicitly flushed, append a
			 * NUL byte to separate batches of diffs.
			 */
			fputc('\0', revs.diffopt.file);
			fflush(revs.diffopt.file);
			continue;
		}

		if (*p != ':')
			die(_("invalid raw diff input"));
		p++;

		mode_a = parse_mode_or_die(p, &p);
		mode_b = parse_mode_or_die(p, &p);

		if (S_ISDIR(mode_a) || S_ISDIR(mode_b))
			die(_("tree objects not supported"));

		parse_oid_or_die(p, &oid_a, &p, repo->hash_algo);
		parse_oid_or_die(p, &oid_b, &p, repo->hash_algo);

		/* Whatever follows the status letter is the rename/copy score. */
		status = *p++;

		if (strbuf_getwholeline(&path, stdin, line_term) == EOF)
			die(_("got EOF while reading path"));

		switch (status) {
		case DIFF_STATUS_ADDED:
			pair = diff_queue_addremove(&diff_queued_diff,
						    &revs.diffopt, '+', mode_b,
						    &oid_b, 1, path.buf, 0);
			/* The helper returns NULL when nothing was queued. */
			if (pair)
				pair->status = status;
			break;

		case DIFF_STATUS_DELETED:
			pair = diff_queue_addremove(&diff_queued_diff,
						    &revs.diffopt, '-', mode_a,
						    &oid_a, 1, path.buf, 0);
			if (pair)
				pair->status = status;
			break;

		case DIFF_STATUS_TYPE_CHANGED:
		case DIFF_STATUS_MODIFIED:
			pair = diff_queue_change(&diff_queued_diff, &revs.diffopt,
						 mode_a, mode_b, &oid_a, &oid_b,
						 1, 1, path.buf, 0, 0);
			if (pair)
				pair->status = status;
			break;

		case DIFF_STATUS_RENAMED:
		case DIFF_STATUS_COPIED: {
			struct diff_filespec *a, *b;
			unsigned int score;

			/* Renames and copies carry a second, destination path. */
			if (strbuf_getwholeline(&path_dst, stdin, line_term) == EOF)
				die(_("got EOF while reading destination path"));

			a = alloc_filespec(path.buf);
			b = alloc_filespec(path_dst.buf);
			fill_filespec(a, &oid_a, 1, mode_a);
			fill_filespec(b, &oid_b, 1, mode_b);

			pair = diff_queue(&diff_queued_diff, a, b);

			if (strtoul_ui(p, 10, &score))
				die(_("unable to parse rename/copy score: %s"), p);

			/* Input score is out of 100; rescale to MAX_SCORE. */
			pair->score = score * MAX_SCORE / 100;
			pair->status = status;
			pair->renamed_pair = 1;
			}
			break;

		default:
			die(_("unknown diff status: %c"), status);
		}
	}

	/* Final flush for whatever is still queued at EOF. */
	revs.diffopt.no_free = 0;
	diffcore_std(&revs.diffopt);
	diff_flush(&revs.diffopt);
	ret = diff_result_code(&revs);

	strbuf_release(&path_dst);
	strbuf_release(&path);
	strbuf_release(&meta);
	release_revisions(&revs);
	FREE_AND_NULL(parseopts);

	return ret;
}

View File

@ -96,6 +96,7 @@ git-diagnose ancillaryinterrogators
git-diff mainporcelain info
git-diff-files plumbinginterrogators
git-diff-index plumbinginterrogators
git-diff-pairs plumbinginterrogators
git-diff-tree plumbinginterrogators
git-difftool ancillaryinterrogators complete
git-fast-export ancillarymanipulators

56
diff.c
View File

@ -7085,7 +7085,7 @@ void diffcore_std(struct diff_options *options)
diffcore_order(options->orderfile);
if (options->rotate_to)
diffcore_rotate(options);
if (!options->found_follow)
if (!options->found_follow && !options->skip_resolving_statuses)
/* See try_to_follow_renames() in tree-diff.c */
diff_resolve_rename_copy();
diffcore_apply_filter(options);
@ -7161,16 +7161,19 @@ void compute_diffstat(struct diff_options *options,
options->found_changes = !!diffstat->nr;
}

void diff_addremove(struct diff_options *options,
struct diff_filepair *diff_queue_addremove(struct diff_queue_struct *queue,
struct diff_options *options,
int addremove, unsigned mode,
const struct object_id *oid,
int oid_valid,
const char *concatpath, unsigned dirty_submodule)
const char *concatpath,
unsigned dirty_submodule)
{
struct diff_filespec *one, *two;
struct diff_filepair *pair;

if (S_ISGITLINK(mode) && is_submodule_ignored(concatpath, options))
return;
return NULL;

/* This may look odd, but it is a preparation for
* feeding "there are unchanged files which should
@ -7190,7 +7193,7 @@ void diff_addremove(struct diff_options *options,

if (options->prefix &&
strncmp(concatpath, options->prefix, options->prefix_length))
return;
return NULL;

one = alloc_filespec(concatpath);
two = alloc_filespec(concatpath);
@ -7202,25 +7205,29 @@ void diff_addremove(struct diff_options *options,
two->dirty_submodule = dirty_submodule;
}

diff_queue(&diff_queued_diff, one, two);
pair = diff_queue(queue, one, two);
if (!options->flags.diff_from_contents)
options->flags.has_changes = 1;

return pair;
}

void diff_change(struct diff_options *options,
struct diff_filepair *diff_queue_change(struct diff_queue_struct *queue,
struct diff_options *options,
unsigned old_mode, unsigned new_mode,
const struct object_id *old_oid,
const struct object_id *new_oid,
int old_oid_valid, int new_oid_valid,
const char *concatpath,
unsigned old_dirty_submodule, unsigned new_dirty_submodule)
unsigned old_dirty_submodule,
unsigned new_dirty_submodule)
{
struct diff_filespec *one, *two;
struct diff_filepair *p;

if (S_ISGITLINK(old_mode) && S_ISGITLINK(new_mode) &&
is_submodule_ignored(concatpath, options))
return;
return NULL;

if (options->flags.reverse_diff) {
SWAP(old_mode, new_mode);
@ -7231,7 +7238,7 @@ void diff_change(struct diff_options *options,

if (options->prefix &&
strncmp(concatpath, options->prefix, options->prefix_length))
return;
return NULL;

one = alloc_filespec(concatpath);
two = alloc_filespec(concatpath);
@ -7239,19 +7246,42 @@ void diff_change(struct diff_options *options,
fill_filespec(two, new_oid, new_oid_valid, new_mode);
one->dirty_submodule = old_dirty_submodule;
two->dirty_submodule = new_dirty_submodule;
p = diff_queue(&diff_queued_diff, one, two);
p = diff_queue(queue, one, two);

if (options->flags.diff_from_contents)
return;
return p;

if (options->flags.quick && options->skip_stat_unmatch &&
!diff_filespec_check_stat_unmatch(options->repo, p)) {
diff_free_filespec_data(p->one);
diff_free_filespec_data(p->two);
return;
return p;
}

options->flags.has_changes = 1;

return p;
}

/*
 * Backward-compatible wrapper around diff_queue_addremove(): forwards all
 * arguments unchanged, always targets the global diff_queued_diff queue, and
 * discards the returned file pair.
 */
void diff_addremove(struct diff_options *options, int addremove, unsigned mode,
const struct object_id *oid, int oid_valid,
const char *concatpath, unsigned dirty_submodule)
{
diff_queue_addremove(&diff_queued_diff, options, addremove, mode, oid,
oid_valid, concatpath, dirty_submodule);
}

/*
 * Backward-compatible wrapper around diff_queue_change(): forwards all
 * arguments unchanged, always targets the global diff_queued_diff queue, and
 * discards the returned file pair.
 */
void diff_change(struct diff_options *options,
unsigned old_mode, unsigned new_mode,
const struct object_id *old_oid,
const struct object_id *new_oid,
int old_oid_valid, int new_oid_valid,
const char *concatpath,
unsigned old_dirty_submodule, unsigned new_dirty_submodule)
{
diff_queue_change(&diff_queued_diff, options, old_mode, new_mode,
old_oid, new_oid, old_oid_valid, new_oid_valid,
concatpath, old_dirty_submodule, new_dirty_submodule);
}

struct diff_filepair *diff_unmerge(struct diff_options *options, const char *path)

33
diff.h
View File

@ -353,6 +353,14 @@ struct diff_options {
/* to support internal diff recursion by --follow hack*/
int found_follow;

/*
* By default, diffcore_std() resolves the statuses for queued diff file
* pairs by calling diff_resolve_rename_copy(). If status information
* has already been manually set, this option prevents diffcore_std()
* from resetting statuses.
*/
int skip_resolving_statuses;

/* Callback which allows tweaking the options in diff_setup_done(). */
void (*set_default)(struct diff_options *);

@ -508,6 +516,31 @@ void diff_set_default_prefix(struct diff_options *options);

int diff_can_quit_early(struct diff_options *);

/*
* Stages changes in the provided diff queue for file additions and deletions.
* If a file pair gets queued, it is returned.
*/
struct diff_filepair *diff_queue_addremove(struct diff_queue_struct *queue,
struct diff_options *,
int addremove, unsigned mode,
const struct object_id *oid,
int oid_valid, const char *fullpath,
unsigned dirty_submodule);

/*
* Stages changes in the provided diff queue for file modifications.
* If a file pair gets queued, it is returned.
*/
struct diff_filepair *diff_queue_change(struct diff_queue_struct *queue,
struct diff_options *,
unsigned mode1, unsigned mode2,
const struct object_id *old_oid,
const struct object_id *new_oid,
int old_oid_valid, int new_oid_valid,
const char *fullpath,
unsigned dirty_submodule1,
unsigned dirty_submodule2);

void diff_addremove(struct diff_options *,
int addremove,
unsigned mode,

1
git.c
View File

@ -541,6 +541,7 @@ static struct cmd_struct commands[] = {
{ "diff", cmd_diff, NO_PARSEOPT },
{ "diff-files", cmd_diff_files, RUN_SETUP | NEED_WORK_TREE | NO_PARSEOPT },
{ "diff-index", cmd_diff_index, RUN_SETUP | NO_PARSEOPT },
{ "diff-pairs", cmd_diff_pairs, RUN_SETUP | NO_PARSEOPT },
{ "diff-tree", cmd_diff_tree, RUN_SETUP | NO_PARSEOPT },
{ "difftool", cmd_difftool, RUN_SETUP_GENTLY },
{ "fast-export", cmd_fast_export, RUN_SETUP },

View File

@ -540,6 +540,7 @@ builtin_sources = [
'builtin/diagnose.c',
'builtin/diff-files.c',
'builtin/diff-index.c',
'builtin/diff-pairs.c',
'builtin/diff-tree.c',
'builtin/diff.c',
'builtin/difftool.c',

View File

@ -500,6 +500,7 @@ integration_tests = [
't4067-diff-partial-clone.sh',
't4068-diff-symmetric-merge-base.sh',
't4069-remerge-diff.sh',
't4070-diff-pairs.sh',
't4100-apply-stat.sh',
't4101-apply-nonl.sh',
't4102-apply-rename.sh',

90
t/t4070-diff-pairs.sh Executable file
View File

@ -0,0 +1,90 @@
#!/bin/sh

test_description='basic diff-pairs tests'
. ./test-lib.sh

# This creates a diff with added, modified, deleted, renamed, copied, and
# typechange entries. This includes a submodule to test submodule diff support.
#
# The two states to compare are tagged "base" and "new" inside the "main"
# repository.
# NOTE(review): the body does "cd main" without a subshell; the later tests
# appear to rely on still being inside "main" -- confirm against test-lib.
test_expect_success 'setup' '
test_config_global protocol.file.allow always &&
git init sub &&
test_commit -C sub initial &&

git init main &&
cd main &&
echo to-be-gone >deleted &&
echo original >modified &&
echo now-a-file >symlink &&
test_seq 200 >two-hundred &&
test_seq 201 500 >five-hundred &&
git add . &&
test_tick &&
git commit -m base &&
git tag base &&

git submodule add ../sub &&
echo now-here >added &&
echo new >modified &&
rm deleted &&
mkdir subdir &&
echo content >subdir/file &&
mv two-hundred renamed &&
test_seq 201 500 | sed s/300/modified/ >copied &&
rm symlink &&
git add -A . &&
test_ln_s_add dest symlink &&
test_tick &&
git commit -m new &&
git tag new
'

# Round trip: raw records from diff-tree fed through "diff-pairs --raw" must
# come back out byte-for-byte identical.
test_expect_success 'diff-pairs recreates --raw' '
git diff-tree -r -M -C -C -z base new >expect &&
git diff-pairs --raw -z >actual <expect &&
test_cmp expect actual
'

# Patch output produced from piped raw records must match what diff-tree
# produces directly with -p and the same rename/copy options.
test_expect_success 'diff-pairs can create -p output' '
git diff-tree -p -M -C -C base new >expect &&
git diff-tree -r -M -C -C -z base new |
git diff-pairs -p -z >actual &&
test_cmp expect actual
'

# Without -z the command must refuse to run and print only a usage error.
test_expect_success 'diff-pairs does not support normal raw diff input' '
git diff-tree -r base new |
test_must_fail git diff-pairs >out 2>err &&

echo "usage: working without -z is not supported" >expect &&
test_must_be_empty out &&
test_cmp expect err
'

# diff-tree without -r is used here to feed tree entries, which must be
# rejected with a fatal error and no output.
test_expect_success 'diff-pairs does not support tree objects as input' '
git diff-tree -z base new |
test_must_fail git diff-pairs -z >out 2>err &&

echo "fatal: tree objects not supported" >expect &&
test_must_be_empty out &&
test_cmp expect err
'

# Anything after "--" is a pathspec, which must be rejected as unsupported.
test_expect_success 'diff-pairs does not support pathspec arguments' '
git diff-tree -r -z base new |
test_must_fail git diff-pairs -z -- new >out 2>err &&

echo "usage: pathspec arguments not supported" >expect &&
test_must_be_empty out &&
test_cmp expect err
'

# Two batches of raw records separated by a lone NUL byte: the output must
# contain the same two batches with the NUL separator preserved between them.
test_expect_success 'diff-pairs explicit queue flush' '
git diff-tree -r -M -C -C -z base new >expect &&
printf "\0" >>expect &&
git diff-tree -r -M -C -C -z base new >>expect &&

git diff-pairs --raw -z <expect >actual &&
test_cmp expect actual
'

test_done