From 03f2915541a4923c5733e505a42e77031eb9494c Mon Sep 17 00:00:00 2001 From: Niels Glodny Date: Tue, 29 Apr 2025 16:09:49 +0200 Subject: [PATCH] xdiff: disable cleanup_records heuristic with --minimal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cleanup_records function marks some lines as changed before running the actual diff algorithm. For most lines, this is a good performance optimization, but it also marks lines that are surrounded by many changed lines as changed as well. This can cause redundant changes and longer-than-necessary diffs. Whether this results in better-looking diffs is subjective. However, the --minimal flag explicitly requests the shortest possible diff. The change results in shorter diffs in about 1.3% of all diffs in Git's history. Performance wise, I have measured the impact on "git log -p -3000 --minimal > /dev/null". With this change, I get Time (mean ± σ): 2.363 s ± 0.023 s (25 runs) and without this patch I measured Time (mean ± σ): 2.362 s ± 0.035 s (25 runs). As the difference is well within the margin of error, this does not seem to have an impact on performance. Signed-off-by: Niels Glodny Signed-off-by: Junio C Hamano --- t/meson.build | 1 + t/t4071-diff-minimal.sh | 14 ++++++++++++++ xdiff/xprepare.c | 5 +++-- 3 files changed, 18 insertions(+), 2 deletions(-) create mode 100755 t/t4071-diff-minimal.sh diff --git a/t/meson.build b/t/meson.build index bfb744e886..8f2e9d2c50 100644 --- a/t/meson.build +++ b/t/meson.build @@ -501,6 +501,7 @@ integration_tests = [ 't4068-diff-symmetric-merge-base.sh', 't4069-remerge-diff.sh', 't4070-diff-pairs.sh', + 't4071-diff-minimal.sh', 't4100-apply-stat.sh', 't4101-apply-nonl.sh', 't4102-apply-rename.sh', diff --git a/t/t4071-diff-minimal.sh b/t/t4071-diff-minimal.sh new file mode 100755 index 0000000000..4c484dadfb --- /dev/null +++ b/t/t4071-diff-minimal.sh @@ -0,0 +1,14 @@ +#!/bin/sh + +test_description='minimal diff algorithm' + +. ./test-lib.sh + +test_expect_success 'minimal diff should not mark changes between changed lines' ' + test_write_lines x x x x >pre && + test_write_lines x x x A B C D x E F G >post && + test_expect_code 1 git diff --no-index --minimal pre post >diff && + test_grep ! ^[+-]x diff +' + +test_done diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index c84549f6c5..e1d4017b2d 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -368,6 +368,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd xrecord_t **recs; xdlclass_t *rcrec; char *dis, *dis1, *dis2; + int need_min = !!(cf->flags & XDF_NEED_MINIMAL); if (!XDL_CALLOC_ARRAY(dis, xdf1->nrec + xdf2->nrec + 2)) return -1; @@ -379,7 +380,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) { rcrec = cf->rcrecs[(*recs)->ha]; nm = rcrec ? rcrec->len2 : 0; - dis1[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1; + dis1[i] = (nm == 0) ? 0: (nm >= mlim && !need_min) ? 2: 1; } if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT) @@ -387,7 +388,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) { rcrec = cf->rcrecs[(*recs)->ha]; nm = rcrec ? rcrec->len1 : 0; - dis2[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1; + dis2[i] = (nm == 0) ? 0: (nm >= mlim && !need_min) ? 2: 1; } for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart];