From a81411253323208e1e8d3591247c27fefa8a2045 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Mon, 4 May 2026 15:06:18 +0100 Subject: [PATCH 1/4] xdiff: reduce size of action arrays When the myers algorithm is selected the input files are pre-processed to remove any common prefix and suffix. Then any lines that appear only in one side of the diff are marked as changed and frequently occurring lines are marked as changed if they are adjacent to a changed line. This step requires a couple of temporary arrays. As the common prefix and suffix have already been removed, the arrays only need to be big enough to hold the lines between them, not the whole file. Reduce the size of the arrays and adjust the loops that use them accordingly while taking care to keep indexing the arrays in xdfile_t with absolute line numbers. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index beef711067..3b6bae0d15 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -273,16 +273,19 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd uint8_t *action1 = NULL, *action2 = NULL; bool need_min = !!(cf->flags & XDF_NEED_MINIMAL); int ret = 0; + ptrdiff_t off = xdf1->dstart; + ptrdiff_t len1 = xdf1->dend - off + 1; + ptrdiff_t len2 = xdf2->dend - off + 1; /* * Create temporary arrays that will help us decide if * changed[i] should remain false, or become true. 
*/ - if (!XDL_CALLOC_ARRAY(action1, xdf1->nrec + 1)) { + if (!XDL_CALLOC_ARRAY(action1, len1)) { ret = -1; goto cleanup; } - if (!XDL_CALLOC_ARRAY(action2, xdf2->nrec + 1)) { + if (!XDL_CALLOC_ARRAY(action2, len2)) { ret = -1; goto cleanup; } @@ -298,8 +301,8 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd if (mlim1 > XDL_MAX_EQLIMIT) mlim1 = XDL_MAX_EQLIMIT; } - for (i = xdf1->dstart; i <= xdf1->dend; i++) { - size_t mph1 = xdf1->recs[i].minimal_perfect_hash; + for (i = 0; i < len1; i++) { + size_t mph1 = xdf1->recs[i + off].minimal_perfect_hash; rcrec = cf->rcrecs[mph1]; nm = rcrec ? rcrec->len2 : 0; if (nm == 0) @@ -318,8 +321,8 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd if (mlim2 > XDL_MAX_EQLIMIT) mlim2 = XDL_MAX_EQLIMIT; } - for (i = xdf2->dstart; i <= xdf2->dend; i++) { - size_t mph2 = xdf2->recs[i].minimal_perfect_hash; + for (i = 0; i < len2; i++) { + size_t mph2 = xdf2->recs[i + off].minimal_perfect_hash; rcrec = cf->rcrecs[mph2]; nm = rcrec ? rcrec->len1 : 0; if (nm == 0) @@ -335,42 +338,42 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd * false, or become true. 
*/ xdf1->nreff = 0; - for (i = xdf1->dstart; i <= xdf1->dend; i++) { + for (i = 0; i < len1; i++) { uint8_t action = action1[i]; if (action == INVESTIGATE) { - if (!xdl_clean_mmatch(action1, i, xdf1->dstart, xdf1->dend)) + if (!xdl_clean_mmatch(action1, i, 0, len1 - 1)) action = KEEP; else action = DISCARD; } if (action == KEEP) { - xdf1->reference_index[xdf1->nreff++] = i; + xdf1->reference_index[xdf1->nreff++] = i + off; /* changed[i] remains false */ } else if (action == DISCARD) { - xdf1->changed[i] = true; + xdf1->changed[i + off] = true; } else { BUG("Illegal state for action"); } } xdf2->nreff = 0; - for (i = xdf2->dstart; i <= xdf2->dend; i++) { + for (i = 0; i < len2; i++) { uint8_t action = action2[i]; if (action == INVESTIGATE) { - if (!xdl_clean_mmatch(action2, i, xdf2->dstart, xdf2->dend)) + if (!xdl_clean_mmatch(action2, i, 0, len2 - 1)) action = KEEP; else action = DISCARD; } if (action == KEEP) { - xdf2->reference_index[xdf2->nreff++] = i; + xdf2->reference_index[xdf2->nreff++] = i + off; /* changed[i] remains false */ } else if (action == DISCARD) { - xdf2->changed[i] = true; + xdf2->changed[i + off] = true; } else { BUG("Illegal state for action"); } From 53d13887b8581d46dffc1f4ee2622c977b65ecb5 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Mon, 4 May 2026 15:06:19 +0100 Subject: [PATCH 2/4] xdiff: cleanup xdl_clean_mmatch() Remove the "s" parameter as, since the last commit, this function is always called with s == 0. Also change parameter "e" to expect a length, rather than the index of the last line to simplify the caller. 
Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 3b6bae0d15..81de412875 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -197,8 +197,9 @@ void xdl_free_env(xdfenv_t *xe) { } -static bool xdl_clean_mmatch(uint8_t const *action, ptrdiff_t i, ptrdiff_t s, ptrdiff_t e) { +static bool xdl_clean_mmatch(uint8_t const *action, ptrdiff_t i, ptrdiff_t len) { ptrdiff_t r, rdis0, rpdis0, rdis1, rpdis1; + ptrdiff_t s = 0, e = len - 1; /* * Limits the window that is examined during the similar-lines @@ -342,7 +343,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd uint8_t action = action1[i]; if (action == INVESTIGATE) { - if (!xdl_clean_mmatch(action1, i, 0, len1 - 1)) + if (!xdl_clean_mmatch(action1, i, len1)) action = KEEP; else action = DISCARD; @@ -363,7 +364,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd uint8_t action = action2[i]; if (action == INVESTIGATE) { - if (!xdl_clean_mmatch(action2, i, 0, len2 - 1)) + if (!xdl_clean_mmatch(action2, i, len2)) action = KEEP; else action = DISCARD; From c8eb18f58607057a812654bdfca3e6b47bd0ffe4 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Mon, 4 May 2026 15:06:20 +0100 Subject: [PATCH 3/4] xprepare: simplify error handling If either of the two allocations fails we want to take the same action so use a single if statement. This saves a few lines and makes it easier for the next commit to add a couple more allocations. 
Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 81de412875..7a29e5fc47 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -282,11 +282,8 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd * Create temporary arrays that will help us decide if * changed[i] should remain false, or become true. */ - if (!XDL_CALLOC_ARRAY(action1, len1)) { - ret = -1; - goto cleanup; - } - if (!XDL_CALLOC_ARRAY(action2, len2)) { + if (!XDL_CALLOC_ARRAY(action1, len1) || + !XDL_CALLOC_ARRAY(action2, len2)) { ret = -1; goto cleanup; } From dca97e79bbf75f27602fe277344bfebebed82bb9 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Mon, 4 May 2026 15:06:21 +0100 Subject: [PATCH 4/4] xdiff: reduce the size of array When the myers algorithm is selected the input files are pre-processed to remove any common prefix and suffix and any lines that appear in only one file. This requires a map to be created between the lines that are processed by the myers algorithm and the lines in the original file. That map does not include the common lines at the beginning and end of the files but the array is allocated to be the size of the whole file. Move the allocation into xdl_cleanup_records() where the map is populated and we know how big it needs to be. 
Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 7a29e5fc47..11bada2608 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -171,12 +171,6 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ if (!XDL_CALLOC_ARRAY(xdf->changed, xdf->nrec + 2)) goto abort; - if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) && - (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF)) { - if (!XDL_ALLOC_ARRAY(xdf->reference_index, xdf->nrec + 1)) - goto abort; - } - xdf->changed += 1; xdf->nreff = 0; xdf->dstart = 0; @@ -283,7 +277,10 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd * changed[i] should remain false, or become true. */ if (!XDL_CALLOC_ARRAY(action1, len1) || - !XDL_CALLOC_ARRAY(action2, len2)) { + !XDL_CALLOC_ARRAY(action2, len2) || + !XDL_ALLOC_ARRAY(xdf1->reference_index, len1) || + !XDL_ALLOC_ARRAY(xdf2->reference_index, len2)) + { ret = -1; goto cleanup; }