apply: refactor `struct image` to use a `struct strbuf`

The `struct image` uses a character array to track the pre- or postimage
of a patch operation. This has multiple downsides:

  - It is somewhat hard to track memory ownership. In fact, we have
    several memory leaks in git-apply(1) because we do not (and cannot
    easily) free the buffer in all situations.

  - We have to reinvent the wheel and manually implement a lot of
    functionality that would already be provided by `struct strbuf`.

  - We have to carefully track whether `update_pre_post_images()` can do
    an in-place update of the postimage or whether it has to allocate a
    new buffer for it.

This is all rather cumbersome, and especially `update_pre_post_images()`
is really hard to understand as a consequence even though what it is
doing is rather trivial.

Refactor the code to use a `struct strbuf` instead, addressing all of
the above. Like this we can easily perform in-place updates in all
situations, the logic to perform those updates becomes way simpler and
the lifetime of the buffer becomes a ton easier to track.

This refactoring also plugs some leaking buffers as a side effect.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
maint
Patrick Steinhardt 2024-09-17 12:08:08 +02:00 committed by Junio C Hamano
parent e73686f6e4
commit 3fc4eab466
6 changed files with 79 additions and 123 deletions

194
apply.c
View File

@ -277,12 +277,13 @@ struct line {
* This represents a "file", which is an array of "lines". * This represents a "file", which is an array of "lines".
*/ */
struct image { struct image {
char *buf; struct strbuf buf;
size_t len;
struct line *line; struct line *line;
size_t line_nr, line_alloc; size_t line_nr, line_alloc;
}; };
#define IMAGE_INIT { 0 } #define IMAGE_INIT { \
.buf = STRBUF_INIT, \
}


static void image_init(struct image *image) static void image_init(struct image *image)
{ {
@ -292,7 +293,7 @@ static void image_init(struct image *image)


static void image_clear(struct image *image) static void image_clear(struct image *image)
{ {
free(image->buf); strbuf_release(&image->buf);
free(image->line); free(image->line);
image_init(image); image_init(image);
} }
@ -329,14 +330,13 @@ static void image_prepare(struct image *image, char *buf, size_t len,
const char *cp, *ep; const char *cp, *ep;


image_clear(image); image_clear(image);
image->buf = buf; strbuf_attach(&image->buf, buf, len, len + 1);
image->len = len;


if (!prepare_linetable) if (!prepare_linetable)
return; return;


ep = image->buf + image->len; ep = image->buf.buf + image->buf.len;
cp = image->buf; cp = image->buf.buf;
while (cp < ep) { while (cp < ep) {
const char *next; const char *next;
for (next = cp; next < ep && *next != '\n'; next++) for (next = cp; next < ep && *next != '\n'; next++)
@ -350,8 +350,7 @@ static void image_prepare(struct image *image, char *buf, size_t len,


static void image_remove_first_line(struct image *img) static void image_remove_first_line(struct image *img)
{ {
img->buf += img->line[0].len; strbuf_remove(&img->buf, 0, img->line[0].len);
img->len -= img->line[0].len;
img->line_nr--; img->line_nr--;
if (img->line_nr) if (img->line_nr)
MOVE_ARRAY(img->line, img->line + 1, img->line_nr); MOVE_ARRAY(img->line, img->line + 1, img->line_nr);
@ -359,7 +358,9 @@ static void image_remove_first_line(struct image *img)


static void image_remove_last_line(struct image *img) static void image_remove_last_line(struct image *img)
{ {
img->len -= img->line[--img->line_nr].len; size_t last_line_len = img->line[img->line_nr - 1].len;
strbuf_setlen(&img->buf, img->buf.len - last_line_len);
img->line_nr--;
} }


/* fmt must contain _one_ %s and no other substitution */ /* fmt must contain _one_ %s and no other substitution */
@ -2308,19 +2309,16 @@ static int read_old_data(struct stat *st, struct patch *patch,


/* /*
* Update the preimage, and the common lines in postimage, * Update the preimage, and the common lines in postimage,
* from buffer buf of length len. If postlen is 0 the postimage * from buffer buf of length len.
* is updated in place, otherwise it's updated on a new buffer
* of length postlen
*/ */

static void update_pre_post_images(struct image *preimage, static void update_pre_post_images(struct image *preimage,
struct image *postimage, struct image *postimage,
char *buf, char *buf, size_t len)
size_t len, size_t postlen)
{ {
int i, ctx, reduced;
char *new_buf, *old_buf, *fixed;
struct image fixed_preimage = IMAGE_INIT; struct image fixed_preimage = IMAGE_INIT;
size_t insert_pos = 0;
int i, ctx, reduced;
const char *fixed;


/* /*
* Update the preimage with whitespace fixes. Note that we * Update the preimage with whitespace fixes. Note that we
@ -2328,43 +2326,24 @@ static void update_pre_post_images(struct image *preimage,
* free "oldlines". * free "oldlines".
*/ */
image_prepare(&fixed_preimage, buf, len, 1); image_prepare(&fixed_preimage, buf, len, 1);
assert(postlen
? fixed_preimage.line_nr == preimage->line_nr
: fixed_preimage.line_nr <= preimage->line_nr);
for (i = 0; i < fixed_preimage.line_nr; i++) for (i = 0; i < fixed_preimage.line_nr; i++)
fixed_preimage.line[i].flag = preimage->line[i].flag; fixed_preimage.line[i].flag = preimage->line[i].flag;
free(preimage->line); image_clear(preimage);
*preimage = fixed_preimage; *preimage = fixed_preimage;
fixed = preimage->buf.buf;


/* /*
* Adjust the common context lines in postimage. This can be * Adjust the common context lines in postimage.
* done in-place when we are shrinking it with whitespace
* fixing, but needs a new buffer when ignoring whitespace or
* expanding leading tabs to spaces.
*
* We trust the caller to tell us if the update can be done
* in place (postlen==0) or not.
*/ */
old_buf = postimage->buf;
if (postlen)
new_buf = postimage->buf = xmalloc(postlen);
else
new_buf = old_buf;
fixed = preimage->buf;

for (i = reduced = ctx = 0; i < postimage->line_nr; i++) { for (i = reduced = ctx = 0; i < postimage->line_nr; i++) {
size_t l_len = postimage->line[i].len; size_t l_len = postimage->line[i].len;

if (!(postimage->line[i].flag & LINE_COMMON)) { if (!(postimage->line[i].flag & LINE_COMMON)) {
/* an added line -- no counterparts in preimage */ /* an added line -- no counterparts in preimage */
memmove(new_buf, old_buf, l_len); insert_pos += l_len;
old_buf += l_len;
new_buf += l_len;
continue; continue;
} }


/* a common context -- skip it in the original postimage */
old_buf += l_len;

/* and find the corresponding one in the fixed preimage */ /* and find the corresponding one in the fixed preimage */
while (ctx < preimage->line_nr && while (ctx < preimage->line_nr &&
!(preimage->line[ctx].flag & LINE_COMMON)) { !(preimage->line[ctx].flag & LINE_COMMON)) {
@ -2383,21 +2362,15 @@ static void update_pre_post_images(struct image *preimage,


/* and copy it in, while fixing the line length */ /* and copy it in, while fixing the line length */
l_len = preimage->line[ctx].len; l_len = preimage->line[ctx].len;
memcpy(new_buf, fixed, l_len); strbuf_splice(&postimage->buf, insert_pos, postimage->line[i].len,
new_buf += l_len; fixed, l_len);
insert_pos += l_len;
fixed += l_len; fixed += l_len;
postimage->line[i].len = l_len; postimage->line[i].len = l_len;
ctx++; ctx++;
} }


if (postlen
? postlen < new_buf - postimage->buf
: postimage->len < new_buf - postimage->buf)
BUG("caller miscounted postlen: asked %d, orig = %d, used = %d",
(int)postlen, (int) postimage->len, (int)(new_buf - postimage->buf));

/* Fix the length of the whole thing */ /* Fix the length of the whole thing */
postimage->len = new_buf - postimage->buf;
postimage->line_nr -= reduced; postimage->line_nr -= reduced;
} }


@ -2447,7 +2420,6 @@ static int line_by_line_fuzzy_match(struct image *img,
int i; int i;
size_t imgoff = 0; size_t imgoff = 0;
size_t preoff = 0; size_t preoff = 0;
size_t postlen = postimage->len;
size_t extra_chars; size_t extra_chars;
char *buf; char *buf;
char *preimage_eof; char *preimage_eof;
@ -2460,11 +2432,9 @@ static int line_by_line_fuzzy_match(struct image *img,
size_t prelen = preimage->line[i].len; size_t prelen = preimage->line[i].len;
size_t imglen = img->line[current_lno+i].len; size_t imglen = img->line[current_lno+i].len;


if (!fuzzy_matchlines(img->buf + current + imgoff, imglen, if (!fuzzy_matchlines(img->buf.buf + current + imgoff, imglen,
preimage->buf + preoff, prelen)) preimage->buf.buf + preoff, prelen))
return 0; return 0;
if (preimage->line[i].flag & LINE_COMMON)
postlen += imglen - prelen;
imgoff += imglen; imgoff += imglen;
preoff += prelen; preoff += prelen;
} }
@ -2480,10 +2450,10 @@ static int line_by_line_fuzzy_match(struct image *img,
* are whitespace characters. (This can only happen if * are whitespace characters. (This can only happen if
* we are removing blank lines at the end of the file.) * we are removing blank lines at the end of the file.)
*/ */
buf = preimage_eof = preimage->buf + preoff; buf = preimage_eof = preimage->buf.buf + preoff;
for ( ; i < preimage->line_nr; i++) for ( ; i < preimage->line_nr; i++)
preoff += preimage->line[i].len; preoff += preimage->line[i].len;
preimage_end = preimage->buf + preoff; preimage_end = preimage->buf.buf + preoff;
for ( ; buf < preimage_end; buf++) for ( ; buf < preimage_end; buf++)
if (!isspace(*buf)) if (!isspace(*buf))
return 0; return 0;
@ -2497,11 +2467,11 @@ static int line_by_line_fuzzy_match(struct image *img,
*/ */
extra_chars = preimage_end - preimage_eof; extra_chars = preimage_end - preimage_eof;
strbuf_init(&fixed, imgoff + extra_chars); strbuf_init(&fixed, imgoff + extra_chars);
strbuf_add(&fixed, img->buf + current, imgoff); strbuf_add(&fixed, img->buf.buf + current, imgoff);
strbuf_add(&fixed, preimage_eof, extra_chars); strbuf_add(&fixed, preimage_eof, extra_chars);
fixed_buf = strbuf_detach(&fixed, &fixed_len); fixed_buf = strbuf_detach(&fixed, &fixed_len);
update_pre_post_images(preimage, postimage, update_pre_post_images(preimage, postimage,
fixed_buf, fixed_len, postlen); fixed_buf, fixed_len);
return 1; return 1;
} }


@ -2517,7 +2487,8 @@ static int match_fragment(struct apply_state *state,
int i; int i;
const char *orig, *target; const char *orig, *target;
struct strbuf fixed = STRBUF_INIT; struct strbuf fixed = STRBUF_INIT;
size_t postlen; char *fixed_buf;
size_t fixed_len;
int preimage_limit; int preimage_limit;
int ret; int ret;


@ -2573,9 +2544,9 @@ static int match_fragment(struct apply_state *state,
* exactly. * exactly.
*/ */
if ((match_end if ((match_end
? (current + preimage->len == img->len) ? (current + preimage->buf.len == img->buf.len)
: (current + preimage->len <= img->len)) && : (current + preimage->buf.len <= img->buf.len)) &&
!memcmp(img->buf + current, preimage->buf, preimage->len)) { !memcmp(img->buf.buf + current, preimage->buf.buf, preimage->buf.len)) {
ret = 1; ret = 1;
goto out; goto out;
} }
@ -2589,7 +2560,7 @@ static int match_fragment(struct apply_state *state,
*/ */
const char *buf, *buf_end; const char *buf, *buf_end;


buf = preimage->buf; buf = preimage->buf.buf;
buf_end = buf; buf_end = buf;
for (i = 0; i < preimage_limit; i++) for (i = 0; i < preimage_limit; i++)
buf_end += preimage->line[i].len; buf_end += preimage->line[i].len;
@ -2634,21 +2605,14 @@ static int match_fragment(struct apply_state *state,
* fixed. * fixed.
*/ */


/* First count added lines in postimage */
postlen = 0;
for (i = 0; i < postimage->line_nr; i++) {
if (!(postimage->line[i].flag & LINE_COMMON))
postlen += postimage->line[i].len;
}

/* /*
* The preimage may extend beyond the end of the file, * The preimage may extend beyond the end of the file,
* but in this loop we will only handle the part of the * but in this loop we will only handle the part of the
* preimage that falls within the file. * preimage that falls within the file.
*/ */
strbuf_grow(&fixed, preimage->len + 1); strbuf_grow(&fixed, preimage->buf.len + 1);
orig = preimage->buf; orig = preimage->buf.buf;
target = img->buf + current; target = img->buf.buf + current;
for (i = 0; i < preimage_limit; i++) { for (i = 0; i < preimage_limit; i++) {
size_t oldlen = preimage->line[i].len; size_t oldlen = preimage->line[i].len;
size_t tgtlen = img->line[current_lno + i].len; size_t tgtlen = img->line[current_lno + i].len;
@ -2677,10 +2641,6 @@ static int match_fragment(struct apply_state *state,
!memcmp(tgtfix.buf, fixed.buf + fixstart, !memcmp(tgtfix.buf, fixed.buf + fixstart,
fixed.len - fixstart)); fixed.len - fixstart));


/* Add the length if this is common with the postimage */
if (preimage->line[i].flag & LINE_COMMON)
postlen += tgtfix.len;

strbuf_release(&tgtfix); strbuf_release(&tgtfix);
if (!match) { if (!match) {
ret = 0; ret = 0;
@ -2722,10 +2682,9 @@ static int match_fragment(struct apply_state *state,
* has whitespace breakages unfixed, and fixing them makes the * has whitespace breakages unfixed, and fixing them makes the
* hunk match. Update the context lines in the postimage. * hunk match. Update the context lines in the postimage.
*/ */
if (postlen < postimage->len) fixed_buf = strbuf_detach(&fixed, &fixed_len);
postlen = 0;
update_pre_post_images(preimage, postimage, update_pre_post_images(preimage, postimage,
fixed.buf, fixed.len, postlen); fixed_buf, fixed_len);


ret = 1; ret = 1;


@ -2839,6 +2798,7 @@ static void update_image(struct apply_state *state,
*/ */
int i, nr; int i, nr;
size_t remove_count, insert_count, applied_at = 0; size_t remove_count, insert_count, applied_at = 0;
size_t result_alloc;
char *result; char *result;
int preimage_limit; int preimage_limit;


@ -2861,19 +2821,18 @@ static void update_image(struct apply_state *state,
remove_count = 0; remove_count = 0;
for (i = 0; i < preimage_limit; i++) for (i = 0; i < preimage_limit; i++)
remove_count += img->line[applied_pos + i].len; remove_count += img->line[applied_pos + i].len;
insert_count = postimage->len; insert_count = postimage->buf.len;


/* Adjust the contents */ /* Adjust the contents */
result = xmalloc(st_add3(st_sub(img->len, remove_count), insert_count, 1)); result_alloc = st_add3(st_sub(img->buf.len, remove_count), insert_count, 1);
memcpy(result, img->buf, applied_at); result = xmalloc(result_alloc);
memcpy(result + applied_at, postimage->buf, postimage->len); memcpy(result, img->buf.buf, applied_at);
memcpy(result + applied_at + postimage->len, memcpy(result + applied_at, postimage->buf.buf, postimage->buf.len);
img->buf + (applied_at + remove_count), memcpy(result + applied_at + postimage->buf.len,
img->len - (applied_at + remove_count)); img->buf.buf + (applied_at + remove_count),
free(img->buf); img->buf.len - (applied_at + remove_count));
img->buf = result; strbuf_attach(&img->buf, result, postimage->buf.len + img->buf.len - remove_count,
img->len += insert_count - remove_count; result_alloc);
result[img->len] = '\0';


/* Adjust the line table */ /* Adjust the line table */
nr = img->line_nr + postimage->line_nr - preimage_limit; nr = img->line_nr + postimage->line_nr - preimage_limit;
@ -3054,10 +3013,8 @@ static int apply_one_fragment(struct apply_state *state,
match_end = !state->unidiff_zero && !trailing; match_end = !state->unidiff_zero && !trailing;


pos = frag->newpos ? (frag->newpos - 1) : 0; pos = frag->newpos ? (frag->newpos - 1) : 0;
preimage.buf = oldlines; strbuf_add(&preimage.buf, oldlines, old - oldlines);
preimage.len = old - oldlines; strbuf_swap(&postimage.buf, &newlines);
postimage.buf = newlines.buf;
postimage.len = newlines.len;


for (;;) { for (;;) {


@ -3145,8 +3102,8 @@ static int apply_one_fragment(struct apply_state *state,
out: out:
free(oldlines); free(oldlines);
strbuf_release(&newlines); strbuf_release(&newlines);
free(preimage.line); image_clear(&preimage);
free(postimage.line); image_clear(&postimage);


return (applied_pos < 0); return (applied_pos < 0);
} }
@ -3176,18 +3133,16 @@ static int apply_binary_fragment(struct apply_state *state,
} }
switch (fragment->binary_patch_method) { switch (fragment->binary_patch_method) {
case BINARY_DELTA_DEFLATED: case BINARY_DELTA_DEFLATED:
dst = patch_delta(img->buf, img->len, fragment->patch, dst = patch_delta(img->buf.buf, img->buf.len, fragment->patch,
fragment->size, &len); fragment->size, &len);
if (!dst) if (!dst)
return -1; return -1;
image_clear(img); image_clear(img);
img->buf = dst; strbuf_attach(&img->buf, dst, len, len + 1);
img->len = len;
return 0; return 0;
case BINARY_LITERAL_DEFLATED: case BINARY_LITERAL_DEFLATED:
image_clear(img); image_clear(img);
img->len = fragment->size; strbuf_add(&img->buf, fragment->patch, fragment->size);
img->buf = xmemdupz(fragment->patch, img->len);
return 0; return 0;
} }
return -1; return -1;
@ -3223,8 +3178,8 @@ static int apply_binary(struct apply_state *state,
* See if the old one matches what the patch * See if the old one matches what the patch
* applies to. * applies to.
*/ */
hash_object_file(the_hash_algo, img->buf, img->len, OBJ_BLOB, hash_object_file(the_hash_algo, img->buf.buf, img->buf.len,
&oid); OBJ_BLOB, &oid);
if (strcmp(oid_to_hex(&oid), patch->old_oid_prefix)) if (strcmp(oid_to_hex(&oid), patch->old_oid_prefix))
return error(_("the patch applies to '%s' (%s), " return error(_("the patch applies to '%s' (%s), "
"which does not match the " "which does not match the "
@ -3233,7 +3188,7 @@ static int apply_binary(struct apply_state *state,
} }
else { else {
/* Otherwise, the old one must be empty. */ /* Otherwise, the old one must be empty. */
if (img->len) if (img->buf.len)
return error(_("the patch applies to an empty " return error(_("the patch applies to an empty "
"'%s' but it is not empty"), name); "'%s' but it is not empty"), name);
} }
@ -3257,8 +3212,7 @@ static int apply_binary(struct apply_state *state,
"'%s' cannot be read"), "'%s' cannot be read"),
patch->new_oid_prefix, name); patch->new_oid_prefix, name);
image_clear(img); image_clear(img);
img->buf = result; strbuf_attach(&img->buf, result, size, size + 1);
img->len = size;
} else { } else {
/* /*
* We have verified buf matches the preimage; * We have verified buf matches the preimage;
@ -3270,7 +3224,7 @@ static int apply_binary(struct apply_state *state,
name); name);


/* verify that the result matches */ /* verify that the result matches */
hash_object_file(the_hash_algo, img->buf, img->len, OBJ_BLOB, hash_object_file(the_hash_algo, img->buf.buf, img->buf.len, OBJ_BLOB,
&oid); &oid);
if (strcmp(oid_to_hex(&oid), patch->new_oid_prefix)) if (strcmp(oid_to_hex(&oid), patch->new_oid_prefix))
return error(_("binary patch to '%s' creates incorrect result (expecting %s, got %s)"), return error(_("binary patch to '%s' creates incorrect result (expecting %s, got %s)"),
@ -3540,14 +3494,14 @@ static int resolve_to(struct image *image, const struct object_id *result_id)
{ {
unsigned long size; unsigned long size;
enum object_type type; enum object_type type;
char *data;


image_clear(image); image_clear(image);


image->buf = repo_read_object_file(the_repository, result_id, &type, data = repo_read_object_file(the_repository, result_id, &type, &size);
&size); if (!data || type != OBJ_BLOB)
if (!image->buf || type != OBJ_BLOB)
die("unable to read blob object %s", oid_to_hex(result_id)); die("unable to read blob object %s", oid_to_hex(result_id));
image->len = size; strbuf_attach(&image->buf, data, size, size + 1);


return 0; return 0;
} }
@ -3589,8 +3543,7 @@ static int three_way_merge(struct apply_state *state,
return -1; return -1;
} }
image_clear(image); image_clear(image);
image->buf = result.ptr; strbuf_attach(&image->buf, result.ptr, result.size, result.size);
image->len = result.size;


return status; return status;
} }
@ -3677,7 +3630,7 @@ static int try_threeway(struct apply_state *state,
return -1; return -1;
} }
/* post_oid is theirs */ /* post_oid is theirs */
write_object_file(tmp_image.buf, tmp_image.len, OBJ_BLOB, &post_oid); write_object_file(tmp_image.buf.buf, tmp_image.buf.len, OBJ_BLOB, &post_oid);
image_clear(&tmp_image); image_clear(&tmp_image);


/* our_oid is ours */ /* our_oid is ours */
@ -3690,7 +3643,7 @@ static int try_threeway(struct apply_state *state,
return error(_("cannot read the current contents of '%s'"), return error(_("cannot read the current contents of '%s'"),
patch->old_name); patch->old_name);
} }
write_object_file(tmp_image.buf, tmp_image.len, OBJ_BLOB, &our_oid); write_object_file(tmp_image.buf.buf, tmp_image.buf.len, OBJ_BLOB, &our_oid);
image_clear(&tmp_image); image_clear(&tmp_image);


/* in-core three-way merge between post and our using pre as base */ /* in-core three-way merge between post and our using pre as base */
@ -3743,8 +3696,7 @@ static int apply_data(struct apply_state *state, struct patch *patch,
return -1; return -1;
} }
} }
patch->result = image.buf; patch->result = strbuf_detach(&image.buf, &patch->resultsize);
patch->resultsize = image.len;
add_to_fn_table(state, patch); add_to_fn_table(state, patch);
free(image.line); free(image.line);



View File

@ -5,6 +5,7 @@


test_description='tests to ensure compatibility between am and interactive backends' test_description='tests to ensure compatibility between am and interactive backends'


TEST_PASSES_SANITIZE_LEAK=true
. ./test-lib.sh . ./test-lib.sh


. "$TEST_DIRECTORY"/lib-rebase.sh . "$TEST_DIRECTORY"/lib-rebase.sh

View File

@ -3,9 +3,9 @@
# Copyright (c) 2009 Giuseppe Bilotta # Copyright (c) 2009 Giuseppe Bilotta
# #


test_description='git-apply --ignore-whitespace. test_description='git-apply --ignore-whitespace.'


' TEST_PASSES_SANITIZE_LEAK=true
. ./test-lib.sh . ./test-lib.sh


# This primes main.c file that indents without using HT at all. # This primes main.c file that indents without using HT at all.

View File

@ -2,6 +2,7 @@


test_description='core.whitespace rules and git apply' test_description='core.whitespace rules and git apply'


TEST_PASSES_SANITIZE_LEAK=true
. ./test-lib.sh . ./test-lib.sh


prepare_test_file () { prepare_test_file () {

View File

@ -2,6 +2,7 @@


test_description='applying patch that has broken whitespaces in context' test_description='applying patch that has broken whitespaces in context'


TEST_PASSES_SANITIZE_LEAK=true
. ./test-lib.sh . ./test-lib.sh


test_expect_success setup ' test_expect_success setup '

View File

@ -5,6 +5,7 @@


test_description='git apply test patches with whitespace expansion.' test_description='git apply test patches with whitespace expansion.'


TEST_PASSES_SANITIZE_LEAK=true
. ./test-lib.sh . ./test-lib.sh


test_expect_success setup ' test_expect_success setup '