Sync with Git 2.31.6

2 years ago · 8a755eddf5
12 changed files with 403 additions and 82 deletions
--- a/Documentation/RelNotes/2.30.7.txt
+++ b/Documentation/RelNotes/2.30.7.txt
@ -0,0 +1,86 @@
 Git v2.30.7 Release Notes
 =========================
 This release addresses the security issues CVE-2022-41903 and
 CVE-2022-23521.
 Fixes since v2.30.6
 -------------------
 * CVE-2022-41903:
   git log has the ability to display commits using an arbitrary
   format with its --format specifiers. This functionality is also
   exposed to git archive via the export-subst gitattribute.
   When processing the padding operators (e.g., %<(, %<|(, %>(,
   %>>(, or %><( ), an integer overflow can occur in
   pretty.c::format_and_pad_commit() where a size_t is improperly
   stored as an int, and then added as an offset to a subsequent
   memcpy() call.
   This overflow can be triggered directly by a user running a
   command which invokes the commit formatting machinery (e.g., git
   log --format=...). It may also be triggered indirectly through
   git archive via the export-subst mechanism, which expands format
   specifiers inside of files within the repository during a git
   archive.
   This integer overflow can result in arbitrary heap writes, which
   may result in remote code execution.
 * CVE-2022-23521:
    gitattributes are a mechanism to allow defining attributes for
    paths. These attributes can be defined by adding a `.gitattributes`
    file to the repository, which contains a set of file patterns and
    the attributes that should be set for paths matching this pattern.
    When parsing gitattributes, multiple integer overflows can occur
    when there is a huge number of path patterns, a huge number of
    attributes for a single pattern, or when the declared attribute
    names are huge.
    These overflows can be triggered via a crafted `.gitattributes` file
    that may be part of the commit history. Git silently splits lines
    longer than 2KB when parsing gitattributes from a file, but not when
    parsing them from the index. Consequently, the failure mode
    depends on whether the file exists in the working tree, the index or
    both.
    This integer overflow can result in arbitrary heap reads and writes,
    which may result in remote code execution.
 Credit for finding CVE-2022-41903 goes to Joern Schneeweisz of GitLab.
 An initial fix was authored by Markus Vervier of X41 D-Sec. Credit for
 finding CVE-2022-23521 goes to Markus Vervier and Eric Sesterhenn of X41
 D-Sec. This work was sponsored by OSTIF.
 The proposed fixes have been polished and extended to cover additional
 findings by Patrick Steinhardt of GitLab, with help from others on the
 Git security mailing list.
 Patrick Steinhardt (21):
      attr: fix overflow when upserting attribute with overly long name
      attr: fix out-of-bounds read with huge attribute names
      attr: fix integer overflow when parsing huge attribute names
      attr: fix out-of-bounds write when parsing huge number of attributes
      attr: fix out-of-bounds read with unreasonable amount of patterns
      attr: fix integer overflow with more than INT_MAX macros
      attr: harden allocation against integer overflows
      attr: fix silently splitting up lines longer than 2048 bytes
      attr: ignore attribute lines exceeding 2048 bytes
      attr: ignore overly large gitattributes files
      pretty: fix out-of-bounds write caused by integer overflow
      pretty: fix out-of-bounds read when left-flushing with stealing
      pretty: fix out-of-bounds read when parsing invalid padding format
      pretty: fix adding linefeed when placeholder is not expanded
      pretty: fix integer overflow in wrapping format
      utf8: fix truncated string lengths in `utf8_strnwidth()`
      utf8: fix returning negative string width
      utf8: fix overflow when returning string width
      utf8: fix checking for glyph width in `strbuf_utf8_replace()`
      utf8: refactor `strbuf_utf8_replace` to not rely on preallocated buffer
      pretty: restrict input lengths for padding and wrapping formats
--- a/Documentation/RelNotes/2.31.6.txt
+++ b/Documentation/RelNotes/2.31.6.txt
@ -0,0 +1,5 @@
 Git v2.31.6 Release Notes
 =========================
 This release merges the security fix that appears in v2.30.7; see
 the release notes for that version for details.
--- a/attr.c
+++ b/attr.c
@ -28,7 +28,7 @@ static const char git_attr__unknown[] = "(builtin)unknown";
 #endif
 struct git_attr {
-	int attr_nr; /* unique attribute number */
+	unsigned int attr_nr; /* unique attribute number */
 	char name[FLEX_ARRAY]; /* attribute name */
 };
@ -210,7 +210,7 @@ static void report_invalid_attr(const char *name, size_t len,
 * dictionary.  If no entry is found, create a new attribute and store it in
 * the dictionary.
 */
-static const struct git_attr *git_attr_internal(const char *name, int namelen)
+static const struct git_attr *git_attr_internal(const char *name, size_t namelen)
 {
 	struct git_attr *a;
@ -226,8 +226,8 @@ static const struct git_attr *git_attr_internal(const char *name, int namelen)
 		a->attr_nr = hashmap_get_size(&g_attr_hashmap.map);
 		attr_hashmap_add(&g_attr_hashmap, a->name, namelen, a);
-		assert(a->attr_nr ==
+		if (a->attr_nr != hashmap_get_size(&g_attr_hashmap.map) - 1)
-		       (hashmap_get_size(&g_attr_hashmap.map) - 1));
+			die(_("unable to add additional attribute"));
 	}
 	hashmap_unlock(&g_attr_hashmap);
@ -272,7 +272,7 @@ struct match_attr {
 		const struct git_attr *attr;
 	} u;
 	char is_macro;
-	unsigned num_attr;
+	size_t num_attr;
 	struct attr_state state[FLEX_ARRAY];
 };
@ -293,7 +293,7 @@ static const char *parse_attr(const char *src, int lineno, const char *cp,
 			      struct attr_state *e)
 {
 	const char *ep, *equals;
-	int len;
+	size_t len;
 	ep = cp + strcspn(cp, blank);
 	equals = strchr(cp, '=');
@ -337,8 +337,7 @@ static const char *parse_attr(const char *src, int lineno, const char *cp,
 static struct match_attr *parse_attr_line(const char *line, const char *src,
 					  int lineno, unsigned flags)
 {
-	int namelen;
+	size_t namelen, num_attr, i;
 	int num_attr, i;
 	const char *cp, *name, *states;
 	struct match_attr *res = NULL;
 	int is_macro;
@ -349,6 +348,11 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
 		return NULL;
 	name = cp;
 	if (strlen(line) >= ATTR_MAX_LINE_LENGTH) {
 		warning(_("ignoring overly long attributes line %d"), lineno);
 		return NULL;
 	}
 	if (*cp == '"' && !unquote_c_style(&pattern, name, &states)) {
 		name = pattern.buf;
 		namelen = pattern.len;
@ -385,10 +389,9 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
 			goto fail_return;
 	}
-	res = xcalloc(1,
+	res = xcalloc(1, st_add3(sizeof(*res),
-		      sizeof(*res) +
+				 st_mult(sizeof(struct attr_state), num_attr),
-		      sizeof(struct attr_state) * num_attr +
+				 is_macro ? 0 : namelen + 1));
 		      (is_macro ? 0 : namelen + 1));
 	if (is_macro) {
 		res->u.attr = git_attr_internal(name, namelen);
 	} else {
@ -451,11 +454,12 @@ struct attr_stack {
 static void attr_stack_free(struct attr_stack *e)
 {
-	int i;
+	unsigned i;
 	free(e->origin);
 	for (i = 0; i < e->num_matches; i++) {
 		struct match_attr *a = e->attrs[i];
-		int j;
+		size_t j;
 		for (j = 0; j < a->num_attr; j++) {
 			const char *setto = a->state[j].setto;
 			if (setto == ATTR__TRUE ||
@ -664,8 +668,8 @@ static void handle_attr_line(struct attr_stack *res,
 	a = parse_attr_line(line, src, lineno, flags);
 	if (!a)
 		return;
-	ALLOC_GROW(res->attrs, res->num_matches + 1, res->alloc);
+	ALLOC_GROW_BY(res->attrs, res->num_matches, 1, res->alloc);
-	res->attrs[res->num_matches++] = a;
+	res->attrs[res->num_matches - 1] = a;
 }
 static struct attr_stack *read_attr_from_array(const char **list)
@ -705,11 +709,12 @@ void git_attr_set_direction(enum git_attr_direction new_direction)
 static struct attr_stack *read_attr_from_file(const char *path, unsigned flags)
 {
 	struct strbuf buf = STRBUF_INIT;
 	int fd;
 	FILE *fp;
 	struct attr_stack *res;
 	char buf[2048];
 	int lineno = 0;
 	struct stat st;
 	if (flags & READ_ATTR_NOFOLLOW)
 		fd = open_nofollow(path, O_RDONLY);
@ -721,15 +726,26 @@ static struct attr_stack *read_attr_from_file(const char *path, unsigned flags)
 		return NULL;
 	}
 	fp = xfdopen(fd, "r");
 	if (fstat(fd, &st)) {
 		warning_errno(_("cannot fstat gitattributes file '%s'"), path);
 		fclose(fp);
 		return NULL;
 	}
 	if (st.st_size >= ATTR_MAX_FILE_SIZE) {
 		warning(_("ignoring overly large gitattributes file '%s'"), path);
 		fclose(fp);
 		return NULL;
 	}
 	CALLOC_ARRAY(res, 1);
-	while (fgets(buf, sizeof(buf), fp)) {
+	while (strbuf_getline(&buf, fp) != EOF) {
-		char *bufp = buf;
+		if (!lineno && starts_with(buf.buf, utf8_bom))
-		if (!lineno)
+			strbuf_remove(&buf, 0, strlen(utf8_bom));
-			skip_utf8_bom(&bufp, strlen(bufp));
+		handle_attr_line(res, buf.buf, path, ++lineno, flags);
 		handle_attr_line(res, bufp, path, ++lineno, flags);
 	}
 	fclose(fp);
 	strbuf_release(&buf);
 	return res;
 }
@ -740,13 +756,18 @@ static struct attr_stack *read_attr_from_index(struct index_state *istate,
 	struct attr_stack *res;
 	char *buf, *sp;
 	int lineno = 0;
 	size_t size;
 	if (!istate)
 		return NULL;
-	buf = read_blob_data_from_index(istate, path, NULL);
+	buf = read_blob_data_from_index(istate, path, &size);
 	if (!buf)
 		return NULL;
 	if (size >= ATTR_MAX_FILE_SIZE) {
 		warning(_("ignoring overly large gitattributes blob '%s'"), path);
 		return NULL;
 	}
 	CALLOC_ARRAY(res, 1);
 	for (sp = buf; *sp; ) {
@ -1017,12 +1038,12 @@ static int macroexpand_one(struct all_attrs_item *all_attrs, int nr, int rem);
 static int fill_one(const char *what, struct all_attrs_item *all_attrs,
 		    const struct match_attr *a, int rem)
 {
-	int i;
+	size_t i;
-	for (i = a->num_attr - 1; rem > 0 && i >= 0; i--) {
+	for (i = a->num_attr; rem > 0 && i > 0; i--) {
-		const struct git_attr *attr = a->state[i].attr;
+		const struct git_attr *attr = a->state[i - 1].attr;
 		const char **n = &(all_attrs[attr->attr_nr].value);
-		const char *v = a->state[i].setto;
+		const char *v = a->state[i - 1].setto;
 		if (*n == ATTR__UNKNOWN) {
 			debug_set(what,
@ -1041,11 +1062,11 @@ static int fill(const char *path, int pathlen, int basename_offset,
 		struct all_attrs_item *all_attrs, int rem)
 {
 	for (; rem > 0 && stack; stack = stack->prev) {
-		int i;
+		unsigned i;
 		const char *base = stack->origin ? stack->origin : "";
-		for (i = stack->num_matches - 1; 0 < rem && 0 <= i; i--) {
+		for (i = stack->num_matches; 0 < rem && 0 < i; i--) {
-			const struct match_attr *a = stack->attrs[i];
+			const struct match_attr *a = stack->attrs[i - 1];
 			if (a->is_macro)
 				continue;
 			if (path_matches(path, pathlen, basename_offset,
@ -1076,11 +1097,11 @@ static void determine_macros(struct all_attrs_item *all_attrs,
 			     const struct attr_stack *stack)
 {
 	for (; stack; stack = stack->prev) {
-		int i;
+		unsigned i;
-		for (i = stack->num_matches - 1; i >= 0; i--) {
+		for (i = stack->num_matches; i > 0; i--) {
-			const struct match_attr *ma = stack->attrs[i];
+			const struct match_attr *ma = stack->attrs[i - 1];
 			if (ma->is_macro) {
-				int n = ma->u.attr->attr_nr;
+				unsigned int n = ma->u.attr->attr_nr;
 				if (!all_attrs[n].macro) {
 					all_attrs[n].macro = ma;
 				}
@ -1132,7 +1153,7 @@ void git_check_attr(struct index_state *istate,
 	collect_some_attrs(istate, path, check);
 	for (i = 0; i < check->nr; i++) {
-		size_t n = check->items[i].attr->attr_nr;
+		unsigned int n = check->items[i].attr->attr_nr;
 		const char *value = check->all_attrs[n].value;
 		if (value == ATTR__UNKNOWN)
 			value = ATTR__UNSET;
--- a/attr.h
+++ b/attr.h
@ -107,6 +107,18 @@
 * - Free the `attr_check` struct by calling `attr_check_free()`.
 */
 /**
 * The maximum line length for a gitattributes file. If the line exceeds this
 * length we will ignore it.
 */
 #define ATTR_MAX_LINE_LENGTH 2048
 /**
  * The maximum size of the gitattributes file. If the file exceeds this size we
  * will ignore it.
  */
 #define ATTR_MAX_FILE_SIZE (100 * 1024 * 1024)
 struct index_state;
 /**
--- a/column.c
+++ b/column.c
@ -23,7 +23,7 @@ struct column_data {
 /* return length of 's' in letters, ANSI escapes stripped */
 static int item_length(const char *s)
 {
-	return utf8_strnwidth(s, -1, 1);
+	return utf8_strnwidth(s, strlen(s), 1);
 }
 /*
--- a/git-compat-util.h
+++ b/git-compat-util.h
@ -929,6 +929,14 @@ static inline size_t st_sub(size_t a, size_t b)
 	return a - b;
 }
 /*
  * Convert a size_t value to int. Values greater than INT_MAX are not
  * truncated: die() is called with a message reporting the offending
  * number instead.
  */
 static inline int cast_size_t_to_int(size_t a)
 {
 	if (a > INT_MAX)
 		die("number too large to represent as int on this platform: %"PRIuMAX,
 		    (uintmax_t)a);
 	return (int)a;
 }
 #ifdef HAVE_ALLOCA_H
 # include <alloca.h>
 # define xalloca(size)      (alloca(size))
--- a/pretty.c
+++ b/pretty.c
@ -14,6 +14,13 @@
 #include "trailer.h"
 #include "run-command.h"
 /*
 * The limit for formatting directives, which enable the caller to append
 * arbitrarily many bytes to the formatted buffer. This includes padding
 * and wrapping formatters.
 */
 #define FORMATTING_LIMIT (16 * 1024)
 static char *user_format;
 static struct cmt_fmt_map {
 	const char *name;
@ -920,7 +927,9 @@ static void strbuf_wrap(struct strbuf *sb, size_t pos,
 	if (pos)
 		strbuf_add(&tmp, sb->buf, pos);
 	strbuf_add_wrapped_text(&tmp, sb->buf + pos,
-				(int) indent1, (int) indent2, (int) width);
+				cast_size_t_to_int(indent1),
 				cast_size_t_to_int(indent2),
 				cast_size_t_to_int(width));
 	strbuf_swap(&tmp, sb);
 	strbuf_release(&tmp);
 }
@ -1046,9 +1055,18 @@ static size_t parse_padding_placeholder(const char *placeholder,
 		const char *end = start + strcspn(start, ",)");
 		char *next;
 		int width;
-		if (!end || end == start)
+		if (!*end || end == start)
 			return 0;
 		width = strtol(start, &next, 10);
 		/*
 		 * We need to limit the amount of padding, or otherwise this
 		 * would allow the user to pad the buffer by arbitrarily many
 		 * bytes and thus cause resource exhaustion.
 		 */
 		if (width < -FORMATTING_LIMIT || width > FORMATTING_LIMIT)
 			return 0;
 		if (next == start || width == 0)
 			return 0;
 		if (width < 0) {
@ -1293,6 +1311,16 @@ static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */
 				if (*next != ')')
 					return 0;
 			}
 			/*
 			 * We need to limit the format here as it allows the
 			 * user to prepend arbitrarily many bytes to the buffer
 			 * when rewrapping.
 			 */
 			if (width > FORMATTING_LIMIT ||
 			    indent1 > FORMATTING_LIMIT ||
 			    indent2 > FORMATTING_LIMIT)
 				return 0;
 			rewrap_message_tail(sb, c, width, indent1, indent2);
 			return end - placeholder + 1;
 		} else
@ -1574,19 +1602,21 @@ static size_t format_and_pad_commit(struct strbuf *sb, /* in UTF-8 */
 				    struct format_commit_context *c)
 {
 	struct strbuf local_sb = STRBUF_INIT;
-	int total_consumed = 0, len, padding = c->padding;
+	size_t total_consumed = 0;
 	int len, padding = c->padding;
 	if (padding < 0) {
 		const char *start = strrchr(sb->buf, '\n');
 		int occupied;
 		if (!start)
 			start = sb->buf;
-		occupied = utf8_strnwidth(start, -1, 1);
+		occupied = utf8_strnwidth(start, strlen(start), 1);
 		occupied += c->pretty_ctx->graph_width;
 		padding = (-padding) - occupied;
 	}
 	while (1) {
 		int modifier = *placeholder == 'C';
-		int consumed = format_commit_one(&local_sb, placeholder, c);
+		size_t consumed = format_commit_one(&local_sb, placeholder, c);
 		total_consumed += consumed;
 		if (!modifier)
@ -1598,7 +1628,7 @@ static size_t format_and_pad_commit(struct strbuf *sb, /* in UTF-8 */
 		placeholder++;
 		total_consumed++;
 	}
-	len = utf8_strnwidth(local_sb.buf, -1, 1);
+	len = utf8_strnwidth(local_sb.buf, local_sb.len, 1);
 	if (c->flush_type == flush_left_and_steal) {
 		const char *ch = sb->buf + sb->len - 1;
@ -1613,7 +1643,7 @@ static size_t format_and_pad_commit(struct strbuf *sb, /* in UTF-8 */
 			if (*ch != 'm')
 				break;
 			p = ch - 1;
-			while (ch - p < 10 && *p != '\033')
+			while (p > sb->buf && ch - p < 10 && *p != '\033')
 				p--;
 			if (*p != '\033' ||
 			    ch + 1 - p != display_mode_esc_sequence_len(p))
@ -1652,7 +1682,7 @@ static size_t format_and_pad_commit(struct strbuf *sb, /* in UTF-8 */
 		}
 		strbuf_addbuf(sb, &local_sb);
 	} else {
-		int sb_len = sb->len, offset = 0;
+		size_t sb_len = sb->len, offset = 0;
 		if (c->flush_type == flush_left)
 			offset = padding - len;
 		else if (c->flush_type == flush_both)
@ -1675,8 +1705,7 @@ static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */
 				 const char *placeholder,
 				 void *context)
 {
-	int consumed;
+	size_t consumed, orig_len;
 	size_t orig_len;
 	enum {
 		NO_MAGIC,
 		ADD_LF_BEFORE_NON_EMPTY,
@ -1697,9 +1726,21 @@ static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */
 	default:
 		break;
 	}
-	if (magic != NO_MAGIC)
+	if (magic != NO_MAGIC) {
 		placeholder++;
 		switch (placeholder[0]) {
 		case 'w':
 			/*
 			 * `%+w()` cannot ever expand to a non-empty string,
 			 * and it potentially changes the layout of preceding
 			 * contents. We're thus not able to handle the magic in
 			 * this combination and refuse the pattern.
 			 */
 			return 0;
 		};
 	}
 	orig_len = sb->len;
 	if (((struct format_commit_context *)context)->flush_type != no_flush)
 		consumed = format_and_pad_commit(sb, placeholder, context);
--- a/t/t0003-attributes.sh
+++ b/t/t0003-attributes.sh
@ -369,4 +369,63 @@ test_expect_success SYMLINKS 'symlinks not respected in-tree' '
 	test_i18ngrep "unable to access.*gitattributes" err
 '
 test_expect_success 'large attributes line ignored in tree' '
 	test_when_finished "rm .gitattributes" &&
 	printf "path %02043d" 1 >.gitattributes &&
 	git check-attr --all path >actual 2>err &&
 	echo "warning: ignoring overly long attributes line 1" >expect &&
 	test_cmp expect err &&
 	test_must_be_empty actual
 '
 test_expect_success 'large attributes line ignores trailing content in tree' '
 	test_when_finished "rm .gitattributes" &&
 	# older versions of Git broke lines at 2048 bytes; the 2045 bytes
 	# of 0-padding here is accounting for the three bytes of "a 1", which
 	# would knock "trailing" to the "next" line, where it would be
 	# erroneously parsed.
 	printf "a %02045dtrailing attribute\n" 1 >.gitattributes &&
 	git check-attr --all trailing >actual 2>err &&
 	echo "warning: ignoring overly long attributes line 1" >expect &&
 	test_cmp expect err &&
 	test_must_be_empty actual
 '
 test_expect_success EXPENSIVE 'large attributes file ignored in tree' '
 	test_when_finished "rm .gitattributes" &&
 	dd if=/dev/zero of=.gitattributes bs=101M count=1 2>/dev/null &&
 	git check-attr --all path >/dev/null 2>err &&
 	echo "warning: ignoring overly large gitattributes file ${SQ}.gitattributes${SQ}" >expect &&
 	test_cmp expect err
 '
 test_expect_success 'large attributes line ignored in index' '
 	test_when_finished "git update-index --remove .gitattributes" &&
 	blob=$(printf "path %02043d" 1 | git hash-object -w --stdin) &&
 	git update-index --add --cacheinfo 100644,$blob,.gitattributes &&
 	git check-attr --cached --all path >actual 2>err &&
 	echo "warning: ignoring overly long attributes line 1" >expect &&
 	test_cmp expect err &&
 	test_must_be_empty actual
 '
 test_expect_success 'large attributes line ignores trailing content in index' '
 	test_when_finished "git update-index --remove .gitattributes" &&
 	blob=$(printf "a %02045dtrailing attribute\n" 1 | git hash-object -w --stdin) &&
 	git update-index --add --cacheinfo 100644,$blob,.gitattributes &&
 	git check-attr --cached --all trailing >actual 2>err &&
 	echo "warning: ignoring overly long attributes line 1" >expect &&
 	test_cmp expect err &&
 	test_must_be_empty actual
 '
 test_expect_success EXPENSIVE 'large attributes file ignored in index' '
 	test_when_finished "git update-index --remove .gitattributes" &&
 	blob=$(dd if=/dev/zero bs=101M count=1 2>/dev/null | git hash-object -w --stdin) &&
 	git update-index --add --cacheinfo 100644,$blob,.gitattributes &&
 	git check-attr --cached --all path >/dev/null 2>err &&
 	echo "warning: ignoring overly large gitattributes blob ${SQ}.gitattributes${SQ}" >expect &&
 	test_cmp expect err
 '
 test_done
--- a/t/t4205-log-pretty-formats.sh
+++ b/t/t4205-log-pretty-formats.sh
@ -1002,4 +1002,80 @@ test_expect_success '%(describe:exclude=...) vs git describe --exclude ...' '
 	test_cmp expect actual
 '
 test_expect_success 'log --pretty with space stealing' '
 	printf mm0 >expect &&
 	git log -1 --pretty="format:mm%>>|(1)%x30" >actual &&
 	test_cmp expect actual
 '
 test_expect_success 'log --pretty with invalid padding format' '
 	printf "%s%%<(20" "$(git rev-parse HEAD)" >expect &&
 	git log -1 --pretty="format:%H%<(20" >actual &&
 	test_cmp expect actual
 '
 test_expect_success 'log --pretty with magical wrapping directives' '
 	commit_id=$(git commit-tree HEAD^{tree} -m "describe me") &&
 	git tag describe-me $commit_id &&
 	printf "\n(tag:\ndescribe-me)%%+w(2)" >expect &&
 	git log -1 --pretty="format:%w(1)%+d%+w(2)" $commit_id >actual &&
 	test_cmp expect actual
 '
 test_expect_success SIZE_T_IS_64BIT 'log --pretty with overflowing wrapping directive' '
 	printf "%%w(2147483649,1,1)0" >expect &&
 	git log -1 --pretty="format:%w(2147483649,1,1)%x30" >actual &&
 	test_cmp expect actual &&
 	printf "%%w(1,2147483649,1)0" >expect &&
 	git log -1 --pretty="format:%w(1,2147483649,1)%x30" >actual &&
 	test_cmp expect actual &&
 	printf "%%w(1,1,2147483649)0" >expect &&
 	git log -1 --pretty="format:%w(1,1,2147483649)%x30" >actual &&
 	test_cmp expect actual
 '
 test_expect_success SIZE_T_IS_64BIT 'log --pretty with overflowing padding directive' '
 	printf "%%<(2147483649)0" >expect &&
 	git log -1 --pretty="format:%<(2147483649)%x30" >actual &&
 	test_cmp expect actual
 '
 test_expect_success 'log --pretty with padding and preceding control chars' '
 	printf "\20\20   0" >expect &&
 	git log -1 --pretty="format:%x10%x10%>|(4)%x30" >actual &&
 	test_cmp expect actual
 '
 test_expect_success 'log --pretty truncation with control chars' '
 	test_commit "$(printf "\20\20\20\20xxxx")" file contents commit-with-control-chars &&
 	printf "\20\20\20\20x.." >expect &&
 	git log -1 --pretty="format:%<(3,trunc)%s" commit-with-control-chars >actual &&
 	test_cmp expect actual
 '
 test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message' '
 	# We only assert that this command does not crash. This needs to be
 	# executed with the address sanitizer to demonstrate failure.
 	git log -1 --pretty="format:%>(2147483646)%x41%41%>(2147483646)%x41" >/dev/null
 '
 test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'set up huge commit' '
 	test-tool genzeros 2147483649 | tr "\000" "1" >expect &&
 	huge_commit=$(git commit-tree -F expect HEAD^{tree})
 '
 test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message' '
 	git log -1 --format="%B%<(1)%x30" $huge_commit >actual &&
 	echo 0 >>expect &&
 	test_cmp expect actual
 '
 test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message does not cause allocation failure' '
 	test_must_fail git log -1 --format="%<(1)%B" $huge_commit 2>error &&
 	cat >expect <<-EOF &&
 	fatal: number too large to represent as int on this platform: 2147483649
 	EOF
 	test_cmp expect error
 '
 test_done
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@ -1642,6 +1642,10 @@ build_option () {
 	sed -ne "s/^$1: //p"
 }
 test_lazy_prereq SIZE_T_IS_64BIT '
 	test 8 -eq "$(build_option sizeof-size_t)"
 '
 test_lazy_prereq LONG_IS_64BIT '
 	test 8 -le "$(build_option sizeof-long)"
 '
--- a/utf8.c
+++ b/utf8.c
@ -206,26 +206,34 @@ int utf8_width(const char **start, size_t *remainder_p)
 * string, assuming that the string is utf8.  Returns strlen() instead
 * if the string does not look like a valid utf8 string.
 */
-int utf8_strnwidth(const char *string, int len, int skip_ansi)
+int utf8_strnwidth(const char *string, size_t len, int skip_ansi)
 {
 	int width = 0;
 	const char *orig = string;
 	size_t width = 0;
 	if (len == -1)
 		len = strlen(string);
 	while (string && string < orig + len) {
-		int skip;
+		int glyph_width;
 		size_t skip;
 		while (skip_ansi &&
 		       (skip = display_mode_esc_sequence_len(string)) != 0)
 			string += skip;
-		width += utf8_width(&string, NULL);
+
 		glyph_width = utf8_width(&string, NULL);
 		if (glyph_width > 0)
 			width += glyph_width;
 	}
-	return string ? width : len;
+
 	/*
 	 * TODO: fix the interface of this function and `utf8_strwidth()` to
 	 * return `size_t` instead of `int`.
 	 */
 	return cast_size_t_to_int(string ? width : len);
 }
 int utf8_strwidth(const char *string)
 {
-	return utf8_strnwidth(string, -1, 0);
+	return utf8_strnwidth(string, strlen(string), 0);
 }
 int is_utf8(const char *text)
@ -357,51 +365,52 @@ void strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len,
 void strbuf_utf8_replace(struct strbuf *sb_src, int pos, int width,
 			 const char *subst)
 {
-	struct strbuf sb_dst = STRBUF_INIT;
+	const char *src = sb_src->buf, *end = sb_src->buf + sb_src->len;
-	char *src = sb_src->buf;
+	struct strbuf dst;
-	char *end = src + sb_src->len;
+	int w = 0;
 	char *dst;
 	int w = 0, subst_len = 0;
-	if (subst)
+	strbuf_init(&dst, sb_src->len);
 		subst_len = strlen(subst);
 	strbuf_grow(&sb_dst, sb_src->len + subst_len);
 	dst = sb_dst.buf;
 	while (src < end) {
-		char *old;
+		const char *old;
 		int glyph_width;
 		size_t n;
 		while ((n = display_mode_esc_sequence_len(src))) {
-			memcpy(dst, src, n);
+			strbuf_add(&dst, src, n);
 			src += n;
 			dst += n;
 		}
 		if (src >= end)
 			break;
 		old = src;
-		n = utf8_width((const char**)&src, NULL);
+		glyph_width = utf8_width((const char**)&src, NULL);
-		if (!src) 	/* broken utf-8, do nothing */
+		if (!src) /* broken utf-8, do nothing */
 			goto out;
-		if (n && w >= pos && w < pos + width) {
+
 		/*
 		 * In case we see a control character we copy it into the
 		 * buffer, but don't add it to the width.
 		 */
 		if (glyph_width < 0)
 			glyph_width = 0;
 		if (glyph_width && w >= pos && w < pos + width) {
 			if (subst) {
-				memcpy(dst, subst, subst_len);
+				strbuf_addstr(&dst, subst);
 				dst += subst_len;
 				subst = NULL;
 			}
-			w += n;
+		} else {
-			continue;
+			strbuf_add(&dst, old, src - old);
 		}
-		memcpy(dst, old, src - old);
+
-		dst += src - old;
+		w += glyph_width;
 		w += n;
 	}
-	strbuf_setlen(&sb_dst, dst - sb_dst.buf);
+
-	strbuf_swap(sb_src, &sb_dst);
+	strbuf_swap(sb_src, &dst);
 out:
-	strbuf_release(&sb_dst);
+	strbuf_release(&dst);
 }
 /*
@ -796,7 +805,7 @@ int skip_utf8_bom(char **text, size_t len)
 void strbuf_utf8_align(struct strbuf *buf, align_type position, unsigned int width,
 		       const char *s)
 {
-	int slen = strlen(s);
+	size_t slen = strlen(s);
 	int display_len = utf8_strnwidth(s, slen, 0);
 	int utf8_compensation = slen - display_len;
--- a/utf8.h
+++ b/utf8.h
@ -7,7 +7,7 @@ typedef unsigned int ucs_char_t;  /* assuming 32bit int */
 size_t display_mode_esc_sequence_len(const char *s);
 int utf8_width(const char **start, size_t *remainder_p);
-int utf8_strnwidth(const char *string, int len, int skip_ansi);
+int utf8_strnwidth(const char *string, size_t len, int skip_ansi);
 int utf8_strwidth(const char *string);
 int is_utf8(const char *text);
 int is_encoding_utf8(const char *name);