From b6537d83ee30693efb17a35321b8bd03b752033a Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Tue, 11 Feb 2020 12:18:52 -0500
Subject: [PATCH 1/4] mailinfo: treat header values as C strings

We read each header line into a strbuf, which means that we could
in theory handle header values with embedded NUL bytes. But in practice,
the values we parse out are passed to decode_header(), which uses
strstr(), strchr(), etc. And we would not expect such bytes anyway; they
are forbidden by RFC822, etc. and any non-ASCII characters should be
encoded with RFC2047 encoding.

So let's switch to using strbuf_addstr(), which saves us some length
computations (and will enable further cleanups in this code).

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 mailinfo.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mailinfo.c b/mailinfo.c
index 402ef04dd1..59d5a8b8f3 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -557,7 +557,7 @@ static int check_header(struct mailinfo *mi,
 			/* Unwrap inline B and Q encoding, and optionally
 			 * normalize the meta information to utf8.
 			 */
-			strbuf_add(&sb, line->buf + len + 2, line->len - len - 2);
+			strbuf_addstr(&sb, line->buf + len + 2);
 			decode_header(mi, &sb);
 			handle_header(&hdr_data[i], &sb);
 			ret = 1;
@@ -568,7 +568,7 @@ static int check_header(struct mailinfo *mi,
 	/* Content stuff */
 	if (cmp_header(line, "Content-Type")) {
 		len = strlen("Content-Type: ");
-		strbuf_add(&sb, line->buf + len, line->len - len);
+		strbuf_addstr(&sb, line->buf + len);
 		decode_header(mi, &sb);
 		handle_content_type(mi, &sb);
 		ret = 1;
@@ -576,7 +576,7 @@ static int check_header(struct mailinfo *mi,
 	}
 	if (cmp_header(line, "Content-Transfer-Encoding")) {
 		len = strlen("Content-Transfer-Encoding: ");
-		strbuf_add(&sb, line->buf + len, line->len - len);
+		strbuf_addstr(&sb, line->buf + len);
 		decode_header(mi, &sb);
 		handle_content_transfer_encoding(mi, &sb);
 		ret = 1;
@@ -584,7 +584,7 @@ static int check_header(struct mailinfo *mi,
 	}
 	if (cmp_header(line, "Message-Id")) {
 		len = strlen("Message-Id: ");
-		strbuf_add(&sb, line->buf + len, line->len - len);
+		strbuf_addstr(&sb, line->buf + len);
 		decode_header(mi, &sb);
 		if (mi->add_message_id)
 			mi->message_id = strbuf_detach(&sb, NULL);

From f447d0293e803d9acf390bbdf373ed98bdc125f4 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Tue, 11 Feb 2020 12:19:23 -0500
Subject: [PATCH 2/4] mailinfo: simplify parsing of header values

Our code to parse header values first checks to see if a line starts
with a header, and then manually skips past the matched string to find
the value. We can do this all in one step by modeling after
skip_prefix(), which returns a pointer into the string after the
parsing.

This lets us remove some repeated strings, and will also enable us to
parse more flexibly in a future patch.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 mailinfo.c | 40 ++++++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/mailinfo.c b/mailinfo.c
index 59d5a8b8f3..ee8d05e239 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -346,11 +346,16 @@ static const char *header[MAX_HDR_PARSED] = {
 	"From","Subject","Date",
 };
 
-static inline int cmp_header(const struct strbuf *line, const char *hdr)
+static inline int skip_header(const struct strbuf *line, const char *hdr,
+			      const char **outval)
 {
-	int len = strlen(hdr);
-	return !strncasecmp(line->buf, hdr, len) && line->len > len &&
-			line->buf[len] == ':' && isspace(line->buf[len + 1]);
+	const char *val;
+	if (!skip_iprefix(line->buf, hdr, &val) ||
+	    *val++ != ':' ||
+	    !isspace(*val++))
+		return 0;
+	*outval = val;
+	return 1;
 }
 
 static int is_format_patch_separator(const char *line, int len)
@@ -547,17 +552,18 @@ static int check_header(struct mailinfo *mi,
 			const struct strbuf *line,
 			struct strbuf *hdr_data[], int overwrite)
 {
-	int i, ret = 0, len;
+	int i, ret = 0;
 	struct strbuf sb = STRBUF_INIT;
+	const char *val;
 
 	/* search for the interesting parts */
 	for (i = 0; header[i]; i++) {
-		int len = strlen(header[i]);
-		if ((!hdr_data[i] || overwrite) && cmp_header(line, header[i])) {
+		if ((!hdr_data[i] || overwrite) &&
+		    skip_header(line, header[i], &val)) {
 			/* Unwrap inline B and Q encoding, and optionally
 			 * normalize the meta information to utf8.
 			 */
-			strbuf_addstr(&sb, line->buf + len + 2);
+			strbuf_addstr(&sb, val);
 			decode_header(mi, &sb);
 			handle_header(&hdr_data[i], &sb);
 			ret = 1;
@@ -566,25 +572,22 @@ static int check_header(struct mailinfo *mi,
 	}
 
 	/* Content stuff */
-	if (cmp_header(line, "Content-Type")) {
-		len = strlen("Content-Type: ");
-		strbuf_addstr(&sb, line->buf + len);
+	if (skip_header(line, "Content-Type", &val)) {
+		strbuf_addstr(&sb, val);
 		decode_header(mi, &sb);
 		handle_content_type(mi, &sb);
 		ret = 1;
 		goto check_header_out;
 	}
-	if (cmp_header(line, "Content-Transfer-Encoding")) {
-		len = strlen("Content-Transfer-Encoding: ");
-		strbuf_addstr(&sb, line->buf + len);
+	if (skip_header(line, "Content-Transfer-Encoding", &val)) {
+		strbuf_addstr(&sb, val);
 		decode_header(mi, &sb);
 		handle_content_transfer_encoding(mi, &sb);
 		ret = 1;
 		goto check_header_out;
 	}
-	if (cmp_header(line, "Message-Id")) {
-		len = strlen("Message-Id: ");
-		strbuf_addstr(&sb, line->buf + len);
+	if (skip_header(line, "Message-Id", &val)) {
+		strbuf_addstr(&sb, val);
 		decode_header(mi, &sb);
 		if (mi->add_message_id)
 			mi->message_id = strbuf_detach(&sb, NULL);
@@ -606,8 +609,9 @@ static int is_inbody_header(const struct mailinfo *mi,
 			    const struct strbuf *line)
 {
 	int i;
+	const char *val;
 	for (i = 0; header[i]; i++)
-		if (!mi->s_hdr_data[i] && cmp_header(line, header[i]))
+		if (!mi->s_hdr_data[i] && skip_header(line, header[i], &val))
 			return 1;
 	return 0;
 }

From ffbea1816dcbce12d4ffb304ddf8993ef611d4f1 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Tue, 11 Feb 2020 12:19:53 -0500
Subject: [PATCH 3/4] mailinfo: be more liberal with header whitespace

RFC822 and friends allow arbitrary whitespace after the colon of a
header and before the values. I.e.:

  Subject:foo
  Subject: foo
  Subject:  foo

all have the subject "foo". But mailinfo requires exactly one space.
This doesn't seem to be bothering anybody, but it is pickier than the
standard specifies. And we can easily just soak up arbitrary whitespace
there in our parser, so let's do so.

Note that the test covers both too little and too much whitespace, but
the "too much" case already works fine (because we later eat leading and
trailing whitespace from the values).

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 mailinfo.c          |  5 +++--
 t/t5100-mailinfo.sh | 15 +++++++++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/mailinfo.c b/mailinfo.c
index ee8d05e239..78f06da524 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -351,9 +351,10 @@ static inline int skip_header(const struct strbuf *line, const char *hdr,
 {
 	const char *val;
 	if (!skip_iprefix(line->buf, hdr, &val) ||
-	    *val++ != ':' ||
-	    !isspace(*val++))
+	    *val++ != ':')
 		return 0;
+	while (isspace(*val))
+		val++;
 	*outval = val;
 	return 1;
 }
diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh
index 9690dcad4f..147e616533 100755
--- a/t/t5100-mailinfo.sh
+++ b/t/t5100-mailinfo.sh
@@ -213,4 +213,19 @@ test_expect_failure 'mailinfo -b separated double [PATCH]' '
 	test z"$subj" = z"Subject: [other] message"
 '
 
+test_expect_success 'mailinfo handles unusual header whitespace' '
+	git mailinfo /dev/null /dev/null >actual <<-\EOF &&
+	From:Real Name <user@example.com>
+	Subject:    extra spaces
+	EOF
+
+	cat >expect <<-\EOF &&
+	Author: Real Name
+	Email: user@example.com
+	Subject: extra spaces
+
+	EOF
+	test_cmp expect actual
+'
+
 test_done

From f696a2b1c850c81130740945835beec72727debf Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Tue, 11 Feb 2020 12:20:05 -0500
Subject: [PATCH 4/4] mailinfo: factor out some repeated header handling

We do the same thing for each header: match it, copy it to a strbuf, and
decode it. Let's put that in a helper function to avoid repetition.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 mailinfo.c | 39 +++++++++++++++++++++++----------------
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/mailinfo.c b/mailinfo.c
index 78f06da524..cf92255515 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -549,23 +549,36 @@ release_return:
 		mi->input_error = -1;
 }
 
+/*
+ * Returns true if "line" contains a header matching "hdr", in which case "val"
+ * will contain the value of the header with any RFC2047 B and Q encoding
+ * unwrapped, and optionally normalize the meta information to utf8.
+ */
+static int parse_header(const struct strbuf *line,
+			const char *hdr,
+			struct mailinfo *mi,
+			struct strbuf *val)
+{
+	const char *val_str;
+
+	if (!skip_header(line, hdr, &val_str))
+		return 0;
+	strbuf_addstr(val, val_str);
+	decode_header(mi, val);
+	return 1;
+}
+
 static int check_header(struct mailinfo *mi,
 			const struct strbuf *line,
 			struct strbuf *hdr_data[], int overwrite)
 {
 	int i, ret = 0;
 	struct strbuf sb = STRBUF_INIT;
-	const char *val;
 
 	/* search for the interesting parts */
 	for (i = 0; header[i]; i++) {
 		if ((!hdr_data[i] || overwrite) &&
-		    skip_header(line, header[i], &val)) {
-			/* Unwrap inline B and Q encoding, and optionally
-			 * normalize the meta information to utf8.
-			 */
-			strbuf_addstr(&sb, val);
-			decode_header(mi, &sb);
+		    parse_header(line, header[i], mi, &sb)) {
 			handle_header(&hdr_data[i], &sb);
 			ret = 1;
 			goto check_header_out;
@@ -573,23 +586,17 @@ static int check_header(struct mailinfo *mi,
 	}
 
 	/* Content stuff */
-	if (skip_header(line, "Content-Type", &val)) {
-		strbuf_addstr(&sb, val);
-		decode_header(mi, &sb);
+	if (parse_header(line, "Content-Type", mi, &sb)) {
 		handle_content_type(mi, &sb);
 		ret = 1;
 		goto check_header_out;
 	}
-	if (skip_header(line, "Content-Transfer-Encoding", &val)) {
-		strbuf_addstr(&sb, val);
-		decode_header(mi, &sb);
+	if (parse_header(line, "Content-Transfer-Encoding", mi, &sb)) {
 		handle_content_transfer_encoding(mi, &sb);
 		ret = 1;
 		goto check_header_out;
 	}
-	if (skip_header(line, "Message-Id", &val)) {
-		strbuf_addstr(&sb, val);
-		decode_header(mi, &sb);
+	if (parse_header(line, "Message-Id", mi, &sb)) {
 		if (mi->add_message_id)
 			mi->message_id = strbuf_detach(&sb, NULL);
 		ret = 1;