From b6537d83ee30693efb17a35321b8bd03b752033a Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 11 Feb 2020 12:18:52 -0500 Subject: [PATCH 1/4] mailinfo: treat header values as C strings We read each header line into a strbuf, which means that we could in theory handle header values with embedded NUL bytes. But in practice, the values we parse out are passed to decode_header(), which uses strstr(), strchr(), etc. And we would not expect such bytes anyway; they are forbidden by RFC822, etc. and any non-ASCII characters should be encoded with RFC2047 encoding. So let's switch to using strbuf_addstr(), which saves us some length computations (and will enable further cleanups in this code). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- mailinfo.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mailinfo.c b/mailinfo.c index 402ef04dd1..59d5a8b8f3 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -557,7 +557,7 @@ static int check_header(struct mailinfo *mi, /* Unwrap inline B and Q encoding, and optionally * normalize the meta information to utf8. */ - strbuf_add(&sb, line->buf + len + 2, line->len - len - 2); + strbuf_addstr(&sb, line->buf + len + 2); decode_header(mi, &sb); handle_header(&hdr_data[i], &sb); ret = 1; @@ -568,7 +568,7 @@ static int check_header(struct mailinfo *mi, /* Content stuff */ if (cmp_header(line, "Content-Type")) { len = strlen("Content-Type: "); - strbuf_add(&sb, line->buf + len, line->len - len); + strbuf_addstr(&sb, line->buf + len); decode_header(mi, &sb); handle_content_type(mi, &sb); ret = 1; @@ -576,7 +576,7 @@ static int check_header(struct mailinfo *mi, } if (cmp_header(line, "Content-Transfer-Encoding")) { len = strlen("Content-Transfer-Encoding: "); - strbuf_add(&sb, line->buf + len, line->len - len); + strbuf_addstr(&sb, line->buf + len); decode_header(mi, &sb); handle_content_transfer_encoding(mi, &sb); ret = 1; @@ -584,7 +584,7 @@ static int check_header(struct mailinfo *mi, } if (cmp_header(line, "Message-Id")) { len = strlen("Message-Id: "); - strbuf_add(&sb, line->buf + len, line->len - len); + strbuf_addstr(&sb, line->buf + len); decode_header(mi, &sb); if (mi->add_message_id) mi->message_id = strbuf_detach(&sb, NULL); From f447d0293e803d9acf390bbdf373ed98bdc125f4 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 11 Feb 2020 12:19:23 -0500 Subject: [PATCH 2/4] mailinfo: simplify parsing of header values Our code to parse header values first checks to see if a line starts with a header, and then manually skips past the matched string to find the value. We can do this all in one step by modeling after skip_prefix(), which returns a pointer into the string after the parsing. This lets us remove some repeated strings, and will also enable us to parse more flexibly in a future patch. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- mailinfo.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/mailinfo.c b/mailinfo.c index 59d5a8b8f3..ee8d05e239 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -346,11 +346,16 @@ static const char *header[MAX_HDR_PARSED] = { "From","Subject","Date", }; -static inline int cmp_header(const struct strbuf *line, const char *hdr) +static inline int skip_header(const struct strbuf *line, const char *hdr, + const char **outval) { - int len = strlen(hdr); - return !strncasecmp(line->buf, hdr, len) && line->len > len && - line->buf[len] == ':' && isspace(line->buf[len + 1]); + const char *val; + if (!skip_iprefix(line->buf, hdr, &val) || + *val++ != ':' || + !isspace(*val++)) + return 0; + *outval = val; + return 1; } static int is_format_patch_separator(const char *line, int len) @@ -547,17 +552,18 @@ static int check_header(struct mailinfo *mi, const struct strbuf *line, struct strbuf *hdr_data[], int overwrite) { - int i, ret = 0, len; + int i, ret = 0; struct strbuf sb = STRBUF_INIT; + const char *val; /* search for the interesting parts */ for (i = 0; header[i]; i++) { - int len = strlen(header[i]); - if ((!hdr_data[i] || overwrite) && cmp_header(line, header[i])) { + if ((!hdr_data[i] || overwrite) && + skip_header(line, header[i], &val)) { /* Unwrap inline B and Q encoding, and optionally * normalize the meta information to utf8. */ - strbuf_addstr(&sb, line->buf + len + 2); + strbuf_addstr(&sb, val); decode_header(mi, &sb); handle_header(&hdr_data[i], &sb); ret = 1; @@ -566,25 +572,22 @@ static int check_header(struct mailinfo *mi, } /* Content stuff */ - if (cmp_header(line, "Content-Type")) { - len = strlen("Content-Type: "); - strbuf_addstr(&sb, line->buf + len); + if (skip_header(line, "Content-Type", &val)) { + strbuf_addstr(&sb, val); decode_header(mi, &sb); handle_content_type(mi, &sb); ret = 1; goto check_header_out; } - if (cmp_header(line, "Content-Transfer-Encoding")) { - len = strlen("Content-Transfer-Encoding: "); - strbuf_addstr(&sb, line->buf + len); + if (skip_header(line, "Content-Transfer-Encoding", &val)) { + strbuf_addstr(&sb, val); decode_header(mi, &sb); handle_content_transfer_encoding(mi, &sb); ret = 1; goto check_header_out; } - if (cmp_header(line, "Message-Id")) { - len = strlen("Message-Id: "); - strbuf_addstr(&sb, line->buf + len); + if (skip_header(line, "Message-Id", &val)) { + strbuf_addstr(&sb, val); decode_header(mi, &sb); if (mi->add_message_id) mi->message_id = strbuf_detach(&sb, NULL); @@ -606,8 +609,9 @@ static int is_inbody_header(const struct mailinfo *mi, const struct strbuf *line) { int i; + const char *val; for (i = 0; header[i]; i++) - if (!mi->s_hdr_data[i] && cmp_header(line, header[i])) + if (!mi->s_hdr_data[i] && skip_header(line, header[i], &val)) return 1; return 0; } From ffbea1816dcbce12d4ffb304ddf8993ef611d4f1 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 11 Feb 2020 12:19:53 -0500 Subject: [PATCH 3/4] mailinfo: be more liberal with header whitespace RFC822 and friends allow arbitrary whitespace after the colon of a header and before the values. I.e.: Subject:foo Subject: foo Subject: foo all have the subject "foo". But mailinfo requires exactly one space. This doesn't seem to be bothering anybody, but it is pickier than the standard specifies. And we can easily just soak up arbitrary whitespace there in our parser, so let's do so. Note that the test covers both too little and too much whitespace, but the "too much" case already works fine (because we later eat leading and trailing whitespace from the values). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- mailinfo.c | 5 +++-- t/t5100-mailinfo.sh | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/mailinfo.c b/mailinfo.c index ee8d05e239..78f06da524 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -351,9 +351,10 @@ static inline int skip_header(const struct strbuf *line, const char *hdr, { const char *val; if (!skip_iprefix(line->buf, hdr, &val) || - *val++ != ':' || - !isspace(*val++)) + *val++ != ':') return 0; + while (isspace(*val)) + val++; *outval = val; return 1; } diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh index 9690dcad4f..147e616533 100755 --- a/t/t5100-mailinfo.sh +++ b/t/t5100-mailinfo.sh @@ -213,4 +213,19 @@ test_expect_failure 'mailinfo -b separated double [PATCH]' ' test z"$subj" = z"Subject: [other] message" ' +test_expect_success 'mailinfo handles unusual header whitespace' ' + git mailinfo /dev/null /dev/null >actual <<-\EOF && + From:Real Name + Subject: extra spaces + EOF + + cat >expect <<-\EOF && + Author: Real Name + Email: user@example.com + Subject: extra spaces + + EOF + test_cmp expect actual +' + test_done From f696a2b1c850c81130740945835beec72727debf Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 11 Feb 2020 12:20:05 -0500 Subject: [PATCH 4/4] mailinfo: factor out some repeated header handling We do the same thing for each header: match it, copy it to a strbuf, and decode it. Let's put that in a helper function to avoid repetition. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- mailinfo.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/mailinfo.c b/mailinfo.c index 78f06da524..cf92255515 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -549,23 +549,36 @@ release_return: mi->input_error = -1; } +/* + * Returns true if "line" contains a header matching "hdr", in which case "val" + * will contain the value of the header with any RFC2047 B and Q encoding + * unwrapped, and optionally normalize the meta information to utf8. + */ +static int parse_header(const struct strbuf *line, + const char *hdr, + struct mailinfo *mi, + struct strbuf *val) +{ + const char *val_str; + + if (!skip_header(line, hdr, &val_str)) + return 0; + strbuf_addstr(val, val_str); + decode_header(mi, val); + return 1; +} + static int check_header(struct mailinfo *mi, const struct strbuf *line, struct strbuf *hdr_data[], int overwrite) { int i, ret = 0; struct strbuf sb = STRBUF_INIT; - const char *val; /* search for the interesting parts */ for (i = 0; header[i]; i++) { if ((!hdr_data[i] || overwrite) && - skip_header(line, header[i], &val)) { - /* Unwrap inline B and Q encoding, and optionally - * normalize the meta information to utf8. - */ - strbuf_addstr(&sb, val); - decode_header(mi, &sb); + parse_header(line, header[i], mi, &sb)) { handle_header(&hdr_data[i], &sb); ret = 1; goto check_header_out; @@ -573,23 +586,17 @@ static int check_header(struct mailinfo *mi, } /* Content stuff */ - if (skip_header(line, "Content-Type", &val)) { - strbuf_addstr(&sb, val); - decode_header(mi, &sb); + if (parse_header(line, "Content-Type", mi, &sb)) { handle_content_type(mi, &sb); ret = 1; goto check_header_out; } - if (skip_header(line, "Content-Transfer-Encoding", &val)) { - strbuf_addstr(&sb, val); - decode_header(mi, &sb); + if (parse_header(line, "Content-Transfer-Encoding", mi, &sb)) { handle_content_transfer_encoding(mi, &sb); ret = 1; goto check_header_out; } - if (skip_header(line, "Message-Id", &val)) { - strbuf_addstr(&sb, val); - decode_header(mi, &sb); + if (parse_header(line, "Message-Id", mi, &sb)) { if (mi->add_message_id) mi->message_id = strbuf_detach(&sb, NULL); ret = 1;