From f8128cfb8d5892e76611d024a19c1ecdace9a39e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 23 May 2006 13:44:11 -0600 Subject: [PATCH 1/6] Make read_one_header_line return a flag not a length. Currently we only use the return value from read_one_header_line to tell if the line we have read is a header or not. So make it a flag. This paves the way for better email detection. Signed-off-by: Eric W. Biederman Signed-off-by: Junio C Hamano --- mailinfo.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/mailinfo.c b/mailinfo.c index b27651935d..83a2986e7e 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -331,7 +331,7 @@ struct header_def { int namelen; }; -static void check_header(char *line, int len, struct header_def *header) +static void check_header(char *line, struct header_def *header) { int i; @@ -349,7 +349,7 @@ static void check_header(char *line, int len, struct header_def *header) } } -static void check_subheader_line(char *line, int len) +static void check_subheader_line(char *line) { static struct header_def header[] = { { "Content-Type", handle_subcontent_type }, @@ -357,9 +357,9 @@ static void check_subheader_line(char *line, int len) handle_content_transfer_encoding }, { NULL }, }; - check_header(line, len, header); + check_header(line, header); } -static void check_header_line(char *line, int len) +static void check_header_line(char *line) { static struct header_def header[] = { { "From", handle_from }, @@ -370,7 +370,7 @@ static void check_header_line(char *line, int len) handle_content_transfer_encoding }, { NULL }, }; - check_header(line, len, header); + check_header(line, header); } static int read_one_header_line(char *line, int sz, FILE *in) @@ -709,8 +709,8 @@ static void handle_multipart_body(void) return; /* We are on boundary line. Start slurping the subhead. 
*/ while (1) { - int len = read_one_header_line(line, sizeof(line), stdin); - if (!len) { + int hdr = read_one_header_line(line, sizeof(line), stdin); + if (!hdr) { if (handle_multipart_one_part() < 0) return; /* Reset per part headers */ @@ -718,7 +718,7 @@ static void handle_multipart_body(void) charset[0] = 0; } else - check_subheader_line(line, len); + check_subheader_line(line); } fclose(patchfile); if (!patch_lines) { @@ -787,15 +787,15 @@ int main(int argc, char **argv) exit(1); } while (1) { - int len = read_one_header_line(line, sizeof(line), stdin); - if (!len) { + int hdr = read_one_header_line(line, sizeof(line), stdin); + if (!hdr) { if (multipart_boundary[0]) handle_multipart_body(); else handle_body(); break; } - check_header_line(line, len); + check_header_line(line); } return 0; } From 3350453014324e375cdca722b50e93cdd78894ed Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 23 May 2006 13:45:37 -0600 Subject: [PATCH 2/6] Move B and Q decoding into check header. B and Q decoding is not appropriate for in body headers, so move it up to where we explicitly know we have a real email header. Signed-off-by: Eric W. Biederman Signed-off-by: Junio C Hamano --- mailinfo.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/mailinfo.c b/mailinfo.c index 83a2986e7e..bee7b202cf 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -324,6 +324,7 @@ static void cleanup_space(char *buf) } } +static void decode_header_bq(char *it); typedef int (*header_fn_t)(char *); struct header_def { const char *name; @@ -343,6 +344,10 @@ static void check_header(char *line, struct header_def *header) int len = header[i].namelen; if (!strncasecmp(line, header[i].name, len) && line[len] == ':' && isspace(line[len + 1])) { + /* Unwrap inline B and Q encoding, and optionally + * normalize the meta information to utf8. 
+ */ + decode_header_bq(line + len + 2); header[i].func(line + len + 2); break; } @@ -597,13 +602,6 @@ static void handle_info(void) cleanup_space(email); cleanup_space(sub); - /* Unwrap inline B and Q encoding, and optionally - * normalize the meta information to utf8. - */ - decode_header_bq(name); - decode_header_bq(date); - decode_header_bq(email); - decode_header_bq(sub); printf("Author: %s\nEmail: %s\nSubject: %s\nDate: %s\n\n", name, email, sub, date); } From 8b4525fb3c6d79bd3a64b8f441237a4095db4e22 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 23 May 2006 13:47:28 -0600 Subject: [PATCH 3/6] Refactor commit message handling. - Move handle_info into main so it is called once after everything has been parsed. This allows the removal of a static variable and removes two duplicate calls. - Move parsing of inbody headers into handle_commit. This means we parse the in-body headers after we have decoded the character set, and it removes code duplication between handle_multipart_one_part and handle_body. - Change the flag indicating that we have seen an in body prefix header into another bit in seen. This is a little more general and allows the possibility of parsing in body headers after the body message has begun. Signed-off-by: Eric W. 
Biederman Signed-off-by: Junio C Hamano --- mailinfo.c | 58 +++++++++++++++++++++--------------------------------- 1 file changed, 22 insertions(+), 36 deletions(-) diff --git a/mailinfo.c b/mailinfo.c index bee7b202cf..3fa9505313 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -237,38 +237,41 @@ static int eatspace(char *line) #define SEEN_FROM 01 #define SEEN_DATE 02 #define SEEN_SUBJECT 04 +#define SEEN_PREFIX 0x08 /* First lines of body can have From:, Date:, and Subject: */ -static int handle_inbody_header(int *seen, char *line) +static void handle_inbody_header(int *seen, char *line) { + if (*seen & SEEN_PREFIX) + return; if (!memcmp("From:", line, 5) && isspace(line[5])) { if (!(*seen & SEEN_FROM) && handle_from(line+6)) { *seen |= SEEN_FROM; - return 1; + return; } } if (!memcmp("Date:", line, 5) && isspace(line[5])) { if (!(*seen & SEEN_DATE)) { handle_date(line+6); *seen |= SEEN_DATE; - return 1; + return; } } if (!memcmp("Subject:", line, 8) && isspace(line[8])) { if (!(*seen & SEEN_SUBJECT)) { handle_subject(line+9); *seen |= SEEN_SUBJECT; - return 1; + return; } } if (!memcmp("[PATCH]", line, 7) && isspace(line[7])) { if (!(*seen & SEEN_SUBJECT)) { handle_subject(line); *seen |= SEEN_SUBJECT; - return 1; + return; } } - return 0; + *seen |= SEEN_PREFIX; } static char *cleanup_subject(char *subject) @@ -590,12 +593,7 @@ static void decode_transfer_encoding(char *line) static void handle_info(void) { char *sub; - static int done_info = 0; - - if (done_info) - return; - done_info = 1; sub = cleanup_subject(subject); cleanup_space(name); cleanup_space(date); @@ -609,7 +607,7 @@ static void handle_info(void) /* We are inside message body and have read line[] already. * Spit out the commit log. 
*/ -static int handle_commit_msg(void) +static int handle_commit_msg(int *seen) { if (!cmitmsg) return 0; @@ -633,6 +631,11 @@ static int handle_commit_msg(void) decode_transfer_encoding(line); if (metainfo_charset) convert_to_utf8(line, charset); + + handle_inbody_header(seen, line); + if (!(*seen & SEEN_PREFIX)) + continue; + fputs(line, cmitmsg); } while (fgets(line, sizeof(line), stdin) != NULL); fclose(cmitmsg); @@ -664,26 +667,16 @@ static void handle_patch(void) * that the first part to contain commit message and a patch, and * handle other parts as pure patches. */ -static int handle_multipart_one_part(void) +static int handle_multipart_one_part(int *seen) { - int seen = 0; int n = 0; - int len; while (fgets(line, sizeof(line), stdin) != NULL) { again: - len = eatspace(line); n++; - if (!len) - continue; if (is_multipart_boundary(line)) break; - if (0 <= seen && handle_inbody_header(&seen, line)) - continue; - seen = -1; /* no more inbody headers */ - line[len] = '\n'; - handle_info(); - if (handle_commit_msg()) + if (handle_commit_msg(seen)) goto again; handle_patch(); break; @@ -695,6 +688,7 @@ static int handle_multipart_one_part(void) static void handle_multipart_body(void) { + int seen = 0; int part_num = 0; /* Skip up to the first boundary */ @@ -709,7 +703,7 @@ static void handle_multipart_body(void) while (1) { int hdr = read_one_header_line(line, sizeof(line), stdin); if (!hdr) { - if (handle_multipart_one_part() < 0) + if (handle_multipart_one_part(&seen) < 0) return; /* Reset per part headers */ transfer_encoding = TE_DONTCARE; @@ -730,18 +724,9 @@ static void handle_body(void) { int seen = 0; - while (fgets(line, sizeof(line), stdin) != NULL) { - int len = eatspace(line); - if (!len) - continue; - if (0 <= seen && handle_inbody_header(&seen, line)) - continue; - seen = -1; /* no more inbody headers */ - line[len] = '\n'; - handle_info(); - handle_commit_msg(); + if (fgets(line, sizeof(line), stdin) != NULL) { + handle_commit_msg(&seen); 
handle_patch(); - break; } fclose(patchfile); if (!patch_lines) { @@ -791,6 +776,7 @@ int main(int argc, char **argv) handle_multipart_body(); else handle_body(); + handle_info(); break; } check_header_line(line); From 1f36bee67e604735bc48be7fc731a823e6c5807f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 23 May 2006 13:49:00 -0600 Subject: [PATCH 4/6] In handle_body only read a line if we don't already have one. This prepares for detecting non-email patches that don't have mail headers. In which case we have already read the first line so handle_body should not ignore it. Signed-off-by: Eric W. Biederman Signed-off-by: Junio C Hamano --- mailinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mailinfo.c b/mailinfo.c index 3fa9505313..99989c25b2 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -724,7 +724,7 @@ static void handle_body(void) { int seen = 0; - if (fgets(line, sizeof(line), stdin) != NULL) { + if (line[0] || fgets(line, sizeof(line), stdin) != NULL) { handle_commit_msg(&seen); handle_patch(); } From f30b20282babcd77bcadef70b4e36e24cd1f6d59 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 23 May 2006 13:53:20 -0600 Subject: [PATCH 5/6] More accurately detect header lines in read_one_header_line Only count lines of the form '^.*: ' and '^From ' as email header lines. Signed-off-by: Eric W. 
Biederman Signed-off-by: Junio C Hamano --- mailinfo.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/mailinfo.c b/mailinfo.c index 99989c25b2..a2b15e2624 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -385,20 +385,29 @@ static int read_one_header_line(char *line, int sz, FILE *in) { int ofs = 0; while (ofs < sz) { + const char *colon; int peek, len; if (fgets(line + ofs, sz - ofs, in) == NULL) - return ofs; + break; len = eatspace(line + ofs); if (len == 0) - return ofs; - peek = fgetc(in); ungetc(peek, in); - if (peek == ' ' || peek == '\t') { - /* Yuck, 2822 header "folding" */ - ofs += len; - continue; + break; + colon = strchr(line, ':'); + if (!colon || !isspace(colon[1])) { + /* Re-add the newline */ + line[ofs + len] = '\n'; + line[ofs + len + 1] = '\0'; + break; } - return ofs + len; + ofs += len; + /* Yuck, 2822 header "folding" */ + peek = fgetc(in); ungetc(peek, in); + if (peek != ' ' && peek != '\t') + break; } + /* Count mbox From headers as headers */ + if (!ofs && !memcmp(line, "From ", 5)) + ofs = 1; return ofs; } From 2dec02b1ecafc47d4031d0a68a94c775a6a9ff9e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 23 May 2006 13:58:36 -0600 Subject: [PATCH 6/6] Allow in body headers beyond the in body header prefix. - handle_from is fixed to not mangle its input line. - Then handle_inbody_header is allowed to look in the body of a commit message for additional headers that we haven't already seen. This allows patches with all of the right information in unfortunate places to be imported. Signed-off-by: Eric W. 
Biederman Signed-off-by: Junio C Hamano --- mailinfo.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/mailinfo.c b/mailinfo.c index a2b15e2624..241bfb9e25 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -72,11 +72,14 @@ static int bogus_from(char *line) return 1; } -static int handle_from(char *line) +static int handle_from(char *in_line) { - char *at = strchr(line, '@'); + char line[1000]; + char *at; char *dst; + strcpy(line, in_line); + at = strchr(line, '@'); if (!at) return bogus_from(line); @@ -242,8 +245,6 @@ static int eatspace(char *line) /* First lines of body can have From:, Date:, and Subject: */ static void handle_inbody_header(int *seen, char *line) { - if (*seen & SEEN_PREFIX) - return; if (!memcmp("From:", line, 5) && isspace(line[5])) { if (!(*seen & SEEN_FROM) && handle_from(line+6)) { *seen |= SEEN_FROM;