From 5963c0367f00df0e5eeb761f1ef77a33c8f54c40 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 25 Nov 2012 12:08:34 +0100 Subject: [PATCH 1/8] Add new function strbuf_add_xml_quoted() Substantially the same code is present in http-push.c and imap-send.c, so make a library function out of it. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- strbuf.c | 26 ++++++++++++++++++++++++++ strbuf.h | 6 ++++++ 2 files changed, 32 insertions(+) diff --git a/strbuf.c b/strbuf.c index 0510f76c24..41828f05d8 100644 --- a/strbuf.c +++ b/strbuf.c @@ -428,6 +428,32 @@ void strbuf_add_lines(struct strbuf *out, const char *prefix, strbuf_complete_line(out); } +void strbuf_addstr_xml_quoted(struct strbuf *buf, const char *s) +{ + while (*s) { + size_t len = strcspn(s, "\"<>&"); + strbuf_add(buf, s, len); + s += len; + switch (*s) { + case '"': + strbuf_addstr(buf, """); + break; + case '<': + strbuf_addstr(buf, "<"); + break; + case '>': + strbuf_addstr(buf, ">"); + break; + case '&': + strbuf_addstr(buf, "&"); + break; + case 0: + return; + } + s++; + } +} + static int is_rfc3986_reserved(char ch) { switch (ch) { diff --git a/strbuf.h b/strbuf.h index be941ee481..65aa2a032f 100644 --- a/strbuf.h +++ b/strbuf.h @@ -102,6 +102,12 @@ extern void strbuf_vaddf(struct strbuf *sb, const char *fmt, va_list ap); extern void strbuf_add_lines(struct strbuf *sb, const char *prefix, const char *buf, size_t size); +/* + * Append s to sb, with the characters '<', '>', '&' and '"' converted + * into XML entities. + */ +extern void strbuf_addstr_xml_quoted(struct strbuf *sb, const char *s); + static inline void strbuf_complete_line(struct strbuf *sb) { if (sb->len && sb->buf[sb->len - 1] != '\n') From 37141f27d8e0689fe465d8498656ee6543de3ea5 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 25 Nov 2012 12:08:35 +0100 Subject: [PATCH 2/8] xml_entities(): use function strbuf_addstr_xml_quoted() Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- http-push.c | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/http-push.c b/http-push.c index 8701c1215d..9923441a4e 100644 --- a/http-push.c +++ b/http-push.c @@ -172,28 +172,7 @@ enum dav_header_flag { static char *xml_entities(const char *s) { struct strbuf buf = STRBUF_INIT; - while (*s) { - size_t len = strcspn(s, "\"<>&"); - strbuf_add(&buf, s, len); - s += len; - switch (*s) { - case '"': - strbuf_addstr(&buf, """); - break; - case '<': - strbuf_addstr(&buf, "<"); - break; - case '>': - strbuf_addstr(&buf, ">"); - break; - case '&': - strbuf_addstr(&buf, "&"); - break; - case 0: - return strbuf_detach(&buf, NULL); - } - s++; - } + strbuf_addstr_xml_quoted(&buf, s); return strbuf_detach(&buf, NULL); } From 32a8569ecf211cc2ba98333daa10b1a6e783ae87 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 25 Nov 2012 12:08:36 +0100 Subject: [PATCH 3/8] lf_to_crlf(): NUL-terminate msg_data::data Through the rest of the file, the data member of struct msg_data is kept NUL-terminated, and that fact is relied upon in a couple of places. Change lf_to_crlf() to preserve this invariant. In fact, there are no execution paths in which lf_to_crlf() is called and then its data member is required to be NUL-terminated, but it is better to be consistent to prevent future confusion. Document the invariant in the struct msg_data definition. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- imap-send.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/imap-send.c b/imap-send.c index d42e471297..c818b0c7ea 100644 --- a/imap-send.c +++ b/imap-send.c @@ -69,8 +69,12 @@ struct store { }; struct msg_data { + /* NUL-terminated data: */ char *data; + + /* length of data (not including NUL): */ int len; + unsigned char flags; }; @@ -1276,7 +1280,7 @@ static void lf_to_crlf(struct msg_data *msg) lfnum++; } - new = xmalloc(msg->len + lfnum); + new = xmalloc(msg->len + lfnum + 1); if (msg->data[0] == '\n') { new[0] = '\r'; new[1] = '\n'; @@ -1297,6 +1301,7 @@ static void lf_to_crlf(struct msg_data *msg) /* otherwise it already had CR before */ new[j++] = '\n'; } + new[j] = '\0'; msg->len += lfnum; free(msg->data); msg->data = new; From 3a34e62684b6dc0edf1301eb2259132ffc233c48 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 25 Nov 2012 12:08:37 +0100 Subject: [PATCH 4/8] imap-send: store all_msgs as a strbuf all_msgs is only used as a glorified string, therefore there is no reason to declare it as a struct msg_data. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- imap-send.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/imap-send.c b/imap-send.c index c818b0c7ea..50e223a2a4 100644 --- a/imap-send.c +++ b/imap-send.c @@ -1391,26 +1391,20 @@ static void wrap_in_html(struct msg_data *msg) #define CHUNKSIZE 0x1000 -static int read_message(FILE *f, struct msg_data *msg) +static int read_message(FILE *f, struct strbuf *all_msgs) { - struct strbuf buf = STRBUF_INIT; - - memset(msg, 0, sizeof(*msg)); - do { - if (strbuf_fread(&buf, CHUNKSIZE, f) <= 0) + if (strbuf_fread(all_msgs, CHUNKSIZE, f) <= 0) break; } while (!feof(f)); - msg->len = buf.len; - msg->data = strbuf_detach(&buf, NULL); - return msg->len; + return all_msgs->len; } -static int count_messages(struct msg_data *msg) +static int count_messages(struct strbuf *all_msgs) { int count = 0; - char *p = msg->data; + char *p = all_msgs->buf; while (1) { if (!prefixcmp(p, "From ")) { @@ -1431,7 +1425,7 @@ static int count_messages(struct msg_data *msg) return count; } -static int split_msg(struct msg_data *all_msgs, struct msg_data *msg, int *ofs) +static int split_msg(struct strbuf *all_msgs, struct msg_data *msg, int *ofs) { char *p, *data; @@ -1439,7 +1433,7 @@ static int split_msg(struct msg_data *all_msgs, struct msg_data *msg, int *ofs) if (*ofs >= all_msgs->len) return 0; - data = &all_msgs->data[*ofs]; + data = &all_msgs->buf[*ofs]; msg->len = all_msgs->len - *ofs; if (msg->len < 5 || prefixcmp(data, "From ")) @@ -1509,7 +1503,8 @@ static int git_imap_config(const char *key, const char *val, void *cb) int main(int argc, char **argv) { - struct msg_data all_msgs, msg; + struct strbuf all_msgs = STRBUF_INIT; + struct msg_data msg; struct store *ctx = NULL; int ofs = 0; int r; From 6360bee4cd902bdea5db5826821edffd6f367d89 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 25 Nov 2012 12:08:38 +0100 Subject: [PATCH 5/8] imap-send: correctly report errors reading from stdin Previously, read_message() didn't distinguish between an error and eof when reading its input. This could have resulted in incorrect behavior if there was an error: (1) reporting "nothing to send" if no bytes were read or (2) sending an incomplete message if some bytes were read before the error. Change read_message() to return -1 on ferror()s and 0 on success, so that the caller can recognize that an error occurred. (The return value used to be the length of the input read, which was redundant because that is already available as the strbuf length. Change the caller to report errors correctly. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- imap-send.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/imap-send.c b/imap-send.c index 50e223a2a4..86cf60396c 100644 --- a/imap-send.c +++ b/imap-send.c @@ -1398,7 +1398,7 @@ static int read_message(FILE *f, struct strbuf *all_msgs) break; } while (!feof(f)); - return all_msgs->len; + return ferror(f) ? -1 : 0; } static int count_messages(struct strbuf *all_msgs) @@ -1537,7 +1537,12 @@ int main(int argc, char **argv) } /* read the messages */ - if (!read_message(stdin, &all_msgs)) { + if (read_message(stdin, &all_msgs)) { + fprintf(stderr, "error reading input\n"); + return 1; + } + + if (all_msgs.len == 0) { fprintf(stderr, "nothing to send\n"); return 1; } From f035ab620520dbf286d3303194f10a489d7c7f56 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 25 Nov 2012 12:08:39 +0100 Subject: [PATCH 6/8] imap-send: change msg_data from storing (ptr, len) to storing strbuf struct msg_data stored (ptr, len) of the data to be included in a message, kept the character data NUL-terminated, etc., much like a strbuf would do. So change it to use a struct strbuf. This makes the code clearer and reduces copying a little bit. A side effect of this change is that the memory for each message is freed after it is used rather than leaked, though that detail is unimportant given that imap-send is a top-level command. By the way, there is a bunch of infrastructure in this file for dealing with IMAP flags, although there is nothing in the code that actually allows any flags to be set. If there is no plan to add support for flags in the future, a bunch of code could be ripped out and "struct msg_data" could be completely replaced with strbuf, but that would be a separate topic. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- imap-send.c | 92 +++++++++++++++++++++++++++-------------------------- 1 file changed, 47 insertions(+), 45 deletions(-) diff --git a/imap-send.c b/imap-send.c index 86cf60396c..a5e0e33c2d 100644 --- a/imap-send.c +++ b/imap-send.c @@ -69,12 +69,7 @@ struct store { }; struct msg_data { - /* NUL-terminated data: */ - char *data; - - /* length of data (not including NUL): */ - int len; - + struct strbuf data; unsigned char flags; }; @@ -1268,46 +1263,49 @@ static int imap_make_flags(int flags, char *buf) return d; } -static void lf_to_crlf(struct msg_data *msg) +static void lf_to_crlf(struct strbuf *msg) { + size_t new_len; char *new; int i, j, lfnum = 0; - if (msg->data[0] == '\n') + if (msg->buf[0] == '\n') lfnum++; for (i = 1; i < msg->len; i++) { - if (msg->data[i - 1] != '\r' && msg->data[i] == '\n') + if (msg->buf[i - 1] != '\r' && msg->buf[i] == '\n') lfnum++; } - new = xmalloc(msg->len + lfnum + 1); - if (msg->data[0] == '\n') { + new_len = msg->len + lfnum; + new = xmalloc(new_len + 1); + if (msg->buf[0] == '\n') { new[0] = '\r'; new[1] = '\n'; i = 1; j = 2; } else { - new[0] = msg->data[0]; + new[0] = msg->buf[0]; i = 1; j = 1; } for ( ; i < msg->len; i++) { - if (msg->data[i] != '\n') { - new[j++] = msg->data[i]; + if (msg->buf[i] != '\n') { + new[j++] = msg->buf[i]; continue; } - if (msg->data[i - 1] != '\r') + if (msg->buf[i - 1] != '\r') new[j++] = '\r'; /* otherwise it already had CR before */ new[j++] = '\n'; } - new[j] = '\0'; - msg->len += lfnum; - free(msg->data); - msg->data = new; + strbuf_attach(msg, new, new_len, new_len + 1); } -static int imap_store_msg(struct store *gctx, struct msg_data *data) +/* + * Store msg to IMAP. Also detach and free the data from msg->data, + * leaving msg->data empty. + */ +static int imap_store_msg(struct store *gctx, struct msg_data *msg) { struct imap_store *ctx = (struct imap_store *)gctx; struct imap *imap = ctx->imap; @@ -1316,16 +1314,15 @@ static int imap_store_msg(struct store *gctx, struct msg_data *data) int ret, d; char flagstr[128]; - lf_to_crlf(data); + lf_to_crlf(&msg->data); memset(&cb, 0, sizeof(cb)); - cb.dlen = data->len; - cb.data = xmalloc(cb.dlen); - memcpy(cb.data, data->data, data->len); + cb.dlen = msg->data.len; + cb.data = strbuf_detach(&msg->data, NULL); d = 0; - if (data->flags) { - d = imap_make_flags(data->flags, flagstr); + if (msg->flags) { + d = imap_make_flags(msg->flags, flagstr); flagstr[d++] = ' '; } flagstr[d] = 0; @@ -1356,7 +1353,8 @@ static void encode_html_chars(struct strbuf *p) strbuf_splice(p, i, 1, """, 6); } } -static void wrap_in_html(struct msg_data *msg) + +static void wrap_in_html(struct strbuf *msg) { struct strbuf buf = STRBUF_INIT; struct strbuf **lines; @@ -1366,9 +1364,7 @@ static void wrap_in_html(struct msg_data *msg) static char *pre_close = "\n"; int added_header = 0; - strbuf_attach(&buf, msg->data, msg->len, msg->len); - lines = strbuf_split(&buf, '\n'); - strbuf_release(&buf); + lines = strbuf_split(msg, '\n'); for (p = lines; *p; p++) { if (! added_header) { if ((*p)->len == 1 && *((*p)->buf) == '\n') { @@ -1385,8 +1381,8 @@ static void wrap_in_html(struct msg_data *msg) } strbuf_addstr(&buf, pre_close); strbuf_list_free(lines); - msg->len = buf.len; - msg->data = strbuf_detach(&buf, NULL); + strbuf_release(msg); + *msg = buf; } #define CHUNKSIZE 0x1000 @@ -1425,34 +1421,39 @@ static int count_messages(struct strbuf *all_msgs) return count; } -static int split_msg(struct strbuf *all_msgs, struct msg_data *msg, int *ofs) +/* + * Copy the next message from all_msgs, starting at offset *ofs, to + * msg. Update *ofs to the start of the following message. Return + * true iff a message was successfully copied. + */ +static int split_msg(struct strbuf *all_msgs, struct strbuf *msg, int *ofs) { char *p, *data; + size_t len; - memset(msg, 0, sizeof *msg); if (*ofs >= all_msgs->len) return 0; data = &all_msgs->buf[*ofs]; - msg->len = all_msgs->len - *ofs; + len = all_msgs->len - *ofs; - if (msg->len < 5 || prefixcmp(data, "From ")) + if (len < 5 || prefixcmp(data, "From ")) return 0; p = strchr(data, '\n'); if (p) { - p = &p[1]; - msg->len -= p-data; - *ofs += p-data; + p++; + len -= p - data; + *ofs += p - data; data = p; } p = strstr(data, "\nFrom "); if (p) - msg->len = &p[1] - data; + len = &p[1] - data; - msg->data = xmemdupz(data, msg->len); - *ofs += msg->len; + strbuf_add(msg, data, len); + *ofs += len; return 1; } @@ -1504,7 +1505,7 @@ static int git_imap_config(const char *key, const char *val, void *cb) int main(int argc, char **argv) { struct strbuf all_msgs = STRBUF_INIT; - struct msg_data msg; + struct msg_data msg = {STRBUF_INIT, 0}; struct store *ctx = NULL; int ofs = 0; int r; @@ -1564,11 +1565,12 @@ int main(int argc, char **argv) ctx->name = imap_folder; while (1) { unsigned percent = n * 100 / total; + fprintf(stderr, "%4u%% (%d/%d) done\r", percent, n, total); - if (!split_msg(&all_msgs, &msg, &ofs)) + if (!split_msg(&all_msgs, &msg.data, &ofs)) break; if (server.use_html) - wrap_in_html(&msg); + wrap_in_html(&msg.data); r = imap_store_msg(ctx, &msg); if (r != DRV_OK) break; From 3c64063558d57735384d3c547c0138aca7dfd3b8 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 25 Nov 2012 12:08:40 +0100 Subject: [PATCH 7/8] wrap_in_html(): use strbuf_addstr_xml_quoted() Use the new function to quote characters as they are being added to buf, rather than quoting them in *p and then copying them into buf. This increases code sharing, and changes the algorithm from O(N^2) to O(N) in the number of characters in a line. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- imap-send.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/imap-send.c b/imap-send.c index a5e0e33c2d..b73c913f4e 100644 --- a/imap-send.c +++ b/imap-send.c @@ -1339,21 +1339,6 @@ static int imap_store_msg(struct store *gctx, struct msg_data *msg) return DRV_OK; } -static void encode_html_chars(struct strbuf *p) -{ - int i; - for (i = 0; i < p->len; i++) { - if (p->buf[i] == '&') - strbuf_splice(p, i, 1, "&", 5); - if (p->buf[i] == '<') - strbuf_splice(p, i, 1, "<", 4); - if (p->buf[i] == '>') - strbuf_splice(p, i, 1, ">", 4); - if (p->buf[i] == '"') - strbuf_splice(p, i, 1, """, 6); - } -} - static void wrap_in_html(struct strbuf *msg) { struct strbuf buf = STRBUF_INIT; @@ -1372,12 +1357,12 @@ static void wrap_in_html(struct strbuf *msg) strbuf_addbuf(&buf, *p); strbuf_addstr(&buf, pre_open); added_header = 1; - continue; + } else { + strbuf_addbuf(&buf, *p); } + } else { + strbuf_addstr_xml_quoted(&buf, (*p)->buf); } - else - encode_html_chars(*p); - strbuf_addbuf(&buf, *p); } strbuf_addstr(&buf, pre_close); strbuf_list_free(lines); From 118a68f9ddbca6ffd87ab6e32b60771143d9d014 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 25 Nov 2012 12:08:41 +0100 Subject: [PATCH 8/8] wrap_in_html(): process message in bulk rather than line-by-line Now that we can xml-quote an arbitrary string in O(N), there is no reason to process the message line by line. This change saves lots of memory allocations and copying. The old code would have created invalid output when there was no body, emitting a closing without a blank line nor an opening
 after the header.  The new code simply returns in this
situation without doing harm (even though either would not make much
sense in the context of imap-send that is meant to send out patches).

Signed-off-by: Michael Haggerty 
Signed-off-by: Junio C Hamano 
---
 imap-send.c | 31 ++++++++++++-------------------
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/imap-send.c b/imap-send.c
index b73c913f4e..e521e2fd22 100644
--- a/imap-send.c
+++ b/imap-send.c
@@ -1342,30 +1342,23 @@ static int imap_store_msg(struct store *gctx, struct msg_data *msg)
 static void wrap_in_html(struct strbuf *msg)
 {
 	struct strbuf buf = STRBUF_INIT;
-	struct strbuf **lines;
-	struct strbuf **p;
 	static char *content_type = "Content-Type: text/html;\n";
 	static char *pre_open = "
\n";
 	static char *pre_close = "
\n"; - int added_header = 0; + const char *body = strstr(msg->buf, "\n\n"); - lines = strbuf_split(msg, '\n'); - for (p = lines; *p; p++) { - if (! added_header) { - if ((*p)->len == 1 && *((*p)->buf) == '\n') { - strbuf_addstr(&buf, content_type); - strbuf_addbuf(&buf, *p); - strbuf_addstr(&buf, pre_open); - added_header = 1; - } else { - strbuf_addbuf(&buf, *p); - } - } else { - strbuf_addstr_xml_quoted(&buf, (*p)->buf); - } - } + if (!body) + return; /* Headers but no body; no wrapping needed */ + + body += 2; + + strbuf_add(&buf, msg->buf, body - msg->buf - 1); + strbuf_addstr(&buf, content_type); + strbuf_addch(&buf, '\n'); + strbuf_addstr(&buf, pre_open); + strbuf_addstr_xml_quoted(&buf, body); strbuf_addstr(&buf, pre_close); - strbuf_list_free(lines); + strbuf_release(msg); *msg = buf; }