You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
174 lines
4.1 KiB
174 lines
4.1 KiB
diff --git a/mime.c b/mime.c |
|
index 45de80a..f9fbadf 100644 |
|
--- a/mime.c |
|
+++ b/mime.c |
|
@@ -1109,16 +1109,34 @@ fromhdr_end: |
|
} |
|
|
|
/* |
|
+ * Return the length in bytes of the UTF-8 sequence whose first byte is tc: |
|
+ * 2, 3 or 4 for a multi-byte lead byte (110xxxxx, 1110xxxx, 11110xxx) and 1 |
|
+ * for anything else (ASCII, a stray continuation byte, an invalid lead byte). |
|
+ */ |
|
+static size_t |
|
+codepointsize(char tc) |
|
+{ |
|
+	unsigned char uc = (unsigned char)tc;	/* shifting a negative char is undefined */ |
|
+	size_t rv = 0;				/* count of leading 1 bits in tc */ |
|
+ |
|
+	for (; uc & 0x80; uc = (unsigned char)(uc << 1)) |
|
+		rv++; |
|
+	/* 0 = ASCII, 1 = continuation, 5..8 = invalid lead: advance one byte */ |
|
+	return (rv >= 2 && rv <= 4) ? rv : 1; |
|
+} |
|
+ |
|
+/* |
|
* Convert header fields to RFC 1522 format and write to the file fo. |
|
*/ |
|
static size_t |
|
mime_write_tohdr(struct str *in, FILE *fo) |
|
{ |
|
char *upper, *wbeg, *wend, *charset, *lastwordend = NULL, *lastspc, b, |
|
- *charset7; |
|
+ *charset7, *cp; |
|
struct str cin, cout; |
|
- size_t sz = 0, col = 0, wr, charsetlen, charset7len; |
|
+ size_t sz = 0, col = 0, wr, charsetlen, charset7len, cpsz; |
|
int quoteany, mustquote, broken, |
|
+ maxin, maxout, curin, cps, |
|
maxcol = 65 /* there is the header field's name, too */; |
|
|
|
upper = in->s + in->l; |
|
@@ -1134,41 +1152,75 @@ mime_write_tohdr(struct str *in, FILE *fo) |
|
if (mustquote_hdr(wbeg, wbeg == in->s, wbeg == &upper[-1])) |
|
quoteany++; |
|
} |
|
+ |
|
+ /* |
|
+ * RFC 2047 forbids splitting a multi-byte character across |
|
+ * encoded words, so we need to know whether the source is a |
|
+ * multi-byte stream (UTF-8 specifically) or just a single-byte |
|
+ * 8-bit stream such as ISO-8859-15. |
|
+ * Only the beginning of the charset name is tested, because it may |
|
+ * carry a language suffix ("UTF-8*DE" etc.) as per RFC 2184/2231. |
|
+ */ |
|
+ char *thisset = b&0200 ? charset : charset7; |
|
+ int is_utf8 = ( strncasecmp( thisset, "utf-8", 5 ) == 0 ); |
|
+ |
|
if (2 * quoteany > in->l) { |
|
/* |
|
* Print the entire field in base64. |
|
*/ |
|
- for (wbeg = in->s; wbeg < upper; wbeg = wend) { |
|
+ for (wbeg = in->s; wbeg < upper; ) { |
|
wend = upper; |
|
cin.s = wbeg; |
|
- for (;;) { |
|
- cin.l = wend - wbeg; |
|
- if (cin.l * 4/3 + 7 + charsetlen |
|
- < maxcol - col) { |
|
- fprintf(fo, "=?%s?B?", |
|
- b&0200 ? charset : charset7); |
|
- wr = mime_write_tob64(&cin, fo, 1); |
|
- fwrite("?=", sizeof (char), 2, fo); |
|
- wr += 7 + charsetlen; |
|
- sz += wr, col += wr; |
|
- if (wend < upper) { |
|
- fwrite("\n ", sizeof (char), |
|
- 2, fo); |
|
- sz += 2; |
|
- col = 0; |
|
- maxcol = 76; |
|
+ /* |
|
+ * we calculate the maximum number of bytes |
|
+ * we can use on this output line, and then what |
|
+ * this equates to as base64 encoded source bytes |
|
+ */ |
|
+ maxout = maxcol - col - 7 - charsetlen; |
|
+ maxin = (maxout - (maxout & 0x03)) * 3/4; |
|
+ |
|
+ /* short enough to finish ? */ |
|
+ if (maxin > upper - wbeg ) |
|
+ { |
|
+ curin = upper - wbeg; |
|
+ wbeg += curin; |
|
+ }else |
|
+ { |
|
+ if (is_utf8) |
|
+ { |
|
+ /* |
|
+ * now scan the input from the beginning |
|
+ * to see how many codepoints will fit |
|
+ */ |
|
+ curin = 0; |
|
+ while (curin < maxin |
|
+ && (cpsz = codepointsize(*wbeg)) <= (maxin - curin)) |
|
+ { |
|
+ curin += cpsz; |
|
+ wbeg += cpsz; |
|
} |
|
- break; |
|
- } else { |
|
- if (col) { |
|
- fprintf(fo, "\n "); |
|
- sz += 2; |
|
- col = 0; |
|
- maxcol = 76; |
|
- } else |
|
- wend -= 4; |
|
+ }else |
|
+ { |
|
+ curin = maxin; |
|
+ wbeg += maxin; |
|
} |
|
} |
|
+ cin.l = curin; |
|
+ fprintf(fo, "%s=?%s?B?", (cin.s != in->s) ? " " : "", thisset ); |
|
+ wr = mime_write_tob64(&cin, fo, 1); |
|
+ |
|
+ if (wbeg < upper) |
|
+ { |
|
+ wr += fwrite("?=\n ", sizeof (char), 4, fo) * sizeof (char); |
|
+ }else |
|
+ { |
|
+ wr += fwrite("?=", sizeof (char), 2, fo) * sizeof (char); |
|
+ } |
|
+ |
|
+ /* and shuffle pointers and counts */ |
|
+ col = 1; |
|
+ maxcol = 76; |
|
+ sz += wr + 7 + charsetlen + ((cin.s != in->s) ? 1 : 0 ); |
|
} |
|
} else { |
|
/* |
|
@@ -1243,7 +1295,29 @@ mime_write_tohdr(struct str *in, FILE *fo) |
|
maxcol -= wbeg - |
|
lastspc; |
|
} else { |
|
- wend -= 4; |
|
+ if (is_utf8) |
|
+ { |
|
+ /* |
|
+ * make sure wend is not pointing to |
|
+ * the middle of a codepoint |
|
+ */ |
|
+ cp = wend; |
|
+ while (--cp > wbeg) |
|
+ { |
|
+ cps = codepointsize(*cp); |
|
+ if (cps > 1) |
|
+ { |
|
+ if (wend - cp - cps > 4) |
|
+ wend -= 4; |
|
+ else |
|
+ wend = cp; |
|
+ break; |
|
+ } |
|
+ } |
|
+ if (cp == wbeg) |
|
+ wend -= 4; |
|
+ } else |
|
+ wend -= 4; |
|
} |
|
free(cout.s); |
|
}
|
|
|