diff --git a/http.c b/http.c
index 6bfd0934b3..84463dff3d 100644
--- a/http.c
+++ b/http.c
@@ -906,6 +906,32 @@ static CURLcode curlinfo_strbuf(CURL *curl, CURLINFO info, struct strbuf *buf)
 	return ret;
 }
 
+/*
+ * Check for and extract a content-type parameter. "raw" should be
+ * positioned at the start of the potential parameter, with any
+ * whitespace already removed. "name" is matched case-insensitively
+ * and must be followed by "="; the value, which ends at whitespace,
+ * ";" or end of string, is appended to "out".
+ * Returns 0 if the parameter was found, -1 otherwise.
+ */
+static int extract_param(const char *raw, const char *name,
+			 struct strbuf *out)
+{
+	size_t len = strlen(name);
+
+	if (strncasecmp(raw, name, len))
+		return -1;
+	raw += len;
+
+	if (*raw != '=')
+		return -1;
+	raw++;
+
+	while (*raw && !isspace(*raw) && *raw != ';')
+		strbuf_addch(out, *raw++);
+	return 0;
+}
+
 /*
  * Extract a normalized version of the content type, with any
  * spaces suppressed, all letters lowercased, and no trailing ";"
@@ -916,11 +942,15 @@ static CURLcode curlinfo_strbuf(CURL *curl, CURLINFO info, struct strbuf *buf)
  * but "text/plain" is the only reasonable output, and this keeps
  * our code simple.
  *
+ * If the "charset" argument is not NULL, store the value of any
+ * charset parameter there.
+ *
  * Example:
- *   "TEXT/PLAIN; charset=utf-8" -> "text/plain"
+ *   "TEXT/PLAIN; charset=utf-8" -> "text/plain", "utf-8"
  *   "text / plain" -> "text/plain"
  */
-static void extract_content_type(struct strbuf *raw, struct strbuf *type)
+static void extract_content_type(struct strbuf *raw, struct strbuf *type,
+				 struct strbuf *charset)
 {
 	const char *p;
 
@@ -929,10 +959,27 @@ static void extract_content_type(struct strbuf *raw, struct strbuf *type)
 	for (p = raw->buf; *p; p++) {
 		if (isspace(*p))
 			continue;
-		if (*p == ';')
+		if (*p == ';') {
+			p++;
 			break;
+		}
 		strbuf_addch(type, tolower(*p));
 	}
+
+	if (!charset)
+		return;
+
+	strbuf_reset(charset);
+	while (*p) {
+		/* parameters are separated by ";" and optional whitespace */
+		while (isspace(*p) || *p == ';')
+			p++;
+		if (!extract_param(p, "charset", charset))
+			return;
+		/* not the charset; skip ahead to the next separator */
+		while (*p && !isspace(*p) && *p != ';')
+			p++;
+	}
 }
 
 /* http_request() targets */
@@ -989,7 +1036,8 @@ static int http_request(const char *url,
 	if (options && options->content_type) {
 		struct strbuf raw = STRBUF_INIT;
 		curlinfo_strbuf(slot->curl, CURLINFO_CONTENT_TYPE, &raw);
-		extract_content_type(&raw, options->content_type);
+		extract_content_type(&raw, options->content_type,
+				     options->charset);
 		strbuf_release(&raw);
 	}
 
diff --git a/http.h b/http.h
index e64084fe6d..473179b14d 100644
--- a/http.h
+++ b/http.h
@@ -143,6 +143,13 @@ struct http_get_options {
 	/* If non-NULL, returns the content-type of the response. */
 	struct strbuf *content_type;
 
+	/*
+	 * If non-NULL, and content_type above is non-NULL, returns
+	 * the charset parameter from the content-type. If none is
+	 * present, returns an empty string.
+	 */
+	struct strbuf *charset;
+
 	/*
 	 * If non-NULL, returns the URL we ended up at, including any
 	 * redirects we followed.