|
|
commit c26d0004e779316830d93120dbfe98f6eee0783b |
|
|
Author: Pranjal Jumde <pjumde@apple.com> |
|
|
Date: Tue Mar 1 15:18:04 2016 -0800 |
|
|
|
|
|
Heap-based buffer overread in htmlCurrentChar |
|
|
|
|
|
For https://bugzilla.gnome.org/show_bug.cgi?id=758606 |
|
|
|
|
|
* parserInternals.c: |
|
|
(xmlNextChar): Add a test to catch other issues on ctxt->input |
|
|
corruption proactively. |
|
|
For non-UTF-8 charsets, xmlNextChar() failed to check for the end |
|
|
of the input buffer and would continue reading. Fix this by |
|
|
pulling out the check for the end of the input buffer into common |
|
|
code, and return if we reach the end of the input buffer |
|
|
prematurely. |
|
|
* result/HTML/758606.html: Added. |
|
|
* result/HTML/758606.html.err: Added. |
|
|
* result/HTML/758606.html.sax: Added. |
|
|
* result/HTML/758606_2.html: Added. |
|
|
* result/HTML/758606_2.html.err: Added. |
|
|
* result/HTML/758606_2.html.sax: Added. |
|
|
* test/HTML/758606.html: Added test case. |
|
|
* test/HTML/758606_2.html: Added test case. |
|
|
|
|
|
diff --git a/parserInternals.c b/parserInternals.c |
|
|
index 1fe1f6a..341d6a1 100644 |
|
|
--- a/parserInternals.c |
|
|
+++ b/parserInternals.c |
|
|
@@ -55,6 +55,10 @@ |
|
|
#include <libxml/globals.h> |
|
|
#include <libxml/chvalid.h> |
|
|
|
|
|
+#define CUR(ctxt) ctxt->input->cur |
|
|
+#define END(ctxt) ctxt->input->end |
|
|
+#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt)) |
|
|
+ |
|
|
#include "buf.h" |
|
|
#include "enc.h" |
|
|
|
|
|
@@ -422,103 +426,105 @@ xmlNextChar(xmlParserCtxtPtr ctxt) |
|
|
(ctxt->input == NULL)) |
|
|
return; |
|
|
|
|
|
- if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { |
|
|
- if ((*ctxt->input->cur == 0) && |
|
|
- (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) && |
|
|
- (ctxt->instate != XML_PARSER_COMMENT)) { |
|
|
- /* |
|
|
- * If we are at the end of the current entity and |
|
|
- * the context allows it, we pop consumed entities |
|
|
- * automatically. |
|
|
- * the auto closing should be blocked in other cases |
|
|
- */ |
|
|
+ if (!(VALID_CTXT(ctxt))) { |
|
|
+ xmlErrInternal(ctxt, "Parser input data memory error\n", NULL); |
|
|
+ ctxt->errNo = XML_ERR_INTERNAL_ERROR; |
|
|
+ xmlStopParser(ctxt); |
|
|
+ return; |
|
|
+ } |
|
|
+ |
|
|
+ if ((*ctxt->input->cur == 0) && |
|
|
+ (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) { |
|
|
+ if ((ctxt->instate != XML_PARSER_COMMENT)) |
|
|
xmlPopInput(ctxt); |
|
|
- } else { |
|
|
- const unsigned char *cur; |
|
|
- unsigned char c; |
|
|
+ return; |
|
|
+ } |
|
|
|
|
|
- /* |
|
|
- * 2.11 End-of-Line Handling |
|
|
- * the literal two-character sequence "#xD#xA" or a standalone |
|
|
- * literal #xD, an XML processor must pass to the application |
|
|
- * the single character #xA. |
|
|
- */ |
|
|
- if (*(ctxt->input->cur) == '\n') { |
|
|
- ctxt->input->line++; ctxt->input->col = 1; |
|
|
- } else |
|
|
- ctxt->input->col++; |
|
|
+ if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { |
|
|
+ const unsigned char *cur; |
|
|
+ unsigned char c; |
|
|
|
|
|
- /* |
|
|
- * We are supposed to handle UTF8, check it's valid |
|
|
- * From rfc2044: encoding of the Unicode values on UTF-8: |
|
|
- * |
|
|
- * UCS-4 range (hex.) UTF-8 octet sequence (binary) |
|
|
- * 0000 0000-0000 007F 0xxxxxxx |
|
|
- * 0000 0080-0000 07FF 110xxxxx 10xxxxxx |
|
|
- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx |
|
|
- * |
|
|
- * Check for the 0x110000 limit too |
|
|
- */ |
|
|
- cur = ctxt->input->cur; |
|
|
+ /* |
|
|
+ * 2.11 End-of-Line Handling |
|
|
+ * the literal two-character sequence "#xD#xA" or a standalone |
|
|
+ * literal #xD, an XML processor must pass to the application |
|
|
+ * the single character #xA. |
|
|
+ */ |
|
|
+ if (*(ctxt->input->cur) == '\n') { |
|
|
+ ctxt->input->line++; ctxt->input->col = 1; |
|
|
+ } else |
|
|
+ ctxt->input->col++; |
|
|
|
|
|
- c = *cur; |
|
|
- if (c & 0x80) { |
|
|
- if (c == 0xC0) |
|
|
- goto encoding_error; |
|
|
- if (cur[1] == 0) { |
|
|
+ /* |
|
|
+ * We are supposed to handle UTF8, check it's valid |
|
|
+ * From rfc2044: encoding of the Unicode values on UTF-8: |
|
|
+ * |
|
|
+ * UCS-4 range (hex.) UTF-8 octet sequence (binary) |
|
|
+ * 0000 0000-0000 007F 0xxxxxxx |
|
|
+ * 0000 0080-0000 07FF 110xxxxx 10xxxxxx |
|
|
+ * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx |
|
|
+ * |
|
|
+ * Check for the 0x110000 limit too |
|
|
+ */ |
|
|
+ cur = ctxt->input->cur; |
|
|
+ |
|
|
+ c = *cur; |
|
|
+ if (c & 0x80) { |
|
|
+ if (c == 0xC0) |
|
|
+ goto encoding_error; |
|
|
+ if (cur[1] == 0) { |
|
|
+ xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
|
|
+ cur = ctxt->input->cur; |
|
|
+ } |
|
|
+ if ((cur[1] & 0xc0) != 0x80) |
|
|
+ goto encoding_error; |
|
|
+ if ((c & 0xe0) == 0xe0) { |
|
|
+ unsigned int val; |
|
|
+ |
|
|
+ if (cur[2] == 0) { |
|
|
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
|
|
cur = ctxt->input->cur; |
|
|
} |
|
|
- if ((cur[1] & 0xc0) != 0x80) |
|
|
+ if ((cur[2] & 0xc0) != 0x80) |
|
|
goto encoding_error; |
|
|
- if ((c & 0xe0) == 0xe0) { |
|
|
- unsigned int val; |
|
|
- |
|
|
- if (cur[2] == 0) { |
|
|
+ if ((c & 0xf0) == 0xf0) { |
|
|
+ if (cur[3] == 0) { |
|
|
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
|
|
cur = ctxt->input->cur; |
|
|
} |
|
|
- if ((cur[2] & 0xc0) != 0x80) |
|
|
+ if (((c & 0xf8) != 0xf0) || |
|
|
+ ((cur[3] & 0xc0) != 0x80)) |
|
|
goto encoding_error; |
|
|
- if ((c & 0xf0) == 0xf0) { |
|
|
- if (cur[3] == 0) { |
|
|
- xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
|
|
- cur = ctxt->input->cur; |
|
|
- } |
|
|
- if (((c & 0xf8) != 0xf0) || |
|
|
- ((cur[3] & 0xc0) != 0x80)) |
|
|
- goto encoding_error; |
|
|
- /* 4-byte code */ |
|
|
- ctxt->input->cur += 4; |
|
|
- val = (cur[0] & 0x7) << 18; |
|
|
- val |= (cur[1] & 0x3f) << 12; |
|
|
- val |= (cur[2] & 0x3f) << 6; |
|
|
- val |= cur[3] & 0x3f; |
|
|
- } else { |
|
|
- /* 3-byte code */ |
|
|
- ctxt->input->cur += 3; |
|
|
- val = (cur[0] & 0xf) << 12; |
|
|
- val |= (cur[1] & 0x3f) << 6; |
|
|
- val |= cur[2] & 0x3f; |
|
|
- } |
|
|
- if (((val > 0xd7ff) && (val < 0xe000)) || |
|
|
- ((val > 0xfffd) && (val < 0x10000)) || |
|
|
- (val >= 0x110000)) { |
|
|
- xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, |
|
|
- "Char 0x%X out of allowed range\n", |
|
|
- val); |
|
|
- } |
|
|
- } else |
|
|
- /* 2-byte code */ |
|
|
- ctxt->input->cur += 2; |
|
|
+ /* 4-byte code */ |
|
|
+ ctxt->input->cur += 4; |
|
|
+ val = (cur[0] & 0x7) << 18; |
|
|
+ val |= (cur[1] & 0x3f) << 12; |
|
|
+ val |= (cur[2] & 0x3f) << 6; |
|
|
+ val |= cur[3] & 0x3f; |
|
|
+ } else { |
|
|
+ /* 3-byte code */ |
|
|
+ ctxt->input->cur += 3; |
|
|
+ val = (cur[0] & 0xf) << 12; |
|
|
+ val |= (cur[1] & 0x3f) << 6; |
|
|
+ val |= cur[2] & 0x3f; |
|
|
+ } |
|
|
+ if (((val > 0xd7ff) && (val < 0xe000)) || |
|
|
+ ((val > 0xfffd) && (val < 0x10000)) || |
|
|
+ (val >= 0x110000)) { |
|
|
+ xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, |
|
|
+ "Char 0x%X out of allowed range\n", |
|
|
+ val); |
|
|
+ } |
|
|
} else |
|
|
- /* 1-byte code */ |
|
|
- ctxt->input->cur++; |
|
|
+ /* 2-byte code */ |
|
|
+ ctxt->input->cur += 2; |
|
|
+ } else |
|
|
+ /* 1-byte code */ |
|
|
+ ctxt->input->cur++; |
|
|
|
|
|
- ctxt->nbChars++; |
|
|
- if (*ctxt->input->cur == 0) |
|
|
- xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
|
|
- } |
|
|
+ ctxt->nbChars++; |
|
|
+ if (*ctxt->input->cur == 0) |
|
|
+ xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
|
|
} else { |
|
|
/* |
|
|
* Assume it's a fixed length encoding (1) with |
|
|
diff --git a/result/HTML/758606.html b/result/HTML/758606.html |
|
|
new file mode 100644 |
|
|
index 0000000..4f21f62 |
|
|
--- /dev/null |
|
|
+++ b/result/HTML/758606.html |
|
|
@@ -0,0 +1,2 @@ |
|
|
+<!DOCTYPE > |
|
|
+ |
|
|
diff --git a/result/HTML/758606.html.err b/result/HTML/758606.html.err |
|
|
new file mode 100644 |
|
|
index 0000000..060433a |
|
|
--- /dev/null |
|
|
+++ b/result/HTML/758606.html.err |
|
|
@@ -0,0 +1,16 @@ |
|
|
+./test/HTML/758606.html:1: HTML parser error : Comment not terminated |
|
|
+<!-- |
|
|
+<!--<!doctype |
|
|
+ ^ |
|
|
+./test/HTML/758606.html:1: HTML parser error : Invalid char in CDATA 0xC |
|
|
+<!--<!doctype |
|
|
+ ^ |
|
|
+./test/HTML/758606.html:1: HTML parser error : Misplaced DOCTYPE declaration |
|
|
+<!--<!doctype |
|
|
+ ^ |
|
|
+./test/HTML/758606.html:2: HTML parser error : htmlParseDocTypeDecl : no DOCTYPE name ! |
|
|
+ |
|
|
+^ |
|
|
+./test/HTML/758606.html:2: HTML parser error : DOCTYPE improperly terminated |
|
|
+ |
|
|
+^ |
|
|
diff --git a/result/HTML/758606.html.sax b/result/HTML/758606.html.sax |
|
|
new file mode 100644 |
|
|
index 0000000..d44a5cf |
|
|
--- /dev/null |
|
|
+++ b/result/HTML/758606.html.sax |
|
|
@@ -0,0 +1,10 @@ |
|
|
+SAX.setDocumentLocator() |
|
|
+SAX.startDocument() |
|
|
+SAX.error: Comment not terminated |
|
|
+<!-- |
|
|
+SAX.error: Invalid char in CDATA 0xC |
|
|
+SAX.error: Misplaced DOCTYPE declaration |
|
|
+SAX.error: htmlParseDocTypeDecl : no DOCTYPE name ! |
|
|
+SAX.error: DOCTYPE improperly terminated |
|
|
+SAX.internalSubset((null), , ) |
|
|
+SAX.endDocument() |
|
|
diff --git a/result/HTML/758606_2.html b/result/HTML/758606_2.html |
|
|
new file mode 100644 |
|
|
index 0000000..273816a |
|
|
--- /dev/null |
|
|
+++ b/result/HTML/758606_2.html |
|
|
@@ -0,0 +1,2 @@ |
|
|
+<!DOCTYPE > |
|
|
+<html><body><p>‘</p></body></html> |
|
|
diff --git a/result/HTML/758606_2.html.err b/result/HTML/758606_2.html.err |
|
|
new file mode 100644 |
|
|
index 0000000..4be039f |
|
|
--- /dev/null |
|
|
+++ b/result/HTML/758606_2.html.err |
|
|
@@ -0,0 +1,16 @@ |
|
|
+./test/HTML/758606_2.html:1: HTML parser error : Comment not terminated |
|
|
+<!-- |
|
|
+<!--<EFBFBD><!dOctYPE |
|
|
+ ^ |
|
|
+./test/HTML/758606_2.html:1: HTML parser error : Invalid char in CDATA 0xC |
|
|
+<!--<EFBFBD><!dOctYPE |
|
|
+ ^ |
|
|
+./test/HTML/758606_2.html:1: HTML parser error : Misplaced DOCTYPE declaration |
|
|
+<!dOctYPE |
|
|
+ ^ |
|
|
+./test/HTML/758606_2.html:2: HTML parser error : htmlParseDocTypeDecl : no DOCTYPE name ! |
|
|
+ |
|
|
+^ |
|
|
+./test/HTML/758606_2.html:2: HTML parser error : DOCTYPE improperly terminated |
|
|
+ |
|
|
+^ |
|
|
diff --git a/result/HTML/758606_2.html.sax b/result/HTML/758606_2.html.sax |
|
|
new file mode 100644 |
|
|
index 0000000..80ff3d7 |
|
|
--- /dev/null |
|
|
+++ b/result/HTML/758606_2.html.sax |
|
|
@@ -0,0 +1,17 @@ |
|
|
+SAX.setDocumentLocator() |
|
|
+SAX.startDocument() |
|
|
+SAX.error: Comment not terminated |
|
|
+<!-- |
|
|
+SAX.error: Invalid char in CDATA 0xC |
|
|
+SAX.startElement(html) |
|
|
+SAX.startElement(body) |
|
|
+SAX.startElement(p) |
|
|
+SAX.characters(‘, 2) |
|
|
+SAX.error: Misplaced DOCTYPE declaration |
|
|
+SAX.error: htmlParseDocTypeDecl : no DOCTYPE name ! |
|
|
+SAX.error: DOCTYPE improperly terminated |
|
|
+SAX.internalSubset((null), , ) |
|
|
+SAX.endElement(p) |
|
|
+SAX.endElement(body) |
|
|
+SAX.endElement(html) |
|
|
+SAX.endDocument() |
|
|
diff --git a/test/HTML/758606.html b/test/HTML/758606.html |
|
|
new file mode 100644 |
|
|
index 0000000..01a013c |
|
|
--- /dev/null |
|
|
+++ b/test/HTML/758606.html |
|
|
@@ -0,0 +1 @@ |
|
|
+<!--<!doctype |
|
|
diff --git a/test/HTML/758606_2.html b/test/HTML/758606_2.html |
|
|
new file mode 100644 |
|
|
index 0000000..daa185b |
|
|
--- /dev/null |
|
|
+++ b/test/HTML/758606_2.html |
|
|
@@ -0,0 +1 @@ |
|
|
+<!--<EFBFBD><!dOctYPE
|
|
|
|