You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
222 lines
9.4 KiB
222 lines
9.4 KiB
diff --git a/Lib/httplib.py b/Lib/httplib.py |
|
index da2f346..fc8e895 100644 |
|
--- a/Lib/httplib.py |
|
+++ b/Lib/httplib.py |
|
@@ -247,6 +247,15 @@ _MAXHEADERS = 100 |
|
_is_legal_header_name = re.compile(r'\A[^:\s][^:\r\n]*\Z').match |
|
_is_illegal_header_value = re.compile(r'\n(?![ \t])|\r(?![ \t\n])').search |
|
|
|
+# These characters are not allowed within HTTP URL paths. |
|
+# See https://tools.ietf.org/html/rfc3986#section-3.3 and the |
|
+# https://tools.ietf.org/html/rfc3986#appendix-A pchar definition. |
|
+# Prevents CVE-2019-9740. Includes control characters such as \r\n. |
|
+# Restrict non-ASCII characters above \x7f (0x80-0xff). |
|
+_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f-\xff]') |
|
+# Arguably only these _should_ be allowed: |
|
+# _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$") |
|
+# We are more lenient for assumed real world compatibility purposes. |
|
|
|
class HTTPMessage(mimetools.Message): |
|
|
|
@@ -926,6 +935,12 @@ class HTTPConnection: |
|
self._method = method |
|
if not url: |
|
url = '/' |
|
+ # Prevent CVE-2019-9740. |
|
+ match = _contains_disallowed_url_pchar_re.search(url) |
|
+ if match: |
|
+ raise InvalidURL("URL can't contain control characters. %r " |
|
+ "(found at least %r)" |
|
+ % (url, match.group())) |
|
hdr = '%s %s %s' % (method, url, self._http_vsn_str) |
|
|
|
self._output(hdr) |
|
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py |
|
index 3845012..d2da0f8 100644 |
|
--- a/Lib/test/test_urllib.py |
|
+++ b/Lib/test/test_urllib.py |
|
@@ -198,6 +198,31 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin): |
|
finally: |
|
self.unfakehttp() |
|
|
|
+ def test_url_with_control_char_rejected(self): |
|
+ for char_no in range(0, 0x21) + range(0x7f, 0x100): |
|
+ char = chr(char_no) |
|
+ schemeless_url = "//localhost:7777/test%s/" % char |
|
+ self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.") |
|
+ try: |
|
+ # urllib quotes the URL so there is no injection. |
|
+ resp = urllib.urlopen("http:" + schemeless_url) |
|
+ self.assertNotIn(char, resp.geturl()) |
|
+ finally: |
|
+ self.unfakehttp() |
|
+ |
|
+ def test_url_with_newline_header_injection_rejected(self): |
|
+ self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.") |
|
+ host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123" |
|
+ schemeless_url = "//" + host + ":8080/test/?test=a" |
|
+ try: |
|
+ # urllib quotes the URL so there is no injection. |
|
+ resp = urllib.urlopen("http:" + schemeless_url) |
|
+ self.assertNotIn(' ', resp.geturl()) |
|
+ self.assertNotIn('\r', resp.geturl()) |
|
+ self.assertNotIn('\n', resp.geturl()) |
|
+ finally: |
|
+ self.unfakehttp() |
|
+ |
|
def test_read_bogus(self): |
|
# urlopen() should raise IOError for many error codes. |
|
self.fakehttp('''HTTP/1.1 401 Authentication Required |
|
@@ -786,6 +811,35 @@ class Pathname_Tests(unittest.TestCase): |
|
class Utility_Tests(unittest.TestCase): |
|
"""Testcase to test the various utility functions in the urllib.""" |
|
|
|
+ def test_splithost(self): |
|
+ splithost = urllib.splithost |
|
+ self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'), |
|
+ ('www.example.org:80', '/foo/bar/baz.html')) |
|
+ self.assertEqual(splithost('//www.example.org:80'), |
|
+ ('www.example.org:80', '')) |
|
+ self.assertEqual(splithost('/foo/bar/baz.html'), |
|
+ (None, '/foo/bar/baz.html')) |
|
+ |
|
+ # bpo-30500: # starts a fragment. |
|
+ self.assertEqual(splithost('//127.0.0.1#@host.com'), |
|
+ ('127.0.0.1', '/#@host.com')) |
|
+ self.assertEqual(splithost('//127.0.0.1#@host.com:80'), |
|
+ ('127.0.0.1', '/#@host.com:80')) |
|
+ self.assertEqual(splithost('//127.0.0.1:80#@host.com'), |
|
+ ('127.0.0.1:80', '/#@host.com')) |
|
+ |
|
+ # Empty host is returned as empty string. |
|
+ self.assertEqual(splithost("///file"), |
|
+ ('', '/file')) |
|
+ |
|
+ # Trailing semicolon, question mark and hash symbol are kept. |
|
+ self.assertEqual(splithost("//example.net/file;"), |
|
+ ('example.net', '/file;')) |
|
+ self.assertEqual(splithost("//example.net/file?"), |
|
+ ('example.net', '/file?')) |
|
+ self.assertEqual(splithost("//example.net/file#"), |
|
+ ('example.net', '/file#')) |
|
+ |
|
def test_splitpasswd(self): |
|
"""Some of the password examples are not sensible, but it is added to |
|
confirming to RFC2617 and addressing issue4675. |
|
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py |
|
index c317b8d..63fefd6 100644 |
|
--- a/Lib/test/test_urllib2.py |
|
+++ b/Lib/test/test_urllib2.py |
|
@@ -7,12 +7,16 @@ import StringIO |
|
|
|
import urllib2 |
|
from urllib2 import Request, OpenerDirector |
|
+import httplib |
|
|
|
try: |
|
import ssl |
|
except ImportError: |
|
ssl = None |
|
|
|
+from test.test_urllib import FakeHTTPMixin |
|
+ |
|
+ |
|
# XXX |
|
# Request |
|
# CacheFTPHandler (hard to write) |
|
@@ -1243,7 +1247,7 @@ class HandlerTests(unittest.TestCase): |
|
self.assertEqual(len(http_handler.requests), 1) |
|
self.assertFalse(http_handler.requests[0].has_header(auth_header)) |
|
|
|
-class MiscTests(unittest.TestCase): |
|
+class MiscTests(unittest.TestCase, FakeHTTPMixin): |
|
|
|
def test_build_opener(self): |
|
class MyHTTPHandler(urllib2.HTTPHandler): pass |
|
@@ -1289,6 +1293,53 @@ class MiscTests(unittest.TestCase): |
|
else: |
|
self.assertTrue(False) |
|
|
|
+ @unittest.skipUnless(ssl, "ssl module required") |
|
+ def test_url_with_control_char_rejected(self): |
|
+ for char_no in range(0, 0x21) + range(0x7f, 0x100): |
|
+ char = chr(char_no) |
|
+ schemeless_url = "//localhost:7777/test%s/" % char |
|
+ self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.") |
|
+ try: |
|
+ # We explicitly test urllib2.urlopen() instead of the top |
|
+ # level 'def urlopen()' function defined in this... (quite ugly) |
|
+ # test suite. They use different url opening codepaths. Plain |
|
+ # urlopen uses FancyURLOpener which goes via a codepath that |
|
+ # calls urllib.quote() on the URL which makes all of the |
|
+ # above attempts at injection within the url _path_ safe. |
|
+ escaped_char_repr = repr(char).replace('\\', r'\\') |
|
+ InvalidURL = httplib.InvalidURL |
|
+ with self.assertRaisesRegexp( |
|
+ InvalidURL, "contain control.*" + escaped_char_repr): |
|
+ urllib2.urlopen("http:" + schemeless_url) |
|
+ with self.assertRaisesRegexp( |
|
+ InvalidURL, "contain control.*" + escaped_char_repr): |
|
+ urllib2.urlopen("https:" + schemeless_url) |
|
+ finally: |
|
+ self.unfakehttp() |
|
+ |
|
+ @unittest.skipUnless(ssl, "ssl module required") |
|
+ def test_url_with_newline_header_injection_rejected(self): |
|
+ self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.") |
|
+ host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123" |
|
+ schemeless_url = "//" + host + ":8080/test/?test=a" |
|
+ try: |
|
+ # We explicitly test urllib2.urlopen() instead of the top |
|
+ # level 'def urlopen()' function defined in this... (quite ugly) |
|
+ # test suite. They use different url opening codepaths. Plain |
|
+ # urlopen uses FancyURLOpener which goes via a codepath that |
|
+ # calls urllib.quote() on the URL which makes all of the |
|
+ # above attempts at injection within the url _path_ safe. |
|
+ InvalidURL = httplib.InvalidURL |
|
+ with self.assertRaisesRegexp( |
|
+ InvalidURL, r"contain control.*\\r.*(found at least . .)"): |
|
+ urllib2.urlopen("http:" + schemeless_url) |
|
+ with self.assertRaisesRegexp(InvalidURL, r"contain control.*\\n"): |
|
+ urllib2.urlopen("https:" + schemeless_url) |
|
+ finally: |
|
+ self.unfakehttp() |
|
+ |
|
+ |
|
+ |
|
class RequestTests(unittest.TestCase): |
|
|
|
def setUp(self): |
|
diff --git a/Lib/test/test_xmlrpc.py b/Lib/test/test_xmlrpc.py |
|
index 79e862a..347b494 100644 |
|
--- a/Lib/test/test_xmlrpc.py |
|
+++ b/Lib/test/test_xmlrpc.py |
|
@@ -592,7 +592,13 @@ class SimpleServerTestCase(BaseServerTestCase): |
|
def test_partial_post(self): |
|
# Check that a partial POST doesn't make the server loop: issue #14001. |
|
conn = httplib.HTTPConnection(ADDR, PORT) |
|
- conn.request('POST', '/RPC2 HTTP/1.0\r\nContent-Length: 100\r\n\r\nbye') |
|
+ conn.send('POST /RPC2 HTTP/1.0\r\n' |
|
+ 'Content-Length: 100\r\n\r\n' |
|
+ 'bye HTTP/1.1\r\n' |
|
+ 'Host: %s:%s\r\n' |
|
+ 'Accept-Encoding: identity\r\n' |
|
+ 'Content-Length: 0\r\n\r\n' |
|
+ % (ADDR, PORT)) |
|
conn.close() |
|
|
|
class MultiPathServerTestCase(BaseServerTestCase): |
|
diff --git a/Lib/urllib.py b/Lib/urllib.py |
|
index 9b31df1..2201e3e 100644 |
|
--- a/Lib/urllib.py |
|
+++ b/Lib/urllib.py |
|
@@ -1079,8 +1079,7 @@ def splithost(url): |
|
"""splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" |
|
global _hostprog |
|
if _hostprog is None: |
|
- import re |
|
- _hostprog = re.compile('^//([^/?]*)(.*)$') |
|
+ _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL) |
|
|
|
match = _hostprog.match(url) |
|
if match:
|
|
|