From 99274b9e7524be7e8f041e327be1dd7ba3fdc8a4 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Thu, 21 Apr 2016 10:56:28 -0700 Subject: [PATCH] Key connection pools off custom keys --- CONTRIBUTORS.txt | 3 + docs/managers.rst | 44 +++++++++ test/test_poolmanager.py | 230 ++++++++++++++++++++++++++++++++++++++++++++++- urllib3/poolmanager.py | 104 +++++++++++++++++++-- 4 files changed, 371 insertions(+), 10 deletions(-) diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 5141a50..f32faaa 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -142,5 +142,8 @@ In chronological order: * Alex Gaynor * Updates to the default SSL configuration +* Jeremy Cline + * Added connection pool keys by scheme + * [Your name or handle] <[email or website]> * [Brief summary of your changes] diff --git a/docs/managers.rst b/docs/managers.rst index 6c841b7..ef91ca4 100644 --- a/docs/managers.rst +++ b/docs/managers.rst @@ -28,6 +28,44 @@ so you don't have to. >>> conn.num_requests 3 +A :class:`.PoolManager` will create a new :doc:`ConnectionPool <pools>` +when no :doc:`ConnectionPools <pools>` exist with a matching pool key. +The pool key is derived using the requested URL and the current values +of the ``connection_pool_kw`` instance variable on :class:`.PoolManager`. + +The keys in ``connection_pool_kw`` used when deriving the key are +configurable. For example, by default the ``my_field`` key is not +considered. + +.. doctest :: + + >>> from urllib3.poolmanager import PoolManager + >>> manager = PoolManager(10, my_field='wheat') + >>> manager.connection_from_url('http://example.com') + >>> manager.connection_pool_kw['my_field'] = 'barley' + >>> manager.connection_from_url('http://example.com') + >>> len(manager.pools) + 1 + +To make the pool manager create new pools when the value of +``my_field`` changes, you can define a custom pool key and alter +the ``key_fn_by_scheme`` instance variable on :class:`.PoolManager`. + +..
doctest :: + + >>> import functools + >>> from collections import namedtuple + >>> from urllib3.poolmanager import PoolManager, HTTPPoolKey + >>> from urllib3.poolmanager import _default_key_normalizer as normalizer + >>> CustomKey = namedtuple('CustomKey', HTTPPoolKey._fields + ('my_field',)) + >>> manager = PoolManager(10, my_field='wheat') + >>> manager.key_fn_by_scheme['http'] = functools.partial(normalizer, CustomKey) + >>> manager.connection_from_url('http://example.com') + >>> manager.connection_pool_kw['my_field'] = 'barley' + >>> manager.connection_from_url('http://example.com') + >>> len(manager.pools) + 2 + The API of a :class:`.PoolManager` object is similar to that of a :doc:`ConnectionPool <pools>`, so they can be passed around interchangeably. @@ -59,6 +97,12 @@ API .. autoclass:: PoolManager :inherited-members: + .. autoclass:: BasePoolKey + :inherited-members: + .. autoclass:: HTTPPoolKey + :inherited-members: + .. autoclass:: HTTPSPoolKey + :inherited-members: ProxyManager ============ diff --git a/test/test_poolmanager.py b/test/test_poolmanager.py index 6195d51..fb134fb 100644 --- a/test/test_poolmanager.py +++ b/test/test_poolmanager.py @@ -1,11 +1,21 @@ +import functools import unittest +from collections import namedtuple -from urllib3.poolmanager import PoolManager +from urllib3.poolmanager import ( + _default_key_normalizer, + HTTPPoolKey, + HTTPSPoolKey, + key_fn_by_scheme, + PoolManager, + SSL_KEYWORDS, +) from urllib3 import connection_from_url from urllib3.exceptions import ( ClosedPoolError, LocationValueError, ) +from urllib3.util import retry, timeout class TestPoolManager(unittest.TestCase): @@ -87,6 +97,224 @@ class TestPoolManager(unittest.TestCase): self.assertEqual(len(p.pools), 0) + def test_http_pool_key_fields(self): + """Assert the HTTPPoolKey fields are honored when selecting a pool.""" + connection_pool_kw = { + 'timeout': timeout.Timeout(3.14), + 'retries': retry.Retry(total=6, connect=2), + 'block': True, + 'strict': True, +
'source_address': '127.0.0.1', + } + p = PoolManager() + conn_pools = [ + p.connection_from_url('http://example.com/'), + p.connection_from_url('http://example.com:8000/'), + p.connection_from_url('http://other.example.com/'), + ] + + for key, value in connection_pool_kw.items(): + p.connection_pool_kw[key] = value + conn_pools.append(p.connection_from_url('http://example.com/')) + + self.assertTrue( + all( + x is not y + for i, x in enumerate(conn_pools) + for j, y in enumerate(conn_pools) + if i != j + ) + ) + self.assertTrue( + all( + isinstance(key, HTTPPoolKey) + for key in p.pools.keys()) + ) + + def test_http_pool_key_extra_kwargs(self): + """Assert non-HTTPPoolKey fields are ignored when selecting a pool.""" + p = PoolManager() + conn_pool = p.connection_from_url('http://example.com/') + p.connection_pool_kw['some_kwarg'] = 'that should be ignored' + other_conn_pool = p.connection_from_url('http://example.com/') + + self.assertTrue(conn_pool is other_conn_pool) + + def test_http_pool_key_https_kwargs(self): + """Assert HTTPSPoolKey fields are ignored when selecting a HTTP pool.""" + p = PoolManager() + conn_pool = p.connection_from_url('http://example.com/') + for key in SSL_KEYWORDS: + p.connection_pool_kw[key] = 'this should be ignored' + other_conn_pool = p.connection_from_url('http://example.com/') + + self.assertTrue(conn_pool is other_conn_pool) + + def test_https_pool_key_fields(self): + """Assert the HTTPSPoolKey fields are honored when selecting a pool.""" + connection_pool_kw = { + 'timeout': timeout.Timeout(3.14), + 'retries': retry.Retry(total=6, connect=2), + 'block': True, + 'strict': True, + 'source_address': '127.0.0.1', + 'key_file': '/root/totally_legit.key', + 'cert_file': '/root/totally_legit.crt', + 'cert_reqs': 'CERT_REQUIRED', + 'ca_certs': '/root/path_to_pem', + 'ssl_version': 'SSLv23_METHOD', + } + p = PoolManager() + conn_pools = [ + p.connection_from_url('https://example.com/'), + 
p.connection_from_url('https://example.com:4333/'), + p.connection_from_url('https://other.example.com/'), + ] + # Asking for a connection pool with the same key should give us an + # existing pool. + dup_pools = [] + + for key, value in connection_pool_kw.items(): + p.connection_pool_kw[key] = value + conn_pools.append(p.connection_from_url('https://example.com/')) + dup_pools.append(p.connection_from_url('https://example.com/')) + + self.assertTrue( + all( + x is not y + for i, x in enumerate(conn_pools) + for j, y in enumerate(conn_pools) + if i != j + ) + ) + self.assertTrue(all(pool in conn_pools for pool in dup_pools)) + self.assertTrue( + all( + isinstance(key, HTTPSPoolKey) + for key in p.pools.keys()) + ) + + def test_https_pool_key_extra_kwargs(self): + """Assert non-HTTPSPoolKey fields are ignored when selecting a pool.""" + p = PoolManager() + conn_pool = p.connection_from_url('https://example.com/') + p.connection_pool_kw['some_kwarg'] = 'that should be ignored' + other_conn_pool = p.connection_from_url('https://example.com/') + + self.assertTrue(conn_pool is other_conn_pool) + + def test_default_pool_key_funcs_copy(self): + """Assert each PoolManager gets a copy of ``key_fn_by_scheme``.""" + p = PoolManager() + self.assertEqual(p.key_fn_by_scheme, p.key_fn_by_scheme) + self.assertFalse(p.key_fn_by_scheme is key_fn_by_scheme) + + def test_pools_keyed_with_from_host(self): + """Assert pools are still keyed correctly with connection_from_host.""" + ssl_kw = { + 'key_file': '/root/totally_legit.key', + 'cert_file': '/root/totally_legit.crt', + 'cert_reqs': 'CERT_REQUIRED', + 'ca_certs': '/root/path_to_pem', + 'ssl_version': 'SSLv23_METHOD', + } + p = PoolManager(5, **ssl_kw) + conns = [] + conns.append( + p.connection_from_host('example.com', 443, scheme='https') + ) + + for k in ssl_kw: + p.connection_pool_kw[k] = 'newval' + conns.append( + p.connection_from_host('example.com', 443, scheme='https') + ) + + self.assertTrue( + all( + x is not y + for i,
x in enumerate(conns) + for j, y in enumerate(conns) + if i != j + ) + ) + + def test_https_connection_from_url_case_insensitive(self): + """Assert scheme case is ignored when pooling HTTPS connections.""" + p = PoolManager() + pool = p.connection_from_url('https://example.com/') + other_pool = p.connection_from_url('HTTPS://EXAMPLE.COM/') + + self.assertEqual(1, len(p.pools)) + self.assertTrue(pool is other_pool) + self.assertTrue(all(isinstance(key, HTTPSPoolKey) for key in p.pools.keys())) + + def test_https_connection_from_host_case_insensitive(self): + """Assert scheme case is ignored when getting the https key class.""" + p = PoolManager() + pool = p.connection_from_host('example.com', scheme='https') + other_pool = p.connection_from_host('EXAMPLE.COM', scheme='HTTPS') + + self.assertEqual(1, len(p.pools)) + self.assertTrue(pool is other_pool) + self.assertTrue(all(isinstance(key, HTTPSPoolKey) for key in p.pools.keys())) + + def test_https_connection_from_context_case_insensitive(self): + """Assert scheme case is ignored when getting the https key class.""" + p = PoolManager() + context = {'scheme': 'https', 'host': 'example.com', 'port': '443'} + other_context = {'scheme': 'HTTPS', 'host': 'EXAMPLE.COM', 'port': '443'} + pool = p.connection_from_context(context) + other_pool = p.connection_from_context(other_context) + + self.assertEqual(1, len(p.pools)) + self.assertTrue(pool is other_pool) + self.assertTrue(all(isinstance(key, HTTPSPoolKey) for key in p.pools.keys())) + + def test_http_connection_from_url_case_insensitive(self): + """Assert scheme case is ignored when pooling HTTP connections.""" + p = PoolManager() + pool = p.connection_from_url('http://example.com/') + other_pool = p.connection_from_url('HTTP://EXAMPLE.COM/') + + self.assertEqual(1, len(p.pools)) + self.assertTrue(pool is other_pool) + self.assertTrue(all(isinstance(key, HTTPPoolKey) for key in p.pools.keys())) + + def test_http_connection_from_host_case_insensitive(self): + """Assert 
scheme case is ignored when getting the http key class.""" + p = PoolManager() + pool = p.connection_from_host('example.com', scheme='http') + other_pool = p.connection_from_host('EXAMPLE.COM', scheme='HTTP') + + self.assertEqual(1, len(p.pools)) + self.assertTrue(pool is other_pool) + self.assertTrue(all(isinstance(key, HTTPPoolKey) for key in p.pools.keys())) + + def test_http_connection_from_context_case_insensitive(self): + """Assert scheme case is ignored when getting the http key class.""" + p = PoolManager() + context = {'scheme': 'http', 'host': 'example.com', 'port': '8080'} + other_context = {'scheme': 'HTTP', 'host': 'EXAMPLE.COM', 'port': '8080'} + pool = p.connection_from_context(context) + other_pool = p.connection_from_context(other_context) + + self.assertEqual(1, len(p.pools)) + self.assertTrue(pool is other_pool) + self.assertTrue(all(isinstance(key, HTTPPoolKey) for key in p.pools.keys())) + + def test_custom_pool_key(self): + """Assert it is possible to define additional pool key fields.""" + custom_key = namedtuple('CustomKey', HTTPPoolKey._fields + ('my_field',)) + p = PoolManager(10, my_field='barley') + + p.key_fn_by_scheme['http'] = functools.partial(_default_key_normalizer, custom_key) + p.connection_from_url('http://example.com') + p.connection_pool_kw['my_field'] = 'wheat' + p.connection_from_url('http://example.com') + + self.assertEqual(2, len(p.pools)) + if __name__ == '__main__': unittest.main() diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py index b8d1e74..4fdae8d 100644 --- a/urllib3/poolmanager.py +++ b/urllib3/poolmanager.py @@ -1,3 +1,5 @@ +import collections +import functools import logging try: # Python 3 @@ -17,16 +19,69 @@ from .util.retry import Retry __all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url'] +log = logging.getLogger(__name__) + +SSL_KEYWORDS = ('key_file', 'cert_file', 'cert_reqs', 'ca_certs', + 'ssl_version', 'ca_cert_dir') + +# The base fields to use when determining what pool to get a
connection from; +# these do not rely on the ``connection_pool_kw`` and can be determined by the +# URL and potentially the ``urllib3.connection.port_by_scheme`` dictionary. +# +# All custom key schemes should include the fields in this key at a minimum. +BasePoolKey = collections.namedtuple('BasePoolKey', ('scheme', 'host', 'port')) + +# The fields to use when determining what pool to get a HTTP and HTTPS +# connection from. All additional fields must be present in the PoolManager's +# ``connection_pool_kw`` instance variable. +HTTPPoolKey = collections.namedtuple( + 'HTTPPoolKey', BasePoolKey._fields + ('timeout', 'retries', 'strict', + 'block', 'source_address') +) +HTTPSPoolKey = collections.namedtuple( + 'HTTPSPoolKey', HTTPPoolKey._fields + SSL_KEYWORDS +) + + +def _default_key_normalizer(key_class, request_context): + """ + Create a pool key of type ``key_class`` for a request. + + According to RFC 3986, both the scheme and host are case-insensitive. + Therefore, this function normalizes both before constructing the pool + key for a request. If you wish to change this behaviour, provide + alternate callables to ``key_fn_by_scheme``. + + :param key_class: + The class to use when constructing the key. This should be a namedtuple + with the ``scheme`` and ``host`` keys at a minimum. + + :param request_context: + A dictionary-like object that contains the context for a request. + It should contain a key for each field in ``key_class``. + """ + context = {} + for key in key_class._fields: + context[key] = request_context.get(key) + context['scheme'] = context['scheme'].lower() + context['host'] = context['host'].lower() + return key_class(**context) + + +# A dictionary that maps a scheme to a callable that creates a pool key. +# This can be used to alter the way pool keys are constructed, if desired. +# Each PoolManager makes a copy of this dictionary so they can be configured +# globally here, or individually on the instance.
+key_fn_by_scheme = { + 'http': functools.partial(_default_key_normalizer, HTTPPoolKey), + 'https': functools.partial(_default_key_normalizer, HTTPSPoolKey), +} + pool_classes_by_scheme = { 'http': HTTPConnectionPool, 'https': HTTPSConnectionPool, } -log = logging.getLogger(__name__) - -SSL_KEYWORDS = ('key_file', 'cert_file', 'cert_reqs', 'ca_certs', - 'ssl_version') - class PoolManager(RequestMethods): """ @@ -64,6 +119,11 @@ class PoolManager(RequestMethods): self.pools = RecentlyUsedContainer(num_pools, dispose_func=lambda p: p.close()) + # Locally set the pool classes and keys so other PoolManagers can + # override them. + self.pool_classes_by_scheme = pool_classes_by_scheme + self.key_fn_by_scheme = key_fn_by_scheme.copy() + def __enter__(self): return self @@ -109,10 +169,36 @@ class PoolManager(RequestMethods): if not host: raise LocationValueError("No host specified.") - scheme = scheme or 'http' - port = port or port_by_scheme.get(scheme, 80) - pool_key = (scheme, host, port) + request_context = self.connection_pool_kw.copy() + request_context['scheme'] = scheme or 'http' + if not port: + port = port_by_scheme.get(request_context['scheme'].lower(), 80) + request_context['port'] = port + request_context['host'] = host + + return self.connection_from_context(request_context) + + def connection_from_context(self, request_context): + """ + Get a :class:`ConnectionPool` based on the request context. + + ``request_context`` must at least contain the ``scheme`` key and its + value must be a key in ``key_fn_by_scheme`` instance variable. + """ + scheme = request_context['scheme'].lower() + pool_key_constructor = self.key_fn_by_scheme[scheme] + pool_key = pool_key_constructor(request_context) + + return self.connection_from_pool_key(pool_key) + def connection_from_pool_key(self, pool_key): + """ + Get a :class:`ConnectionPool` based on the provided pool key. + + ``pool_key`` should be a namedtuple that only contains immutable + objects. 
At a minimum it must have the ``scheme``, ``host``, and + ``port`` fields. + """ with self.pools.lock: # If the scheme, host, or port doesn't match existing open # connections, open a new ConnectionPool. @@ -121,7 +207,7 @@ class PoolManager(RequestMethods): return pool # Make a fresh ConnectionPool of the desired type - pool = self._new_pool(scheme, host, port) + pool = self._new_pool(pool_key.scheme, pool_key.host, pool_key.port) self.pools[pool_key] = pool return pool -- 2.5.5