jenkins-bot has submitted this change and it was merged.

Change subject: Switch from httplib2 to requests
Switch from httplib2 to requests This is a GSoC bonding task to evaluate availability of switching to requests. Bug: T98671 Change-Id: Ic56d43ae1160d1ced796426ce00afe544e88f9e5 --- M .appveyor.yml M .gitmodules M .travis.yml M LICENSE M README-conversion.txt M docs/requirements-py3.txt D externals/README D externals/__init__.py D externals/httplib2 M pwb.py M pywikibot/bot.py M pywikibot/comms/http.py M pywikibot/comms/threadedhttp.py M pywikibot/config2.py M pywikibot/data/api.py M requirements.txt M scripts/version.py M setup.py M tests/__init__.py M tests/http_tests.py M tests/utils.py 21 files changed, 124 insertions(+), 784 deletions(-) Approvals: John Vandenberg: Looks good to me, approved jenkins-bot: Verified diff --git a/.appveyor.yml b/.appveyor.yml index 64f3cf1..bd16e2e 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -28,8 +28,6 @@ - ps: (new-object net.webclient).DownloadFile('https://raw.githubusercontent.com/pypa/python-packaging-user-guide/master/source/code/install.ps1', 'install.ps1') - "powershell install.ps1" - ps: (new-object net.webclient).DownloadFile('https://raw.githubusercontent.com/pypa/python-packaging-user-guide/master/source/code/run_with_compiler.cmd', 'run_with_compiler.cmd') - - cd externals/httplib2 - - "%WITH_COMPILER% %PYTHON%\\python setup.py install" - cd ..\\.. build: off diff --git a/.gitmodules b/.gitmodules index 8400953..b89493f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,9 +3,3 @@ url = https://gerrit.wikimedia.org/r/p/pywikibot/i18n.git branch = . ignore = all - -[submodule "externals/httplib2"] - path = externals/httplib2 - url = https://gerrit.wikimedia.org/r/p/pywikibot/externals/httplib2.git - branch = . 
- ignore = all diff --git a/.travis.yml b/.travis.yml index 40289f8..0004dbf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,13 +28,7 @@ - mkdir ~/.python-eggs - chmod 700 ~/.python-eggs - # httplib2 is needed to run generate_user_files - - if [[ "$EXTERNALS_HTTPLIB2" == "1" ]]; then - (cd externals/httplib2 && python setup.py install) ; - else - rm -rf externals/httplib2 ; - pip install httplib2 ; - fi + - pip install requests - if [[ "$GITHUB_USER" != "wikimedia" ]]; then export PYWIKIBOT2_TEST_WRITE_FAIL=1 ; @@ -80,7 +74,7 @@ - secure: kofInMlisiTBt9o/Ustc/vySlkKfxGzGCX2LwA1D2waym8sDTS0o5gMJ5LsrT/BUKwZbe1vLozPHqZrrkQvsdTml+DpZuotzdILs0m0f3BUoexEC6OON5IDljuxFyETrD1Ug44ih5Mc4lVFOdTcBzg501ZmswGwQrBvg/OyEFfE= matrix: - - LANGUAGE=en FAMILY=wikipedia EXTERNALS_HTTPLIB2=1 PYWIKIBOT2_TEST_PROD_ONLY=1 + - LANGUAGE=en FAMILY=wikipedia PYWIKIBOT2_TEST_PROD_ONLY=1 - LANGUAGE=fr FAMILY=wiktionary PYSETUP_TEST_EXTRAS=1 PYWIKIBOT2_TEST_PROD_ONLY=1 PYWIKIBOT2_TEST_NO_RC=1 matrix: @@ -88,7 +82,7 @@ - python: '2.7' env: LANGUAGE=he FAMILY=wikivoyage SITE_ONLY=1 - python: '3.3' - env: LANGUAGE=zh FAMILY=wikisource SITE_ONLY=1 EXTERNALS_HTTPLIB2=1 + env: LANGUAGE=zh FAMILY=wikisource SITE_ONLY=1 - python: '3.4' env: LANGUAGE=test FAMILY=wikidata SITE_ONLY=1 - python: 'nightly' diff --git a/LICENSE b/LICENSE index 71d9a57..c8f0800 100644 --- a/LICENSE +++ b/LICENSE @@ -20,8 +20,3 @@ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -Everything in the 'externals' directory is general released under separate -licenses. Some parts may come from Pywikibot team and share the same -permissions, but in general they have to be checked for any external package -separately. 
diff --git a/README-conversion.txt b/README-conversion.txt index dd6c1c7..72d299f 100644 --- a/README-conversion.txt +++ b/README-conversion.txt @@ -48,28 +48,9 @@ so that these dependencies will be loaded automatically when the package is installed, and users won't need to worry about this...] -To run pywikibot, you will need the httplib2 package: -* https://github.com/jcgregorio/httplib2 +To run pywikibot, you will need the requests package: It may be installed using pip or easy_install. - -The minimum requirement is httplib2 0.6.0. -However setup.py requires httplib2 0.9.0, as that version includes current -root certificates needed to access Wikimedia servers using HTTPS. - -If your operating systems provides a packaged httplib2, it may be -altered to load the root certificates from the host operating system. -To check, execute: -$ python -c 'import httplib2; print httplib2.CA_CERTS' - -httplib2 0.8.0 added the ability to define CA_CERTS with a plugin module. -If you need to use 0.8.0, install module httplib2.ca_certs_locater with pip, -and contribute fixes as necessary. -https://pypi.python.org/pypi/httplib2.ca_certs_locater -https://github.com/dreamhost/httplib2-ca_certs_locater - -If you use the pwb.py script, it will attempt to load httplib2 from the -externals directory, which is a git submodule containing httplib2 0.9.0. == Page objects == diff --git a/docs/requirements-py3.txt b/docs/requirements-py3.txt index 89f085e..f8b0450 100644 --- a/docs/requirements-py3.txt +++ b/docs/requirements-py3.txt @@ -5,4 +5,4 @@ sphinx-epytext>=0.0.3 # mandatory; see README.conversion.txt -httplib2>=0.9.0 +requests diff --git a/externals/README b/externals/README deleted file mode 100644 index 2195f75..0000000 --- a/externals/README +++ /dev/null @@ -1,11 +0,0 @@ -This package is only necessary to run Pywikibot from a fully self-sufficient -(no other dependencies other than python 2.6+) directory. -This is especially useful on Windows. 
- -Usually - under normal circumstances - these packages should be installed -separately elsewhere in the OS by the standard package managing system. - -If you want to run the rewrite as a stand-alone package, you may download -dependencies into the externals/ subdirectory, and they will automatically -be used by the pwb.py script. - diff --git a/externals/__init__.py b/externals/__init__.py deleted file mode 100644 index e69de29..0000000 --- a/externals/__init__.py +++ /dev/null diff --git a/externals/httplib2 b/externals/httplib2 deleted file mode 160000 index ada7d1f..0000000 --- a/externals/httplib2 +++ /dev/null -Subproject commit ada7d1fa9694f81e8ad64063e4f35a680c501ce0 diff --git a/pwb.py b/pwb.py index 45415c0..07e258b 100755 --- a/pwb.py +++ b/pwb.py @@ -127,43 +127,23 @@ sys.path = [sys.path[0], rewrite_path, os.path.join(rewrite_path, 'pywikibot', 'compat'), - os.path.join(rewrite_path, 'externals') ] + sys.path[1:] -# try importing the known externals, and raise an error if they are not found try: - import httplib2 - if not hasattr(httplib2, '__version__'): - print("httplib2 import problem: httplib2.__version__ does not exist.") - if sys.version_info > (3, 3): - print("Python 3.4+ has probably loaded externals/httplib2 " - "although it doesnt have an __init__.py.") - httplib2 = None + import requests + if not hasattr(requests, '__version__'): + print("requests import problem: requests.__version__ does not exist.") + requests = None except ImportError as e: print("ImportError: %s" % e) - httplib2 = None + requests = None -if not httplib2: - print("Python module httplib2 >= 0.6.0 is required.") - print("Did you clone without --recursive?\n" - "Try running 'git submodule update --init' " - "or 'pip install httplib2'.") +if not requests: + print("Python module requests is required.") + print("Try running 'pip install requests'.") sys.exit(1) -# httplib2 0.6.0 was released with __version__ as '$Rev$' -# and no module variable CA_CERTS. 
-if httplib2.__version__ == '$Rev$' and 'CA_CERTS' not in httplib2.__dict__: - httplib2.__version__ = '0.6.0' -from distutils.version import StrictVersion -if StrictVersion(httplib2.__version__) < StrictVersion("0.6.0"): - print("Python module httplib2 (%s) needs to be 0.6.0 or greater." % - httplib2.__file__) - print("Did you clone without --recursive?\n" - "Try running 'git submodule update --init' " - "or 'pip install --upgrade httplib2'.") - sys.exit(1) - -del httplib2 +del requests # Search for user-config.py before creating one. try: diff --git a/pywikibot/bot.py b/pywikibot/bot.py index 041c5c9..c14207e 100644 --- a/pywikibot/bot.py +++ b/pywikibot/bot.py @@ -334,7 +334,7 @@ all_modules = sys.modules.keys() # These are the main dependencies of pywikibot. - check_package_list = ['httplib2', 'mwparserfromhell'] + check_package_list = ['requests', 'mwparserfromhell'] # report all imported packages if config.verbose_output: diff --git a/pywikibot/comms/http.py b/pywikibot/comms/http.py index b336a0a..6663462 100644 --- a/pywikibot/comms/http.py +++ b/pywikibot/comms/http.py @@ -24,40 +24,16 @@ import atexit import sys -import time -from distutils.version import StrictVersion from string import Formatter from warnings import warn -# Verify that a working httplib2 is present. -try: - import httplib2 -except ImportError: - print("Error: Python module httplib2 >= 0.6.0 is required.") - sys.exit(1) - -# httplib2 0.6.0 was released with __version__ as '$Rev$' -# and no module variable CA_CERTS. -if httplib2.__version__ == '$Rev$' and 'CA_CERTS' not in httplib2.__dict__: - httplib2.__version__ = '0.6.0' -if StrictVersion(httplib2.__version__) < StrictVersion("0.6.0"): - print("Error: Python module httplib2 (%s) is not 0.6.0 or greater." 
% - httplib2.__file__) - sys.exit(1) +import requests if sys.version_info[0] > 2: - from ssl import SSLError as SSLHandshakeError - import queue as Queue from http import cookiejar as cookielib from urllib.parse import quote else: - if 'SSLHandshakeError' in httplib2.__dict__: - from httplib2 import SSLHandshakeError - elif httplib2.__version__ == '0.6.0': - from httplib2 import ServerNotFoundError as SSLHandshakeError - - import Queue import cookielib from urllib2 import quote @@ -81,16 +57,10 @@ _logger = "comm.http" -# global variables - -numthreads = 1 -threads = [] - -connection_pool = threadedhttp.ConnectionPool() -http_queue = Queue.Queue() +session = requests.Session() cookie_jar = cookielib.LWPCookieJar( - config.datafilepath("pywikibot.lwp")) + config.datafilepath("pywikibot.lwp2")) try: cookie_jar.load() except (IOError, cookielib.LoadError): @@ -98,23 +68,13 @@ else: pywikibot.debug(u"Loaded cookies from file.", _logger) - -# Build up HttpProcessors -pywikibot.log(u'Starting %(numthreads)i threads...' % locals()) -for i in range(numthreads): - proc = threadedhttp.HttpProcessor(http_queue, cookie_jar, connection_pool) - proc.setDaemon(True) - threads.append(proc) - proc.start() +session.cookies = cookie_jar # Prepare flush on quit def _flush(): - for i in threads: - http_queue.put(None) - - message = (u'Waiting for %i network thread(s) to finish. ' - u'Press ctrl-c to abort' % len(threads)) + session.close() + message = 'Closing network session.' 
if hasattr(sys, 'last_type'): # we quit because of an exception print(sys.last_type) @@ -122,18 +82,17 @@ else: pywikibot.log(message) - while any(t for t in threads if t.isAlive()): - time.sleep(.1) - - pywikibot.log(u"All threads finished.") + pywikibot.log('Network session closed.') atexit.register(_flush) + # export cookie_jar to global namespace pywikibot.cookie_jar = cookie_jar + USER_AGENT_PRODUCTS = { 'python': 'Python/' + '.'.join([str(i) for i in sys.version_info]), - 'httplib2': 'httplib2/' + httplib2.__version__, + 'http_backend': 'requests/' + requests.__version__, 'pwb': 'Pywikibot/' + pywikibot.__release__, } @@ -238,7 +197,7 @@ """ Request to Site with default error handling and response decoding. - See L{httplib2.Http.request} for additional parameters. + See L{requests.Session.request} for additional parameters. If the site argument is provided, the uri is a relative uri from and including the document root '/'. @@ -280,6 +239,21 @@ return r.content +def _http_process(session, http_request): + method = http_request.method + uri = http_request.uri + body = http_request.body + headers = http_request.headers + + try: + request = session.request(method, uri, data=body, headers=headers, + verify=True) + except Exception as e: + http_request.data = e + else: + http_request.data = request + + def error_handling_callback(request): """ Raise exceptions and log alerts. @@ -288,7 +262,7 @@ @rtype request: L{threadedhttp.HttpRequest} """ # TODO: do some error correcting stuff - if isinstance(request.data, SSLHandshakeError): + if isinstance(request.data, requests.exceptions.SSLError): if SSL_CERT_VERIFY_FAILED_MSG in str(request.data): raise FatalServerError(str(request.data)) @@ -306,7 +280,7 @@ # used by the version module. 
if request.status not in (200, 207): pywikibot.warning(u"Http response status %(status)s" - % {'status': request.data[0].status}) + % {'status': request.data.status_code}) def _enqueue(uri, method="GET", body=None, headers=None, **kwargs): @@ -324,7 +298,7 @@ as they are limited by the number of http threads in L{numthreads}, which is set to 1 by default. - @see: L{httplib2.Http.request} for parameters. + @see: L{requests.Session.request} for parameters. @kwarg default_error_handling: Use default error handling @type default_error_handling: bool @@ -354,7 +328,7 @@ request = threadedhttp.HttpRequest( uri, method, body, headers, callbacks, **kwargs) - http_queue.put(request) + _http_process(session, request) return request @@ -366,15 +340,14 @@ Note: The callback runs in the HTTP thread, where exceptions are logged but are not able to be caught. - See L{httplib2.Http.request} for parameters. + See L{requests.Session.request} for parameters. @param default_error_handling: Use default error handling @type default_error_handling: bool @rtype: L{threadedhttp.HttpRequest} """ request = _enqueue(uri, method, body, headers, **kwargs) - request._join() # wait for it - assert(request._data) # if there's no data in the answer we're in trouble + assert(request._data is not None) # if there's no data in the answer we're in trouble # Run the error handling callback in the callers thread so exceptions # may be caught. if default_error_handling: diff --git a/pywikibot/comms/threadedhttp.py b/pywikibot/comms/threadedhttp.py index 69cf1f8..786d521 100644 --- a/pywikibot/comms/threadedhttp.py +++ b/pywikibot/comms/threadedhttp.py @@ -1,326 +1,30 @@ # -*- coding: utf-8 -*- -"""Httplib2 threaded cookie layer. 
- -This class extends httplib2, adding support for: - - Cookies, guarded for cross-site redirects - - Thread safe ConnectionPool class - - HttpProcessor thread class - - HttpRequest object - -""" +"""Http backend layer, formerly providing a httplib2 wrapper.""" from __future__ import unicode_literals - -# (C) Pywikibot team, 2007-2014 -# (C) Httplib 2 team, 2006 -# (C) Metaweb Technologies, Inc., 2007 -# -# Partially distributed under the MIT license -# Partially distributed under Metaweb Technologies, Incs license -# which is compatible with the MIT license +# (C) Pywikibot team, 2007-2015 __version__ = '$Id$' __docformat__ = 'epytext' # standard python libraries import codecs -import re import sys -import threading if sys.version_info[0] > 2: - from http import cookiejar as cookielib - from urllib.parse import splittype, splithost, unquote, urlparse, urljoin + from urllib.parse import urlparse else: - import cookielib - from urlparse import urlparse, urljoin - from urllib import splittype, splithost, unquote + from urlparse import urlparse import pywikibot - -from pywikibot import config from pywikibot.tools import UnicodeMixin _logger = "comm.threadedhttp" -import httplib2 - - -class ConnectionPool(object): - - """A thread-safe connection pool.""" - - def __init__(self, maxnum=5): - """ - Constructor. - - @param maxnum: Maximum number of connections per identifier. - The pool drops excessive connections added. 
- - """ - pywikibot.debug(u"Creating connection pool.", _logger) - self.connections = {} - self.lock = threading.Lock() - self.maxnum = maxnum - - def __del__(self): - """Destructor to close all connections in the pool.""" - self.lock.acquire() - try: - pywikibot.debug(u"Closing connection pool (%s connections)" - % len(self.connections), - _logger) - for key in self.connections: - for connection in self.connections[key]: - connection.close() - except (AttributeError, TypeError): - pass # this shows up when logger has been destroyed first - finally: - self.lock.release() - - def __repr__(self): - return self.connections.__repr__() - - def pop_connection(self, identifier): - """Get a connection from identifier's connection pool. - - @param identifier: The pool identifier - @return: A connection object if found, None otherwise - - """ - self.lock.acquire() - try: - if identifier in self.connections: - if len(self.connections[identifier]) > 0: - pywikibot.debug(u"Retrieved connection from '%s' pool." - % identifier, - _logger) - return self.connections[identifier].pop() - return None - finally: - self.lock.release() - - def push_connection(self, identifier, connection): - """Add a connection to identifier's connection pool. - - @param identifier: The pool identifier - @param connection: The connection to add to the pool - - """ - self.lock.acquire() - try: - if identifier not in self.connections: - self.connections[identifier] = [] - - if len(self.connections[identifier]) != self.maxnum: - self.connections[identifier].append(connection) - else: - pywikibot.debug(u"closing %s connection %r" - % (identifier, connection), - _logger) - connection.close() - del connection - finally: - self.lock.release() - - -class Http(httplib2.Http): - - """Subclass of httplib2.Http that stores cookies. - - Overrides httplib2's internal redirect support to prevent cookies being - eaten by the wrong sites. - """ - - def __init__(self, *args, **kwargs): - """ - Constructor. 
- - @kwarg cookiejar: (optional) CookieJar to use. A new one will be - used when not supplied. - @kwarg connection_pool: (optional) Connection pool to use. A new one - will be used when not supplied. - @kwarg max_redirects: (optional) The maximum number of redirects to - follow. 5 is default. - @kwarg timeout: (optional) Socket timeout in seconds. Default is - config.socket_timeout. Disable with None. - - """ - try: - self.cookiejar = kwargs.pop('cookiejar') - except KeyError: - self.cookiejar = cookielib.CookieJar() - - try: - self.connection_pool = kwargs.pop('connection_pool') - except KeyError: - self.connection_pool = ConnectionPool() - self.max_redirects = kwargs.pop('max_redirects', 5) - if len(args) < 3: - kwargs.setdefault('proxy_info', config.proxy) - kwargs.setdefault('timeout', config.socket_timeout) - httplib2.Http.__init__(self, *args, **kwargs) - - def request(self, uri, method="GET", body=None, headers=None, - max_redirects=None, connection_type=None): - """Start an HTTP request. - - @param uri: The uri to retrieve - @param method: (optional) The HTTP method to use. Default is 'GET' - @param body: (optional) The request body. Default is no body. - @param headers: (optional) Additional headers to send. Defaults - include C{connection: keep-alive}, C{user-agent} and - C{content-type}. - @param max_redirects: (optional) The maximum number of redirects to - use for this request. 
The class instance's max_redirects is - default - @param connection_type: (optional) see L{httplib2.Http.request} - - @return: (response, content) tuple - - """ - if max_redirects is None: - max_redirects = self.max_redirects - if headers is None: - headers = {} - # Prepare headers - headers.pop('cookie', None) - req = DummyRequest(uri, headers) - self.cookiejar.add_cookie_header(req) - - headers = req.headers - - # Wikimedia squids: add connection: keep-alive to request headers - # unless overridden - headers['connection'] = headers.pop('connection', 'keep-alive') - - # determine connection pool key and fetch connection - (scheme, authority, request_uri, - defrag_uri) = httplib2.urlnorm(httplib2.iri2uri(uri)) - conn_key = scheme + ":" + authority - - connection = self.connection_pool.pop_connection(conn_key) - if connection is not None: - self.connections[conn_key] = connection - - # Redirect hack: we want to regulate redirects - follow_redirects = self.follow_redirects - self.follow_redirects = False - pywikibot.debug(u"%r" % ( - (uri.replace("%7C", "|"), method, body, - headers, max_redirects, - connection_type), - ), _logger) - try: - if authority in config.authenticate: - self.add_credentials(*config.authenticate[authority]) - - (response, content) = httplib2.Http.request( - self, uri, method, body, headers, - max_redirects, connection_type - ) - except Exception as e: # what types? 
- # return exception instance to be retrieved by the calling thread - return e - finally: - self.follow_redirects = follow_redirects - - # return connection to pool - self.connection_pool.push_connection(conn_key, - self.connections[conn_key]) - del self.connections[conn_key] - - # First write cookies - self.cookiejar.extract_cookies(DummyResponse(response), req) - - # Check for possible redirects - redirectable_response = ((response.status == 303) or - (response.status in [300, 301, 302, 307] and - method in ["GET", "HEAD"])) - if (self.follow_redirects and (max_redirects > 0) and - redirectable_response): - # Return directly and not unpack the values in case the result was - # an exception, which can't be unpacked - return self._follow_redirect( - uri, method, body, headers, response, content, max_redirects) - else: - return response, content - - def _follow_redirect(self, uri, method, body, headers, response, - content, max_redirects): - """Internal function to follow a redirect recieved by L{request}.""" - (scheme, authority, absolute_uri, - defrag_uri) = httplib2.urlnorm(httplib2.iri2uri(uri)) - if self.cache: - cachekey = defrag_uri - else: - cachekey = None - - # Pick out the location header and basically start from the beginning - # remembering first to strip the ETag header and decrement our 'depth' - if "location" not in response and response.status != 300: - raise httplib2.RedirectMissingLocation( - "Redirected but the response is missing a Location: header.", - response, content) - # Fix-up relative redirects (which violate an RFC 2616 MUST) - if "location" in response: - location = response['location'] - (scheme, authority, path, query, - fragment) = httplib2.parse_uri(location) - if authority is None: - response['location'] = urljoin(uri, location) - pywikibot.debug(u"Relative redirect: changed [%s] to [%s]" - % (location, response['location']), - _logger) - if response.status == 301 and method in ["GET", "HEAD"]: - 
response['-x-permanent-redirect-url'] = response['location'] - if "content-location" not in response: - response['content-location'] = absolute_uri - httplib2._updateCache(headers, response, content, self.cache, - cachekey) - - headers.pop('if-none-match', None) - headers.pop('if-modified-since', None) - - if "location" in response: - location = response['location'] - redirect_method = ((response.status == 303) and - (method not in ["GET", "HEAD"]) - ) and "GET" or method - return self.request(location, redirect_method, body=body, - headers=headers, - max_redirects=max_redirects - 1) - else: - return httplib2.RedirectLimit( - "Redirected more times than redirection_limit allows.", - response, content) - - class HttpRequest(UnicodeMixin): """Object wrapper for HTTP requests that need to block origin thread. - - Usage: - - >>> from .http import Queue - >>> queue = Queue.Queue() - >>> cookiejar = cookielib.CookieJar() - >>> connection_pool = ConnectionPool() - >>> proc = HttpProcessor(queue, cookiejar, connection_pool) - >>> proc.setDaemon(True) - >>> proc.start() - >>> request = HttpRequest('https://hostname.invalid/') - >>> queue.put(request) - >>> request.lock.acquire() - True - >>> print(type(request.data)) - <class 'httplib2.ServerNotFoundError'> - >>> print(request.data) - Unable to find the server at hostname.invalid - >>> queue.put(None) # Stop the http processor thread - - C{request.lock.acquire()} will block until the data is available. 
self.data will be either: * a tuple of (dict, unicode) if the request was successful @@ -354,24 +58,16 @@ self._parsed_uri = None self._data = None - self.lock = threading.Semaphore(0) - - def _join(self): - """Block until response has arrived.""" - self.lock.acquire(True) @property def data(self): - """Return the httplib2 response tuple.""" - if not self._data: - self._join() - - assert(self._data) + """Return the requests response tuple.""" + assert(self._data is not None) return self._data @data.setter def data(self, value): - """Set the httplib2 response and invoke each callback.""" + """Set the requests response and invoke each callback.""" self._data = value if self.callbacks: @@ -380,7 +76,7 @@ @property def exception(self): - """Get the exception raised by httplib2, if any.""" + """Get the exception, if any.""" if isinstance(self.data, Exception): return self.data @@ -388,13 +84,13 @@ def response_headers(self): """Return the response headers.""" if not self.exception: - return self.data[0] + return self.data.headers @property def raw(self): """Return the raw response body.""" if not self.exception: - return self.data[1] + return self.data.content @property def parsed_uri(self): @@ -414,7 +110,8 @@ @rtype: int """ - return self.response_headers.status + if not self.exception: + return self.data.status_code @property def header_encoding(self): @@ -484,170 +181,3 @@ def __bytes__(self): """Return the undecoded response.""" return self.raw - - -class HttpProcessor(threading.Thread): - - """Thread object to spawn multiple HTTP connection threads.""" - - def __init__(self, queue, cookiejar, connection_pool): - """ - Constructor. - - @param queue: The C{Queue.Queue} object that contains L{HttpRequest} - objects. - @param cookiejar: The C{cookielib.CookieJar} cookie object to share among - requests. - @param connection_pool: The C{ConnectionPool} object which contains - connections to share among requests. 
- - """ - threading.Thread.__init__(self) - self.queue = queue - self.http = Http(cookiejar=cookiejar, connection_pool=connection_pool) - - def run(self): - # The Queue item is expected to either an HttpRequest object - # or None (to shut down the thread) - pywikibot.debug(u"Thread started, waiting for requests.", _logger) - while True: - item = self.queue.get() - if item is None: - pywikibot.debug(u"Shutting down thread.", _logger) - return - - # This needs to be set per request, however it is only used - # the first time the pooled connection is created. - self.http.disable_ssl_certificate_validation = \ - item.kwargs.pop('disable_ssl_certificate_validation', False) - try: - item.data = self.http.request(*item.args, **item.kwargs) - finally: - if item.lock: - item.lock.release() - # if data wasn't set others might hang; but wait on lock release - assert(item._data) - - -# Metaweb Technologies, Inc. License: -# -# ======================================================================== -# The following dummy classes are: -# ======================================================================== -# Copyright (c) 2007, Metaweb Technologies, Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY METAWEB TECHNOLOGIES AND CONTRIBUTORS -# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL METAWEB -# TECHNOLOGIES OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -# ======================================================================== - -class DummyRequest(object): - - """Simulated urllib2.Request object for httplib2. - - Implements only what's necessary for cookielib.CookieJar to work. - """ - - def __init__(self, url, headers=None): - self.url = url - self.headers = headers - self.origin_req_host = cookielib.request_host(self) - self.type, r = splittype(url) - self.host, r = splithost(r) - if self.host: - self.host = unquote(self.host) - - def get_full_url(self): - return self.url - - def get_origin_req_host(self): - # TODO to match urllib2 this should be different for redirects - return self.origin_req_host - - def get_type(self): - return self.type - - def get_host(self): - return self.host - - def get_header(self, key, default=None): - return self.headers.get(key.lower(), default) - - def has_header(self, key): - return key in self.headers - - def add_unredirected_header(self, key, val): - # TODO this header should not be sent on redirect - self.headers[key.lower()] = val - - def is_unverifiable(self): - # TODO to match urllib2, this should be set to True when the - # request is the result of a redirect - return False - - unverifiable = property(is_unverifiable) - - -class DummyResponse(object): - - """Simulated urllib2.Request object for httplib2. - - Implements only what's necessary for cookielib.CookieJar to work. 
- """ - - def __init__(self, response): - self.response = response - - def info(self): - return DummyMessage(self.response) - - -class DummyMessage(object): - - """Simulated mimetools.Message object for httplib2. - - Implements only what's necessary for cookielib.CookieJar to work. - """ - - def __init__(self, response): - self.response = response - - def getheaders(self, k): - k = k.lower() - self.response.get(k.lower(), None) - if k not in self.response: - return [] - # return self.response[k].split(re.compile(',\\s*')) - - # httplib2 joins multiple values for the same header - # using ','. but the netscape cookie format uses ',' - # as part of the expires= date format. so we have - # to split carefully here - header.split(',') won't do it. - HEADERVAL = re.compile(r'\s*(([^,]|(,\s*\d))+)') - return [h[0] for h in HEADERVAL.findall(self.response[k])] - - def get_all(self, k, failobj=None): - rv = self.getheaders(k) - if not rv: - return failobj - return rv diff --git a/pywikibot/config2.py b/pywikibot/config2.py index 559a9cf..d70f780 100644 --- a/pywikibot/config2.py +++ b/pywikibot/config2.py @@ -46,6 +46,7 @@ import os import stat import sys +import re from warnings import warn @@ -118,7 +119,7 @@ # User agent format. # For the meaning and more help in customization see: # https://www.mediawiki.org/wiki/Manual:Pywikibot/User-agent -user_agent_format = '{script_product} ({script_comments}) {pwb} ({revision}) {httplib2} {python}' +user_agent_format = '{script_product} ({script_comments}) {pwb} ({revision}) {http_backend} {python}' # The default interface for communicating with the site # currently the only defined interface is 'APISite', so don't change this! 
@@ -765,11 +766,7 @@ # Proxy configuration -# For proxy support, install socksipy or httplib2 0.7+ -# then add these three lines to your user-config.py: -# from httplib2 import ProxyInfo, socks -# proxy = ProxyInfo(socks.PROXY_TYPE_HTTP, 'localhost', 8000) -# del ProxyInfo, socks +# TODO: proxy support proxy = None # Simulate settings @@ -928,6 +925,16 @@ if _uc[_key] != globals()[_key] or _key in ('usernames', 'sysopnames', 'disambiguation_comment')] +if ('user_agent_format' in _modified): + _right_user_agent_format = re.sub(r'{httplib2(:|})', r'{http_backend\1', + _uc['user_agent_format']) + if _right_user_agent_format != _uc['user_agent_format']: + warn('`{httplib2}` in user_agent_format is deprecated, ' + 'will replace `{httplib2}` with `{http_backend}`', + _ConfigurationDeprecationWarning) + _uc['user_agent_format'] = _right_user_agent_format + del _right_user_agent_format + for _key in _modified: globals()[_key] = _uc[_key] diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py index f0a5831..3e82636 100644 --- a/pywikibot/data/api.py +++ b/pywikibot/data/api.py @@ -1547,7 +1547,7 @@ use_get = False if use_get: uri = '{0}?{1}'.format(uri, paramstring) - body = None # default in httplib2 + body = None else: body = paramstring diff --git a/requirements.txt b/requirements.txt index 47b8316..901d94f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ # It is organised so that simple requirements # are processed first, and more difficult packages # are last. -# All dependencies other than httplib2 are optional. +# All dependencies other than requests are optional. # # It is good practise to install packages using the system # package manager if it has a packaged version. 
If you are @@ -19,7 +19,7 @@ # $ awk -F '[#>=]' '{print $1}' requirements.txt | xargs apt-cache search # mandatory; see README.conversion.txt -httplib2>=0.9.0 +requests # core interwiki_graph.py: pydot diff --git a/scripts/version.py b/scripts/version.py index ca57686..3debf0b 100755 --- a/scripts/version.py +++ b/scripts/version.py @@ -15,12 +15,13 @@ import sys import os +import codecs import pywikibot from pywikibot.version import getversion try: - import httplib2 + import requests except ImportError: - httplib2 = {'__version__': 'n/a'} + requests = {'__version__': 'n/a'} WMF_CACERT = 'MIIDxTCCAq2gAwIBAgIQAqxcJmoLQJuPC3nyrkYldzANBgkqhkiG9w0BAQUFADBs' @@ -33,17 +34,19 @@ if __name__ == '__main__': pywikibot.output('Pywikibot: %s' % getversion()) pywikibot.output('Release version: %s' % pywikibot.__release__) - pywikibot.output('httplib2 version: %s' % httplib2.__version__) + pywikibot.output('requests version: %s' % requests.__version__) has_wikimedia_cert = False - if not hasattr(httplib2, 'CA_CERTS') or not httplib2.CA_CERTS: + if (not hasattr(requests, 'certs') or + not hasattr(requests.certs, 'where') or + not callable(requests.certs.where)): pywikibot.output(' cacerts: not defined') - elif not os.path.isfile(httplib2.CA_CERTS): - pywikibot.output(' cacerts: %s (missing)' % httplib2.CA_CERTS) + elif not os.path.isfile(requests.certs.where()): + pywikibot.output(' cacerts: %s (missing)' % requests.certs.where()) else: - pywikibot.output(' cacerts: %s' % httplib2.CA_CERTS) + pywikibot.output(' cacerts: %s' % requests.certs.where()) - with open(httplib2.CA_CERTS, 'r') as cert_file: + with codecs.open(requests.certs.where(), 'r', 'utf-8') as cert_file: text = cert_file.read() if WMF_CACERT in text: has_wikimedia_cert = True @@ -51,7 +54,7 @@ % ('ok' if has_wikimedia_cert else 'not ok')) if not has_wikimedia_cert: pywikibot.output( - ' Please reinstall httplib2 or run git submodules update!') + ' Please reinstall requests!') pywikibot.output('Python: %s' % 
sys.version) normalize_text = u'\u092e\u093e\u0930\u094d\u0915 \u091c\u093c\u0941\u0915\u0947\u0930\u092c\u0930\u094d\u0917' diff --git a/setup.py b/setup.py index 5498742..e9c562b 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ test_deps = [] -dependencies = ['httplib2>=0.9'] +dependencies = ['requests'] # the irc module has no Python 2.6 support since 10.0 irc_dep = 'irc==8.9' if sys.version_info < (2, 7) else 'irc' diff --git a/tests/__init__.py b/tests/__init__.py index 3f213d2..429e5c8 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -13,16 +13,16 @@ import sys import warnings -__all__ = ('httplib2', '_cache_dir', 'TestRequest', +__all__ = ('requests', '_cache_dir', 'TestRequest', 'patch_request', 'unpatch_request') # Verify that the unit tests have a base working environment: -# - httplib2 is mandatory +# - requests is mandatory # - future is needed as a fallback for python 2.6, # however if unavailable this will fail on use; see pywikibot/tools.py # - mwparserfromhell is optional, so is only imported in textlib_tests try: - import httplib2 # noqa + import requests # noqa except ImportError as e: print("ImportError: %s" % e) sys.exit(1) diff --git a/tests/http_tests.py b/tests/http_tests.py index 498770c..9a7340b 100644 --- a/tests/http_tests.py +++ b/tests/http_tests.py @@ -12,7 +12,7 @@ import os import sys -import httplib2 +import requests import pywikibot @@ -24,13 +24,7 @@ from tests.utils import expected_failure_if if sys.version_info[0] > 2: - from http import cookiejar as cookielib - import queue as Queue - unicode = str -else: - import cookielib - import Queue class HttpTestCase(TestCase): @@ -164,23 +158,23 @@ def test_server_not_found(self): """Test server not found exception.""" - self.assertRaises(httplib2.ServerNotFoundError, + self.assertRaises(requests.exceptions.ConnectionError, http.fetch, uri='http://ru-sib.wikipedia.org/w/api.php', default_error_handling=True) def test_invalid_scheme(self): """Test invalid scheme.""" - # A 
KeyError is raised within httplib2, in a different thread - self.assertRaises(KeyError, + # A InvalidSchema is raised within requests + self.assertRaises(requests.exceptions.InvalidSchema, http.fetch, uri='invalid://url') def test_follow_redirects(self): """Test follow 301 redirects after an exception works correctly.""" # It doesnt matter what exception is raised here, provided it - # occurs within the httplib2 request method. - self.assertRaises(KeyError, + # occurs within the requests request method. + self.assertRaises(requests.exceptions.InvalidSchema, http.fetch, uri='invalid://url') @@ -188,114 +182,12 @@ r = http.fetch(uri='http://en.wikipedia.org/wiki/Main%20Page') self.assertEqual(r.status, 200) self.assertIn('//en.wikipedia.org/wiki/Main_Page', - r.response_headers['content-location']) + http.session.redirect_cache.get('http://en.wikipedia.org/wiki/Main%20Page')) r = http.fetch(uri='http://www.gandi.eu') self.assertEqual(r.status, 200) - self.assertEqual(r.response_headers['content-location'], + self.assertEqual(http.session.redirect_cache.get('http://www.gandi.eu/'), 'http://www.gandi.net') - - def test_maximum_redirects(self): - """Test that maximum redirect exception doesn't hang up.""" - self.assertRaises(httplib2.RedirectLimit, - http.fetch, - uri='http://httpbin.org/status/300') - - -class ThreadedHttpTestCase(TestCase): - - """Tests for threadedhttp module Http class.""" - - sites = { - 'www-wp': { - 'hostname': 'www.wikipedia.org', - }, - 'wikidata': { - 'hostname': 'test.wikidata.org', - }, - } - - def test_http(self): - """Test threadedhttp.Http.request using http://www.wikipedia.org/.""" - o = threadedhttp.Http() - r = o.request('http://www.wikipedia.org/') - self.assertIsInstance(r, tuple) - self.assertNotIsInstance(r[0], Exception) - self.assertIsInstance(r[0], dict) - self.assertIn('status', r[0]) - self.assertIsInstance(r[0]['status'], str) - self.assertEqual(r[0]['status'], '200') - - self.assertIsInstance(r[1], bytes) - 
self.assertIn(b'<html lang="mul"', r[1]) - self.assertEqual(int(r[0]['content-length']), len(r[1])) - - def test_https(self): - """Test threadedhttp.Http.request using https://www.wikipedia.org/.""" - o = threadedhttp.Http() - r = o.request('https://www.wikipedia.org/') - self.assertIsInstance(r, tuple) - self.assertNotIsInstance(r[0], Exception) - self.assertIsInstance(r[0], dict) - self.assertIn('status', r[0]) - self.assertIsInstance(r[0]['status'], str) - self.assertEqual(r[0]['status'], '200') - - self.assertIsInstance(r[1], bytes) - self.assertIn(b'<html lang="mul"', r[1]) - self.assertEqual(int(r[0]['content-length']), len(r[1])) - - def test_gzip(self): - """Test threadedhttp.Http encodes using gzip.""" - o = threadedhttp.Http() - r = o.request('http://www.wikipedia.org/') - self.assertIsInstance(r, tuple) - self.assertNotIsInstance(r[0], Exception) - self.assertIn('-content-encoding', r[0]) - self.assertEqual(r[0]['-content-encoding'], 'gzip') - - url = 'https://test.wikidata.org/w/api.php?action=query&meta=siteinfo' - r = o.request(url) - self.assertIsInstance(r, tuple) - self.assertNotIsInstance(r[0], Exception) - self.assertIn('-content-encoding', r[0]) - self.assertEqual(r[0]['-content-encoding'], 'gzip') - - -class ThreadedHttpRequestQueueTestCase(TestCase): - - """Tests for threadedhttp module threaded HttpRequest.""" - - sites = { - 'www-wp': { - 'hostname': 'www.wikipedia.org', - }, - } - - def test_threading(self): - """Test using threadedhttp.""" - queue = Queue.Queue() - cookiejar = cookielib.CookieJar() - connection_pool = threadedhttp.ConnectionPool() - proc = threadedhttp.HttpProcessor(queue, cookiejar, connection_pool) - proc.setDaemon(True) - proc.start() - r = threadedhttp.HttpRequest('http://www.wikipedia.org/') - queue.put(r) - - self.assertNotIsInstance(r.exception, Exception) - self.assertIsInstance(r.data, tuple) - self.assertIsInstance(r.response_headers, dict) - self.assertIn('status', r.response_headers) - 
self.assertIsInstance(r.response_headers['status'], str) - self.assertEqual(r.response_headers['status'], '200') - self.assertEqual(r.status, 200) - - self.assertIsInstance(r.raw, bytes) - self.assertIn(b'<html lang="mul"', r.raw) - self.assertEqual(int(r.response_headers['content-length']), len(r.raw)) - - queue.put(None) # Stop the http processor thread class UserAgentTestCase(TestCase): @@ -354,7 +246,7 @@ def setUp(self): self.orig_format = config.user_agent_format - config.user_agent_format = '{script_product} ({script_comments}) {pwb} ({revision}) {httplib2} {python}' + config.user_agent_format = '{script_product} ({script_comments}) {pwb} ({revision}) {http_backend} {python}' def tearDown(self): config.user_agent_format = self.orig_format @@ -368,7 +260,7 @@ self.assertNotIn('()', http.user_agent()) self.assertNotIn('(;', http.user_agent()) self.assertNotIn(';)', http.user_agent()) - self.assertIn('httplib2/', http.user_agent()) + self.assertIn('requests/', http.user_agent()) self.assertIn('Python/' + str(sys.version_info[0]), http.user_agent()) @@ -385,13 +277,19 @@ @staticmethod def _create_request(charset=None, data=UTF8_BYTES): req = threadedhttp.HttpRequest(None, charset=charset) - req._data = ({'content-type': 'charset=utf-8'}, data[:]) + resp = requests.Response() + resp.headers = {'content-type': 'charset=utf-8'} + resp._content = data[:] + req._data = resp return req def test_no_charset(self): """Test decoding without explicit charset.""" req = threadedhttp.HttpRequest(None) - req._data = ({'content-type': ''}, CharsetTestCase.LATIN1_BYTES[:]) + resp = requests.Response() + resp.headers = {'content-type': ''} + resp._content = CharsetTestCase.LATIN1_BYTES[:] + req._data = resp self.assertIsNone(req.charset) self.assertEqual('latin1', req.encoding) self.assertEqual(req.raw, CharsetTestCase.LATIN1_BYTES) @@ -442,10 +340,11 @@ class BinaryTestCase(TestCase): - """Get binary file using httplib2 and pywikibot.""" + """Get binary file using requests and 
pywikibot.""" net = True + hostname = 'upload.wikimedia.org' url = 'https://upload.wikimedia.org/wikipedia/commons/f/fc/MP_sounds.png' @classmethod @@ -455,22 +354,15 @@ with open(os.path.join(_images_dir, 'MP_sounds.png'), 'rb') as f: cls.png = f.read() - def test_httplib2(self): - """Test with httplib2, underlying package.""" - h = httplib2.Http() - r = h.request(uri=self.url) + def test_requests(self): + """Test with requests, underlying package.""" + s = requests.Session() + r = s.get(self.url) - self.assertEqual(r[0]['content-type'], 'image/png') - self.assertEqual(r[1], self.png) + self.assertEqual(r.headers['content-type'], 'image/png') + self.assertEqual(r.content, self.png) - next(iter(h.connections.values())).close() - - def test_threadedhttp(self): - """Test with threadedhttp, internal layer on top of httplib2.""" - r = threadedhttp.Http().request(uri=self.url) - - self.assertEqual(r[0]['content-type'], 'image/png') - self.assertEqual(r[1], self.png) + s.close() def test_http(self): """Test with http, standard http interface for pywikibot.""" diff --git a/tests/utils.py b/tests/utils.py index db2b9a3..9b44240 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -385,5 +385,9 @@ @param args: list of arguments for pwb.py @type args: list of unicode """ - return execute(command=[sys.executable, _pwb_py] + args, - data_in=data_in, timeout=timeout, error=error) + if sys.version_info < (2, 7, 9): + return execute(command=[sys.executable, '-W ignore:A true', _pwb_py] + args, + data_in=data_in, timeout=timeout, error=error) + else: + return execute(command=[sys.executable, _pwb_py] + args, + data_in=data_in, timeout=timeout, error=error) -- To view, visit https://gerrit.wikimedia.org/r/213977 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ic56d43ae1160d1ced796426ce00afe544e88f9e5 Gerrit-PatchSet: 9 Gerrit-Project: pywikibot/core Gerrit-Branch: master Gerrit-Owner: VcamX <vca...@gmail.com> 
Gerrit-Reviewer: John Vandenberg <jay...@gmail.com> Gerrit-Reviewer: Ladsgroup <ladsgr...@gmail.com> Gerrit-Reviewer: Merlijn van Deen <valhall...@arctus.nl> Gerrit-Reviewer: Ricordisamoa <ricordisa...@openmailbox.org> Gerrit-Reviewer: VcamX <vca...@gmail.com> Gerrit-Reviewer: XZise <commodorefabia...@gmx.de> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ Pywikibot-commits mailing list Pywikibot-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits