jenkins-bot has submitted this change and it was merged. Change subject: [FEAT] Support HTTP GET requests ......................................................................
[FEAT] Support HTTP GET requests This uses an HTTP GET request instead of an HTTP POST whenever possible, unless one of the following conditions applies: - The action is one of the actions which require a POST request (e.g. action=upload). If the action is query, one of the query modules requires a POST request. - The config variable (maximum_GET_length) is lower than the length of the actual param string. The family can override this maximum length. If it is 0 (ints < 0 are also accepted, but those don't make sense), it'll never use a GET request. - The server returns a 414 after the first request; it'll then switch to the POST request mode and retry. - The site is not using SSL; this can be changed via the configuration (enable_GET_without_SSL). The Request instance itself has an attribute 'use_get' which can override this behaviour. If it's True the first try will always be a GET request (even if the action doesn't support it), if it's False it'll never try a GET request, and if it's None it'll check whether the action or, for queries, the query modules require a POST. The value of 'True' is defined to allow calling action=paraminfo inside the request, which would otherwise itself call action=paraminfo because it wouldn't know that this action doesn't require a POST request. It also ignores 'meta' parameters as those are all GETable and because ParamInfo requires meta=siteinfo and meta=userinfo. 
Change-Id: Ib045fecedd9638f4b3bcbc40bc7b37ebfea63c42 --- M pywikibot/comms/http.py M pywikibot/config2.py M pywikibot/data/api.py M pywikibot/exceptions.py M pywikibot/family.py 5 files changed, 73 insertions(+), 4 deletions(-) Approvals: John Vandenberg: Looks good to me, approved jenkins-bot: Verified diff --git a/pywikibot/comms/http.py b/pywikibot/comms/http.py index 660e597..f3e93c8 100644 --- a/pywikibot/comms/http.py +++ b/pywikibot/comms/http.py @@ -66,7 +66,9 @@ from urllib2 import quote from pywikibot import config -from pywikibot.exceptions import FatalServerError, Server504Error +from pywikibot.exceptions import ( + FatalServerError, Server504Error, Server414Error +) from pywikibot.comms import threadedhttp from pywikibot.tools import deprecate_arg import pywikibot.version @@ -260,6 +262,9 @@ if request.data[0].status == 504: raise Server504Error("Server %s timed out" % site.hostname()) + if request.data[0].status == 414: + raise Server414Error('Too long GET request') + # HTTP status 207 is also a success status for Webdav FINDPROP, # used by the version module. if request.data[0].status not in (200, 207): diff --git a/pywikibot/config2.py b/pywikibot/config2.py index e24ebca..0020d5d 100644 --- a/pywikibot/config2.py +++ b/pywikibot/config2.py @@ -89,6 +89,14 @@ # number of days to cache namespaces, api configuration, etc. API_config_expiry = 30 +# The maximum number of bytes which uses a GET request, if not positive +# it'll always use POST requests +maximum_GET_length = 255 +# Some networks modify GET requests when they are not encrypted, to avoid +# bug reports related to that disable those. If we are confident that bug +# related to this are really because of the network this could be changed. +enable_GET_without_SSL = False + # Solve captchas in the webbrowser. Setting this to False will result in the # exception CaptchaError being thrown if a captcha is encountered. 
solve_captcha = True diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py index 01d36d9..3eb69a1 100644 --- a/pywikibot/data/api.py +++ b/pywikibot/data/api.py @@ -26,7 +26,9 @@ import pywikibot from pywikibot import config, login from pywikibot.tools import MediaWikiVersion as LV, deprecated, itergroup -from pywikibot.exceptions import Server504Error, FatalServerError, Error +from pywikibot.exceptions import ( + Server504Error, Server414Error, FatalServerError, Error +) import sys @@ -297,6 +299,7 @@ params = { 'expiry': config.API_config_expiry, + 'use_get': True, # Request need ParamInfo to determine use_get 'site': self.site, 'action': 'paraminfo', } @@ -534,6 +537,9 @@ @kwarg retry_wait: (optional) Minimum time to wait after an error, defaults to 5 seconds (doubles each retry until max of 120 is reached) + @kwarg use_get: (optional) Use HTTP GET request if possible. If False + it uses a POST request. If None, it'll try to determine via + action=paraminfo if the action requires a POST. 
@kwarg format: (optional) Defaults to "json" """ try: @@ -549,6 +555,7 @@ else: self.mime = kwargs.pop('mime', False) self.throttle = kwargs.pop('throttle', True) + self.use_get = kwargs.pop('use_get', None) self.max_retries = kwargs.pop("max_retries", pywikibot.config.max_retries) self.retry_wait = kwargs.pop("retry_wait", pywikibot.config.retry_wait) self._params = {} @@ -877,6 +884,27 @@ """ self._add_defaults() + if (not config.enable_GET_without_SSL and + self.site.protocol() != 'https'): + use_get = False + elif self.use_get is None: + if self.action == 'query': + # for queries check the query module + modules = set() + for mod_type_name in ('list', 'prop', 'generator'): + modules.update(self._params.get(mod_type_name, [])) + else: + modules = set([self.action]) + if modules: + self.site._paraminfo.fetch(modules) + use_get = all(['mustbeposted' not in self.site._paraminfo[mod] + for mod in modules]) + else: + # If modules is empty, just 'meta' was given, which doesn't + # require POSTs, and is required for ParamInfo + use_get = True + else: + use_get = self.use_get while True: paramstring = self._http_param_string() simulate = self._simulate(self.action) @@ -892,17 +920,35 @@ if self.mime: (headers, body) = Request._build_mime_request( self._encoded_items(), self.mime_params) + use_get = False # MIME requests require HTTP POST else: headers = {'Content-Type': 'application/x-www-form-urlencoded'} - body = paramstring + if (not self.site.maximum_GET_length() or + self.site.maximum_GET_length() < len(paramstring)): + use_get = False + if use_get: + uri = '{0}?{1}'.format(uri, paramstring) + body = None # default in httplib2 + else: + body = paramstring rawdata = http.request( - self.site, uri, method="POST", + self.site, uri, method='GET' if use_get else 'POST', headers=headers, body=body) except Server504Error: pywikibot.log(u"Caught HTTP 504 error; retrying") self.wait() continue + except Server414Error: + if use_get: + pywikibot.log('Caught HTTP 414 error; 
retrying') + use_get = False + self.wait() + continue + else: + pywikibot.warning('Caught HTTP 414 error, although not ' + 'using GET.') + raise except FatalServerError: # This error is not going to be fixed by just waiting pywikibot.error(traceback.format_exc()) diff --git a/pywikibot/exceptions.py b/pywikibot/exceptions.py index 298ef6c..b5013df 100644 --- a/pywikibot/exceptions.py +++ b/pywikibot/exceptions.py @@ -338,6 +338,13 @@ pass +class Server414Error(Error): + + """Server returned with HTTP 414 code.""" + + pass + + class BadTitle(Error): """Server responded with BadTitle.""" diff --git a/pywikibot/family.py b/pywikibot/family.py index f2f13e3..9daf228 100644 --- a/pywikibot/family.py +++ b/pywikibot/family.py @@ -1086,6 +1086,9 @@ else: return code + def maximum_GET_length(self, code): + return config.maximum_GET_length + def dbName(self, code): # returns the name of the MySQL database return '%s%s' % (code, self.name) -- To view, visit https://gerrit.wikimedia.org/r/173055 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ib045fecedd9638f4b3bcbc40bc7b37ebfea63c42 Gerrit-PatchSet: 6 Gerrit-Project: pywikibot/core Gerrit-Branch: master Gerrit-Owner: XZise <commodorefabia...@gmx.de> Gerrit-Reviewer: John Vandenberg <jay...@gmail.com> Gerrit-Reviewer: Ladsgroup <ladsgr...@gmail.com> Gerrit-Reviewer: Merlijn van Deen <valhall...@arctus.nl> Gerrit-Reviewer: XZise <commodorefabia...@gmx.de> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits