jenkins-bot has submitted this change and it was merged.

Change subject: [FEAT] Support HTTP GET requests
......................................................................


[FEAT] Support HTTP GET requests

This uses an HTTP GET request instead of an HTTP POST request whenever
possible, unless one of the following conditions applies:

- The action is one of the actions which require a POST request (e.g.
  action=upload). If the action is query, this applies when one of the
  query modules requires a POST request.
- The config variable (maximum_GET_length) is lower than the length of
  the actual param string. The family can override this maximum length.
  If it is 0 (ints < 0 are accepted too, but those don't make sense),
  it'll never use a GET request.
- If after the first request the server returns a 414 it'll switch to
  the POST request mode and retry it.
- The site is not using SSL. This restriction can be lifted with the
  config variable enable_GET_without_SSL.

The Request instance itself has an attribute 'use_get' which can
override this behaviour. If it's True the first try will always be a
GET request (even if the action doesn't support it), if it's False it'll
never try a GET request and if it's None, it'll check whether the action
or, for queries, the query modules require a POST request.

The value of 'True' is defined to allow calling action=paraminfo inside
the request, which would otherwise itself call action=paraminfo because
it wouldn't know that this doesn't require a POST request. It also
ignores 'meta' parameters as those are all GETable and because ParamInfo
requires meta=siteinfo and meta=userinfo.

Change-Id: Ib045fecedd9638f4b3bcbc40bc7b37ebfea63c42
---
M pywikibot/comms/http.py
M pywikibot/config2.py
M pywikibot/data/api.py
M pywikibot/exceptions.py
M pywikibot/family.py
5 files changed, 73 insertions(+), 4 deletions(-)

Approvals:
  John Vandenberg: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/pywikibot/comms/http.py b/pywikibot/comms/http.py
index 660e597..f3e93c8 100644
--- a/pywikibot/comms/http.py
+++ b/pywikibot/comms/http.py
@@ -66,7 +66,9 @@
     from urllib2 import quote
 
 from pywikibot import config
-from pywikibot.exceptions import FatalServerError, Server504Error
+from pywikibot.exceptions import (
+    FatalServerError, Server504Error, Server414Error
+)
 from pywikibot.comms import threadedhttp
 from pywikibot.tools import deprecate_arg
 import pywikibot.version
@@ -260,6 +262,9 @@
     if request.data[0].status == 504:
         raise Server504Error("Server %s timed out" % site.hostname())
 
+    if request.data[0].status == 414:
+        raise Server414Error('Too long GET request')
+
     # HTTP status 207 is also a success status for Webdav FINDPROP,
     # used by the version module.
     if request.data[0].status not in (200, 207):
diff --git a/pywikibot/config2.py b/pywikibot/config2.py
index e24ebca..0020d5d 100644
--- a/pywikibot/config2.py
+++ b/pywikibot/config2.py
@@ -89,6 +89,14 @@
 # number of days to cache namespaces, api configuration, etc.
 API_config_expiry = 30
 
+# The maximum number of bytes which uses a GET request, if not positive
+# it'll always use POST requests
+maximum_GET_length = 255
+# Some networks modify GET requests when they are not encrypted, to avoid
+# bug reports related to that disable those. If we are confident that bug
+# related to this are really because of the network this could be changed.
+enable_GET_without_SSL = False
+
 # Solve captchas in the webbrowser. Setting this to False will result in the
 # exception CaptchaError being thrown if a captcha is encountered.
 solve_captcha = True
diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py
index 01d36d9..3eb69a1 100644
--- a/pywikibot/data/api.py
+++ b/pywikibot/data/api.py
@@ -26,7 +26,9 @@
 import pywikibot
 from pywikibot import config, login
 from pywikibot.tools import MediaWikiVersion as LV, deprecated, itergroup
-from pywikibot.exceptions import Server504Error, FatalServerError, Error
+from pywikibot.exceptions import (
+    Server504Error, Server414Error, FatalServerError, Error
+)
 
 import sys
 
@@ -297,6 +299,7 @@
 
             params = {
                 'expiry': config.API_config_expiry,
+                'use_get': True,  # Request need ParamInfo to determine use_get
                 'site': self.site,
                 'action': 'paraminfo',
             }
@@ -534,6 +537,9 @@
         @kwarg retry_wait: (optional) Minimum time to wait after an error,
                defaults to 5 seconds (doubles each retry until max of 120 is
                reached)
+        @kwarg use_get: (optional) Use HTTP GET request if possible. If False
+               it uses a POST request. If None, it'll try to determine via
+               action=paraminfo if the action requires a POST.
         @kwarg format: (optional) Defaults to "json"
         """
         try:
@@ -549,6 +555,7 @@
         else:
             self.mime = kwargs.pop('mime', False)
         self.throttle = kwargs.pop('throttle', True)
+        self.use_get = kwargs.pop('use_get', None)
         self.max_retries = kwargs.pop("max_retries", pywikibot.config.max_retries)
         self.retry_wait = kwargs.pop("retry_wait", pywikibot.config.retry_wait)
         self._params = {}
@@ -877,6 +884,27 @@
 
         """
         self._add_defaults()
+        if (not config.enable_GET_without_SSL and
+                self.site.protocol() != 'https'):
+            use_get = False
+        elif self.use_get is None:
+            if self.action == 'query':
+                # for queries check the query module
+                modules = set()
+                for mod_type_name in ('list', 'prop', 'generator'):
+                    modules.update(self._params.get(mod_type_name, []))
+            else:
+                modules = set([self.action])
+            if modules:
+                self.site._paraminfo.fetch(modules)
+                use_get = all(['mustbeposted' not in self.site._paraminfo[mod]
+                               for mod in modules])
+            else:
+                # If modules is empty, just 'meta' was given, which doesn't
+                # require POSTs, and is required for ParamInfo
+                use_get = True
+        else:
+            use_get = self.use_get
         while True:
             paramstring = self._http_param_string()
             simulate = self._simulate(self.action)
@@ -892,17 +920,35 @@
                 if self.mime:
                     (headers, body) = Request._build_mime_request(
                         self._encoded_items(), self.mime_params)
+                    use_get = False  # MIME requests require HTTP POST
                 else:
                     headers = {'Content-Type': 'application/x-www-form-urlencoded'}
-                    body = paramstring
+                    if (not self.site.maximum_GET_length() or
+                            self.site.maximum_GET_length() < len(paramstring)):
+                        use_get = False
+                    if use_get:
+                        uri = '{0}?{1}'.format(uri, paramstring)
+                        body = None  # default in httplib2
+                    else:
+                        body = paramstring
 
                 rawdata = http.request(
-                    self.site, uri, method="POST",
+                    self.site, uri, method='GET' if use_get else 'POST',
                     headers=headers, body=body)
             except Server504Error:
                 pywikibot.log(u"Caught HTTP 504 error; retrying")
                 self.wait()
                 continue
+            except Server414Error:
+                if use_get:
+                    pywikibot.log('Caught HTTP 414 error; retrying')
+                    use_get = False
+                    self.wait()
+                    continue
+                else:
+                    pywikibot.warning('Caught HTTP 414 error, although not '
+                                      'using GET.')
+                    raise
             except FatalServerError:
                 # This error is not going to be fixed by just waiting
                 pywikibot.error(traceback.format_exc())
diff --git a/pywikibot/exceptions.py b/pywikibot/exceptions.py
index 298ef6c..b5013df 100644
--- a/pywikibot/exceptions.py
+++ b/pywikibot/exceptions.py
@@ -338,6 +338,13 @@
     pass
 
 
+class Server414Error(Error):
+
+    """Server returned with HTTP 414 code."""
+
+    pass
+
+
 class BadTitle(Error):
 
     """Server responded with BadTitle."""
diff --git a/pywikibot/family.py b/pywikibot/family.py
index f2f13e3..9daf228 100644
--- a/pywikibot/family.py
+++ b/pywikibot/family.py
@@ -1086,6 +1086,9 @@
         else:
             return code
 
+    def maximum_GET_length(self, code):
+        return config.maximum_GET_length
+
     def dbName(self, code):
         # returns the name of the MySQL database
         return '%s%s' % (code, self.name)

-- 
To view, visit https://gerrit.wikimedia.org/r/173055
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ib045fecedd9638f4b3bcbc40bc7b37ebfea63c42
Gerrit-PatchSet: 6
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <commodorefabia...@gmx.de>
Gerrit-Reviewer: John Vandenberg <jay...@gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgr...@gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhall...@arctus.nl>
Gerrit-Reviewer: XZise <commodorefabia...@gmx.de>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to