From 8dc023fcaceb587bfda8d2e71621de65b88de1fa Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 20 Aug 2011 02:09:18 -0400 Subject: [PATCH] support for Maxlag and AssertEdit in API queries, via config.json and Site.__init__ kwargs; patched holes in Site._api_query, including stability improvements and support for retrying queries when servers lag --- bot/commands/ctcp.py | 2 +- bot/wiki/functions.py | 6 +++-- bot/wiki/site.py | 70 +++++++++++++++++++++++++++++++++++++++------------ 3 files changed, 59 insertions(+), 19 deletions(-) diff --git a/bot/commands/ctcp.py b/bot/commands/ctcp.py index 3993279..4b86b96 100644 --- a/bot/commands/ctcp.py +++ b/bot/commands/ctcp.py @@ -42,6 +42,6 @@ class Command(BaseCommand): elif command == "VERSION": default = "EarwigBot - 0.1-dev - Python/$1 https://github.com/earwig/earwigbot" - vers = config.metadata.get("ircVersion", default) + vers = config.irc.get("version", default) vers = vers.replace("$1", platform.python_version()) self.connection.notice(target, "\x01VERSION {0}\x01".format(vers)) diff --git a/bot/wiki/functions.py b/bot/wiki/functions.py index 5c4052c..7562a4e 100644 --- a/bot/wiki/functions.py +++ b/bot/wiki/functions.py @@ -86,7 +86,9 @@ def _get_site_object_from_dict(name, d): namespaces = d.get("namespaces", {}) login = (config.wiki.get("username"), config.wiki.get("password")) cookiejar = _get_cookiejar() - user_agent = config.metadata.get("userAgent") + user_agent = config.wiki.get("userAgent") + assert_edit = config.wiki.get("assert") + maxlag = config.wiki.get("maxlag") if user_agent: user_agent = user_agent.replace("$1", platform.python_version()) @@ -102,7 +104,7 @@ def _get_site_object_from_dict(name, d): return Site(name=name, project=project, lang=lang, base_url=base_url, article_path=article_path, script_path=script_path, sql=sql, namespaces=namespaces, login=login, cookiejar=cookiejar, - user_agent=user_agent) + user_agent=user_agent, assert_edit=assert_edit, maxlag=maxlag) def get_site(name=None, project=None, lang=None): """Returns a Site instance based on information from our config file. diff --git a/bot/wiki/site.py b/bot/wiki/site.py index eecf2c3..ab94947 100644 --- a/bot/wiki/site.py +++ b/bot/wiki/site.py @@ -5,6 +5,7 @@ from gzip import GzipFile from json import loads from re import escape as re_escape, match as re_match from StringIO import StringIO +from time import sleep from urllib import unquote_plus, urlencode from urllib2 import build_opener, HTTPCookieProcessor, URLError from urlparse import urlparse @@ -41,7 +42,7 @@ class Site(object): def __init__(self, name=None, project=None, lang=None, base_url=None, article_path=None, script_path=None, sql=(None, None), namespaces=None, login=(None, None), cookiejar=None, - user_agent=None): + user_agent=None, assert_edit=None, maxlag=None): """Constructor for new Site instances. This probably isn't necessary to call yourself unless you're building a @@ -69,6 +70,11 @@ class Site(object): self._sql = sql self._namespaces = namespaces + # Attributes used when querying the API: + self._assert_edit = assert_edit + self._maxlag = maxlag + self._max_retries = 5 + # Set up cookiejar and URL opener for making API queries: if cookiejar is not None: self._cookiejar = cookiejar @@ -90,22 +96,25 @@ class Site(object): if logged_in_as is None or name != logged_in_as: self._login(login) - def _api_query(self, params): + def _api_query(self, params, tries=0, wait=5): """Do an API query with `params` as a dict of parameters. This will first attempt to construct an API url from self._base_url and self._script_path. We need both of these, or else we'll raise SiteAPIError. - We'll encode the given params, adding format=json along the way, and - make the request through self._opener, which has built-in cookie + We'll encode the given params, adding format=json along the way, as + well as &assert= and &maxlag= based on self._assert_edit and _maxlag. + We make the request through self._opener, which has built-in cookie support via self._cookiejar, a User-Agent (wiki.constants.USER_AGENT), and Accept-Encoding set to "gzip". - + Assuming everything went well, we'll gunzip the data (if compressed), load it as a JSON object, and return it. - If our request failed, we'll raise SiteAPIError with details. + If our request failed for some reason, we'll raise SiteAPIError with + details. If that reason was due to maxlag, we'll sleep for a bit and + then repeat the query until we exceed self._max_retries. There's helpful MediaWiki API documentation at . @@ -115,7 +124,13 @@ class Site(object): raise SiteAPIError(e) url = ''.join((self._base_url, self._script_path, "/api.php")) + params["format"] = "json" # This is the only format we understand + if self._assert_edit: # If requested, ensure that we're logged in + params["assert"] = self._assert_edit + if self._maxlag: # If requested, don't overload the servers + params["maxlag"] = self._maxlag + data = urlencode(params) print url, data # debug code @@ -124,21 +139,44 @@ class Site(object): response = self._opener.open(url, data) except URLError as error: if hasattr(error, "reason"): - e = "API query at {0} failed because {1}." - e = e.format(error.geturl, error.reason) + e = "API query failed: {0}.".format(error.reason) elif hasattr(error, "code"): - e = "API query at {0} failed; got an error code of {1}." - e = e.format(error.geturl, error.code) + e = "API query failed: got an error code of {0}." + e = e.format(error.code) else: e = "API query failed." raise SiteAPIError(e) + + result = response.read() + if response.headers.get("Content-Encoding") == "gzip": + stream = StringIO(result) + gzipper = GzipFile(fileobj=stream) + result = gzipper.read() + + try: + res = loads(result) # Parse as a JSON object + except ValueError: + e = "API query failed: JSON could not be decoded." + raise SiteAPIError(e) + + try: + code = res["error"]["code"] + info = res["error"]["info"] + except KeyError: + return res + + if code == "maxlag": + if tries >= self._max_retries: + e = "Maximum number of retries reached ({0})." + raise SiteAPIError(e.format(self._max_retries)) + tries += 1 + msg = 'Server says: "{0}". Retrying in {1} seconds ({2}/{3}).' + print msg.format(info, wait, tries, self._max_retries) + sleep(wait) + return self._api_query(params, tries=tries, wait=wait*3) else: - result = response.read() - if response.headers.get("Content-Encoding") == "gzip": - stream = StringIO(result) - gzipper = GzipFile(fileobj=stream) - result = gzipper.read() - return loads(result) # Parse as a JSON object + e = 'API query failed: got error "{0}"; server says: "{1}".' + raise SiteAPIError(e.format(code, info)) def _load_attributes(self, force=False): """Load data about our Site from the API.