diff --git a/earwigbot/wiki/site.py b/earwigbot/wiki/site.py index 1f4d277..555a84b 100644 --- a/earwigbot/wiki/site.py +++ b/earwigbot/wiki/site.py @@ -26,7 +26,7 @@ from json import loads from os.path import expanduser from re import escape as re_escape, match as re_match from StringIO import StringIO -from time import sleep +from time import sleep, time from urllib import unquote_plus, urlencode from urllib2 import build_opener, HTTPCookieProcessor, URLError from urlparse import urlparse @@ -74,7 +74,8 @@ class Site(object): article_path=None, script_path=None, sql=None, namespaces=None, login=(None, None), cookiejar=None, user_agent=None, use_https=False, assert_edit=None, - maxlag=None, search_config=(None, None)): + maxlag=None, wait_between_queries=5, + search_config=(None, None)): """Constructor for new Site instances. This probably isn't necessary to call yourself unless you're building a @@ -106,7 +107,9 @@ class Site(object): self._use_https = use_https self._assert_edit = assert_edit self._maxlag = maxlag + self._wait_between_queries = wait_between_queries self._max_retries = 5 + self._last_query_time = 0 # Attributes used for SQL queries: self._sql_data = sql @@ -172,9 +175,10 @@ class Site(object): We'll encode the given params, adding format=json along the way, as well as &assert= and &maxlag= based on self._assert_edit and _maxlag. - We make the request through self._opener, which has built-in cookie - support via self._cookiejar, a User-Agent (wiki.constants.USER_AGENT), - and Accept-Encoding set to "gzip". + Additionally, we'll sleep a bit if the last query was made less than + self._wait_between_queries seconds ago. The request is made through + self._opener, which has cookie support (self._cookiejar), a User-Agent + (wiki.constants.USER_AGENT), and Accept-Encoding set to "gzip". Assuming everything went well, we'll gunzip the data (if compressed), load it as a JSON object, and return it. @@ -204,6 +208,13 @@ class Site(object): if self._maxlag: # If requested, don't overload the servers params["maxlag"] = self._maxlag + since_last_query = time() - self._last_query_time # Throttling support + if since_last_query < self._wait_between_queries: + wait_time = self._wait_between_queries - since_last_query + logger.debug("Throttled: waiting {0} seconds".format(wait_time)) + sleep(wait_time) + self._last_query_time = time() + data = urlencode(params) logger.debug("{0} -> {1}".format(url, data)) diff --git a/earwigbot/wiki/sitesdb.py b/earwigbot/wiki/sitesdb.py index 1f525f1..c6e3f57 100644 --- a/earwigbot/wiki/sitesdb.py +++ b/earwigbot/wiki/sitesdb.py @@ -158,6 +158,7 @@ class SitesDB(object): use_https = config.wiki.get("useHTTPS", False) assert_edit = config.wiki.get("assert") maxlag = config.wiki.get("maxlag") + wait_between_queries = config.wiki.get("waitTime", 5) search_config = config.wiki.get("search") if user_agent: @@ -169,7 +170,8 @@ class SitesDB(object): sql=sql, namespaces=namespaces, login=login, cookiejar=cookiejar, user_agent=user_agent, use_https=use_https, assert_edit=assert_edit, - maxlag=maxlag, search_config=search_config) + maxlag=maxlag, wait_between_queries=wait_between_queries, + search_config=search_config) def _get_site_name_from_sitesdb(self, project, lang): """Return the name of the first site with the given project and lang. @@ -320,13 +322,15 @@ class SitesDB(object): use_https = config.wiki.get("useHTTPS", False) assert_edit = config.wiki.get("assert") maxlag = config.wiki.get("maxlag") + wait_between_queries = config.wiki.get("waitTime", 5) search_config = config.wiki.get("search") - # Create a temp Site object to log in and load the other attributes: + # Create a Site object to log in and load the other attributes: site = Site(base_url=base_url, script_path=script_path, sql=sql, login=login, cookiejar=cookiejar, user_agent=user_agent, use_https=use_https, assert_edit=assert_edit, - maxlag=maxlag, search_config=search_config) + maxlag=maxlag, wait_between_queries=wait_between_queries, + search_config=search_config) self._add_site_to_sitesdb(site) return site