From 6433957ae9917f3dc06e5022d8148d6bea6047d1 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Thu, 5 Jul 2012 17:46:21 -0400
Subject: [PATCH] First stages of service delegation.

---
 earwigbot/commands/afc_status.py  |  7 ++---
 earwigbot/exceptions.py           | 25 +++++++++++-------
 earwigbot/tasks/afc_statistics.py |  2 +-
 earwigbot/wiki/category.py        | 55 +++++++++++++++++++++++++++++++++------
 earwigbot/wiki/page.py            | 34 ++++++++++++------------
 earwigbot/wiki/site.py            | 33 ++++++++++++++++++-----
 earwigbot/wiki/user.py            | 14 +++++-----
 7 files changed, 116 insertions(+), 54 deletions(-)

diff --git a/earwigbot/commands/afc_status.py b/earwigbot/commands/afc_status.py
index 4f517d5..c03da85 100644
--- a/earwigbot/commands/afc_status.py
+++ b/earwigbot/commands/afc_status.py
@@ -109,12 +109,9 @@ class AFCStatus(Command):
 
     def count_submissions(self):
         """Returns the number of open AFC submissions (count of CAT:PEND)."""
-        cat = self.site.get_category("Pending AfC submissions")
-        subs = len(cat.get_members(use_sql=True))
-
-        # Remove [[Wikipedia:Articles for creation/Redirects]] and
+        # Subtract two for [[Wikipedia:Articles for creation/Redirects]] and
         # [[Wikipedia:Files for upload]], which aren't real submissions:
-        return subs - 2
+        return self.site.get_category("Pending AfC submissions").pages - 2
 
     def count_redirects(self):
         """Returns the number of open redirect submissions. Calculated as the
diff --git a/earwigbot/exceptions.py b/earwigbot/exceptions.py
index b06354a..193c9ed 100644
--- a/earwigbot/exceptions.py
+++ b/earwigbot/exceptions.py
@@ -31,7 +31,9 @@ This module contains all exceptions used by EarwigBot::
     |    +-- BrokenSocketError
     +-- WikiToolsetError
          +-- SiteNotFoundError
-         +-- SiteAPIError
+         +-- NoServiceError
+         +-- APIError
+         +-- SQLError
          +-- LoginError
          +-- NamespaceNotFoundError
          +-- PageNotFoundError
@@ -45,7 +47,6 @@ This module contains all exceptions used by EarwigBot::
          |    +-- ContentTooBigError
          |    +-- SpamDetectedError
          |    +-- FilteredError
-         +-- SQLError
          +-- CopyvioCheckError
               +-- UnknownSearchEngineError
               +-- UnsupportedSearchEngineError
@@ -81,7 +82,13 @@ class SiteNotFoundError(WikiToolsetError):
     Raised by :py:class:`~earwigbot.wiki.sitesdb.SitesDB`.
     """
 
-class SiteAPIError(WikiToolsetError):
+class NoServiceError(WikiToolsetError):
+    """No service is functioning to handle a specific task.
+
+    Raised by :py:meth:`Site.delegate <earwigbot.wiki.site.Site.delegate>`.
+    """
+
+class APIError(WikiToolsetError):
     """Couldn't connect to a site's API.
 
     Perhaps the server doesn't exist, our URL is wrong or incomplete, or
@@ -90,6 +97,12 @@ class SiteAPIError(WikiToolsetError):
     Raised by :py:meth:`Site.api_query <earwigbot.wiki.site.Site.api_query>`.
     """
 
+class SQLError(WikiToolsetError):
+    """Some error involving SQL querying occurred.
+
+    Raised by :py:meth:`Site.sql_query <earwigbot.wiki.site.Site.sql_query>`.
+    """
+
 class LoginError(WikiToolsetError):
     """An error occured while trying to login.
 
@@ -188,12 +201,6 @@ class FilteredError(EditError):
     :py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
     """
 
-class SQLError(WikiToolsetError):
-    """Some error involving SQL querying occurred.
-
-    Raised by :py:meth:`Site.sql_query <earwigbot.wiki.site.Site.sql_query>`.
-    """
-
 class CopyvioCheckError(WikiToolsetError):
     """An error occured when checking a page for copyright violations.
 
diff --git a/earwigbot/tasks/afc_statistics.py b/earwigbot/tasks/afc_statistics.py
index 5b200db..69dd311 100644
--- a/earwigbot/tasks/afc_statistics.py
+++ b/earwigbot/tasks/afc_statistics.py
@@ -663,7 +663,7 @@ class AFCStatistics(Task):
             return None, None, None
         try:
             content = self.get_revision_content(revid)
-        except exceptions.SiteAPIError:
+        except exceptions.APIError:
             msg = "API error interrupted SQL query in get_special() for page (id: {0}, chart: {1})"
             self.logger.exception(msg.format(pageid, chart))
             return None, None, None
diff --git a/earwigbot/wiki/category.py b/earwigbot/wiki/category.py
index 2df9a0e..c88a163 100644
--- a/earwigbot/wiki/category.py
+++ b/earwigbot/wiki/category.py
@@ -49,7 +49,7 @@ class Category(Page):
 
     def __str__(self):
         """Return a nice string representation of the Category."""
-        return '<Category "{0}" of {1}>'.format(self.title, str(self._site))
+        return '<Category "{0}" of {1}>'.format(self.title, str(self.site))
 
     def _get_members_via_sql(self, limit, follow):
         """Iterate over Pages in the category using SQL."""
@@ -60,32 +60,32 @@ class Category(Page):
 
         if limit:
             query += " LIMIT ?"
-            result = self._site.sql_query(query, (title, limit))
+            result = self.site.sql_query(query, (title, limit))
         else:
-            result = self._site.sql_query(query, (title,))
+            result = self.site.sql_query(query, (title,))
 
         members = list(result)
         for row in members:
             base = row[0].replace("_", " ").decode("utf8")
-            namespace = self._site.namespace_id_to_name(row[1])
+            namespace = self.site.namespace_id_to_name(row[1])
             if namespace:
                 title = u":".join((namespace, base))
             else:  # Avoid doing a silly (albeit valid) ":Pagename" thing
                 title = base
-            yield self._site.get_page(title, follow_redirects=follow,
+            yield self.site.get_page(title, follow_redirects=follow,
                                        pageid=row[2])
 
     def _get_members_via_api(self, limit, follow):
         """Iterate over Pages in the category using the API."""
         params = {"action": "query", "list": "categorymembers",
-                  "cmtitle": self._title}
+                  "cmtitle": self.title}
 
         while 1:
             params["cmlimit"] = limit if limit else "max"
-            result = self._site.api_query(**params)
+            result = self.site.api_query(**params)
             for member in result["query"]["categorymembers"]:
                 title = member["title"]
-                yield self._site.get_page(title, follow_redirects=follow)
+                yield self.site.get_page(title, follow_redirects=follow)
 
             if "query-continue" in result:
                 qcontinue = result["query-continue"]["categorymembers"]
@@ -95,6 +95,45 @@ class Category(Page):
             else:
                 break
 
+    def _get_size_via_sql(self, member_type):
+        query = "SELECT COUNT(*) FROM categorylinks WHERE cl_to = ?"
+        title = self.title.replace(" ", "_").split(":", 1)[1]
+        if member_type == "size":
+            result = self.site.sql_query(query, (title,))
+        else:
+            query += " AND cl_type = ?"
+            result = self.site.sql_query(query, (title, member_type[:-1]))
+        return list(result)[0]
+
+    def _get_size_via_api(self, member_type):
+        result = self.site.api_query(action="query", prop="categoryinfo",
+                                     titles=self.title)
+        info = result["query"]["pages"].values()[0]["categoryinfo"]
+        return info[member_type]
+
+    def _get_size(self, member_type):
+        services = {
+            self.site.SERVICE_API: self._get_size_via_api,
+            self.site.SERVICE_SQL: self._get_size_via_sql
+        }
+        return self.site.delegate(services, (member_type,))
+
+    @property
+    def size(self):
+        return self._get_size("size")
+
+    @property
+    def pages(self):
+        return self._get_size("pages")
+
+    @property
+    def files(self):
+        return self._get_size("files")
+
+    @property
+    def subcats(self):
+        return self._get_size("subcats")
+
     def get_members(self, use_sql=False, limit=None, follow_redirects=None):
         """Iterate over Pages in the category.
diff --git a/earwigbot/wiki/page.py b/earwigbot/wiki/page.py
index d3b839d..bebd355 100644
--- a/earwigbot/wiki/page.py
+++ b/earwigbot/wiki/page.py
@@ -117,7 +117,7 @@ class Page(CopyrightMixIn):
         prefix = self._title.split(":", 1)[0]
         if prefix != title:  # ignore a page that's titled "Category" or "User"
             try:
-                self._namespace = self._site.namespace_name_to_id(prefix)
+                self._namespace = self.site.namespace_name_to_id(prefix)
             except exceptions.NamespaceNotFoundError:
                 self._namespace = 0
         else:
@@ -137,7 +137,7 @@ class Page(CopyrightMixIn):
 
     def __str__(self):
         """Return a nice string representation of the Page."""
-        return '<Page "{0}" of {1}>'.format(self.title, str(self._site))
+        return '<Page "{0}" of {1}>'.format(self.title, str(self.site))
 
     def _assert_validity(self):
         """Used to ensure that our page's title is valid.
@@ -199,7 +199,7 @@ class Page(CopyrightMixIn):
         Assuming the API is sound, this should not raise any exceptions.
         """
         if not result:
-            query = self._site.api_query
+            query = self.site.api_query
             result = query(action="query", rvprop="user", intoken="edit",
                            prop="info|revisions", rvlimit=1, rvdir="newer",
                            titles=self._title, inprop="protection|url")
@@ -263,7 +263,7 @@ class Page(CopyrightMixIn):
         want to force content reloading.
         """
         if not result:
-            query = self._site.api_query
+            query = self.site.api_query
             result = query(action="query", prop="revisions", rvlimit=1,
                            rvprop="content|timestamp", titles=self._title)
 
@@ -310,8 +310,8 @@ class Page(CopyrightMixIn):
 
         # Try the API query, catching most errors with our handler:
         try:
-            result = self._site.api_query(**params)
-        except exceptions.SiteAPIError as error:
+            result = self.site.api_query(**params)
+        except exceptions.APIError as error:
             if not hasattr(error, "code"):
                 raise  # We can only handle errors with a code attribute
             result = self._handle_edit_errors(error, params, tries)
@@ -375,12 +375,12 @@ class Page(CopyrightMixIn):
 
         elif error.code in ["noedit-anon", "cantcreate-anon",
                             "noimageredirect-anon"]:
-            if not all(self._site._login_info):
+            if not all(self.site._login_info):
                 # Insufficient login info:
                 raise exceptions.PermissionsError(error.info)
             if tries == 0:
                 # We have login info; try to login:
-                self._site._login(self._site._login_info)
+                self.site._login(self.site._login_info)
                 self._token = None  # Need a new token; old one is invalid now
                 return self._edit(params=params, tries=1)
             else:
@@ -416,13 +416,13 @@ class Page(CopyrightMixIn):
         log in. Otherwise, raise PermissionsError with details.
""" if assertion == "user": - if not all(self._site._login_info): + if not all(self.site._login_info): # Insufficient login info: e = "AssertEdit: user assertion failed, and no login info was provided." raise exceptions.PermissionsError(e) if tries == 0: # We have login info; try to login: - self._site._login(self._site._login_info) + self.site._login(self.site._login_info) self._token = None # Need a new token; old one is invalid now return self._edit(params=params, tries=1) else: @@ -502,8 +502,8 @@ class Page(CopyrightMixIn): return self._fullurl else: slug = quote(self._title.replace(" ", "_"), safe="/:") - path = self._site._article_path.replace("$1", slug) - return ''.join((self._site.url, path)) + path = self.site._article_path.replace("$1", slug) + return ''.join((self.site.url, path)) @property def namespace(self): @@ -580,7 +580,7 @@ class Page(CopyrightMixIn): otherwise missing or invalid. """ if self._namespace < 0: - ns = self._site.namespace_id_to_name(self._namespace) + ns = self.site.namespace_id_to_name(self._namespace) e = u"Pages in the {0} namespace can't have talk pages.".format(ns) raise exceptions.InvalidPageError(e) @@ -594,7 +594,7 @@ class Page(CopyrightMixIn): except IndexError: body = self._title - new_prefix = self._site.namespace_id_to_name(new_ns) + new_prefix = self.site.namespace_id_to_name(new_ns) # If the new page is in namespace 0, don't do ":Title" (it's correct, # but unnecessary), just do "Title": @@ -605,7 +605,7 @@ class Page(CopyrightMixIn): if follow_redirects is None: follow_redirects = self._follow_redirects - return Page(self._site, new_title, follow_redirects) + return Page(self.site, new_title, follow_redirects) def get(self): """Return page content, which is cached if you try to call get again. @@ -616,7 +616,7 @@ class Page(CopyrightMixIn): if self._exists == self.PAGE_UNKNOWN: # Kill two birds with one stone by doing an API query for both our # attributes and our page content: - query = self._site.api_query + query = self.site.api_query result = query(action="query", rvlimit=1, titles=self._title, prop="info|revisions", inprop="protection|url", intoken="edit", rvprop="content|timestamp") @@ -680,7 +680,7 @@ class Page(CopyrightMixIn): if not self._creator: self._load() self._assert_existence() - return self._site.get_user(self._creator) + return self.site.get_user(self._creator) def parse(self): """Parse the page content for templates, links, etc. diff --git a/earwigbot/wiki/site.py b/earwigbot/wiki/site.py index 2f9bc2b..0a4f2e0 100644 --- a/earwigbot/wiki/site.py +++ b/earwigbot/wiki/site.py @@ -82,6 +82,8 @@ class Site(object): - :py:meth:`get_category`: returns a Category for the given title - :py:meth:`get_user`: returns a User object for the given name """ + SERVICE_API = 1 + SERVICE_SQL = 2 def __init__(self, name=None, project=None, lang=None, base_url=None, article_path=None, script_path=None, sql=None, @@ -228,7 +230,7 @@ class Site(object): e = e.format(error.code) else: e = "API query failed." - raise exceptions.SiteAPIError(e) + raise exceptions.APIError(e) result = response.read() if response.headers.get("Content-Encoding") == "gzip": @@ -242,7 +244,7 @@ class Site(object): """Given API query params, return the URL to query and POST data.""" if not self._base_url or self._script_path is None: e = "Tried to do an API query, but no API URL is known." 
-            raise exceptions.SiteAPIError(e)
+            raise exceptions.APIError(e)
 
         url = ''.join((self.url, self._script_path, "/api.php"))
         params["format"] = "json"  # This is the only format we understand
@@ -260,7 +262,7 @@ class Site(object):
             res = loads(result)  # Try to parse as a JSON object
         except ValueError:
             e = "API query failed: JSON could not be decoded."
-            raise exceptions.SiteAPIError(e)
+            raise exceptions.APIError(e)
 
         try:
             code = res["error"]["code"]
@@ -271,7 +273,7 @@ class Site(object):
         if code == "maxlag":  # We've been throttled by the server
             if tries >= self._max_retries:
                 e = "Maximum number of retries reached ({0})."
-                raise exceptions.SiteAPIError(e.format(self._max_retries))
+                raise exceptions.APIError(e.format(self._max_retries))
             tries += 1
             msg = 'Server says "{0}"; retrying in {1} seconds ({2}/{3})'
             self._logger.info(msg.format(info, wait, tries, self._max_retries))
@@ -279,7 +281,7 @@ class Site(object):
             return self._api_query(params, tries=tries, wait=wait*2)
         else:  # Some unknown error occurred
             e = 'API query failed: got error "{0}"; server says: "{1}".'
-            error = exceptions.SiteAPIError(e.format(code, info))
+            error = exceptions.APIError(e.format(code, info))
             error.code, error.info = code, info
             raise error
 
@@ -522,6 +524,10 @@ class Site(object):
 
         self._sql_conn = oursql.connect(**args)
 
+    def _get_service_order(self):
+        """Return a list of services in the order we prefer to use them."""
+        return [self.SERVICE_SQL, self.SERVICE_API]
+
     @property
     def name(self):
         """The Site's name (or "wikiid" in the API), like ``"enwiki"``."""
@@ -559,7 +565,7 @@ class Site(object):
         This will first attempt to construct an API url from
         :py:attr:`self._base_url` and :py:attr:`self._script_path`. We need
         both of these, or else we'll raise
-        :py:exc:`~earwigbot.exceptions.SiteAPIError`. If
+        :py:exc:`~earwigbot.exceptions.APIError`. If
         :py:attr:`self._base_url` is protocol-relative (introduced in
         MediaWiki 1.18), we'll choose HTTPS only if
         :py:attr:`self._user_https` is ``True``, otherwise HTTP.
@@ -578,7 +584,7 @@ class Site(object):
         load it as a JSON object, and return it.
 
         If our request failed for some reason, we'll raise
-        :py:exc:`~earwigbot.exceptions.SiteAPIError` with details. If that
+        :py:exc:`~earwigbot.exceptions.APIError` with details. If that
         reason was due to maxlag, we'll sleep for a bit and then repeat the
         query until we exceed :py:attr:`self._max_retries`.
 
@@ -739,3 +745,16 @@ class Site(object):
         else:
             username = self._get_username()
         return User(self, username)
+
+    def delegate(self, services, args=None, kwargs=None):
+        """Delegate a task to the best service that can handle it."""
+        if not args:
+            args = ()
+        if not kwargs:
+            kwargs = {}
+
+        order = self._get_service_order()
+        for srv in order:
+            if srv in services:
+                return services[srv](*args, **kwargs)
+        raise exceptions.NoServiceError(services)
diff --git a/earwigbot/wiki/user.py b/earwigbot/wiki/user.py
index 9762824..b71b502 100644
--- a/earwigbot/wiki/user.py
+++ b/earwigbot/wiki/user.py
@@ -82,7 +82,7 @@ class User(object):
 
     def __str__(self):
         """Return a nice string representation of the User."""
-        return '<User "{0}" of {1}>'.format(self._name, str(self._site))
+        return '<User "{0}" of {1}>'.format(self.name, str(self.site))
 
     def _get_attribute(self, attr):
         """Internally used to get an attribute by name.
@@ -107,8 +107,8 @@ class User(object):
         is not defined. This defines it.
""" props = "blockinfo|groups|rights|editcount|registration|emailable|gender" - result = self._site.api_query(action="query", list="users", - ususers=self._name, usprop=props) + result = self.site.api_query(action="query", list="users", + ususers=self._name, usprop=props) res = result["query"]["users"][0] # normalize our username in case it was entered oddly @@ -275,9 +275,9 @@ class User(object): No checks are made to see if it exists or not. Proper site namespace conventions are followed. """ - prefix = self._site.namespace_id_to_name(constants.NS_USER) + prefix = self.site.namespace_id_to_name(constants.NS_USER) pagename = ':'.join((prefix, self._name)) - return Page(self._site, pagename) + return Page(self.site, pagename) def get_talkpage(self): """Return a Page object representing the user's talkpage. @@ -285,6 +285,6 @@ class User(object): No checks are made to see if it exists or not. Proper site namespace conventions are followed. """ - prefix = self._site.namespace_id_to_name(constants.NS_USER_TALK) + prefix = self.site.namespace_id_to_name(constants.NS_USER_TALK) pagename = ':'.join((prefix, self._name)) - return Page(self._site, pagename) + return Page(self.site, pagename)