@@ -109,12 +109,9 @@ class AFCStatus(Command):

    def count_submissions(self):
        """Returns the number of open AFC submissions (count of CAT:PEND)."""
        cat = self.site.get_category("Pending AfC submissions")
        subs = len(cat.get_members(use_sql=True))
        # Remove [[Wikipedia:Articles for creation/Redirects]] and
        # Subtract two for [[Wikipedia:Articles for creation/Redirects]] and
        # [[Wikipedia:Files for upload]], which aren't real submissions:
        return subs - 2
        return self.site.get_category("Pending AfC submissions").pages - 2

    def count_redirects(self):
        """Returns the number of open redirect submissions. Calculated as the

@@ -31,7 +31,9 @@ This module contains all exceptions used by EarwigBot::

     |    +-- BrokenSocketError
     +-- WikiToolsetError
          +-- SiteNotFoundError
          +-- SiteAPIError
          +-- NoServiceError
          +-- APIError
          +-- SQLError
          +-- LoginError
          +-- NamespaceNotFoundError
          +-- PageNotFoundError

@@ -45,7 +47,6 @@ This module contains all exceptions used by EarwigBot::

          |    +-- ContentTooBigError
          |    +-- SpamDetectedError
          |    +-- FilteredError
          +-- SQLError
          +-- CopyvioCheckError
               +-- UnknownSearchEngineError
               +-- UnsupportedSearchEngineError

@@ -81,7 +82,13 @@ class SiteNotFoundError(WikiToolsetError):

    Raised by :py:class:`~earwigbot.wiki.sitesdb.SitesDB`.
    """

class SiteAPIError(WikiToolsetError):
class NoServiceError(WikiToolsetError):
    """No service is functioning to handle a specific task.

    Raised by :py:meth:`Site.delegate <earwigbot.wiki.site.Site.delegate>`.
    """

class APIError(WikiToolsetError):
    """Couldn't connect to a site's API.

    Perhaps the server doesn't exist, our URL is wrong or incomplete, or

@@ -90,6 +97,12 @@ class SiteAPIError(WikiToolsetError):

    Raised by :py:meth:`Site.api_query <earwigbot.wiki.site.Site.api_query>`.
    """

class SQLError(WikiToolsetError):
    """Some error involving SQL querying occurred.

    Raised by :py:meth:`Site.sql_query <earwigbot.wiki.site.Site.sql_query>`.
    """

class LoginError(WikiToolsetError):
    """An error occurred while trying to log in.

@@ -188,12 +201,6 @@ class FilteredError(EditError):

    :py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
    """

class SQLError(WikiToolsetError):
    """Some error involving SQL querying occurred.

    Raised by :py:meth:`Site.sql_query <earwigbot.wiki.site.Site.sql_query>`.
    """

class CopyvioCheckError(WikiToolsetError):
    """An error occurred when checking a page for copyright violations.

@@ -663,7 +663,7 @@ class AFCStatistics(Task):

            return None, None, None
        try:
            content = self.get_revision_content(revid)
        except exceptions.SiteAPIError:
        except exceptions.APIError:
            msg = "API error interrupted SQL query in get_special() for page (id: {0}, chart: {1})"
            self.logger.exception(msg.format(pageid, chart))
            return None, None, None

@@ -49,7 +49,7 @@ class Category(Page):

    def __str__(self):
        """Return a nice string representation of the Category."""
        return '<Category "{0}" of {1}>'.format(self.title, str(self._site))
        return '<Category "{0}" of {1}>'.format(self.title, str(self.site))

    def _get_members_via_sql(self, limit, follow):
        """Iterate over Pages in the category using SQL."""

@@ -60,32 +60,32 @@ class Category(Page):

        if limit:
            query += " LIMIT ?"
            result = self._site.sql_query(query, (title, limit))
            result = self.site.sql_query(query, (title, limit))
        else:
            result = self._site.sql_query(query, (title,))
            result = self.site.sql_query(query, (title,))

        members = list(result)
        for row in members:
            base = row[0].replace("_", " ").decode("utf8")
            namespace = self._site.namespace_id_to_name(row[1])
            namespace = self.site.namespace_id_to_name(row[1])
            if namespace:
                title = u":".join((namespace, base))
            else:  # Avoid doing a silly (albeit valid) ":Pagename" thing
                title = base
            yield self._site.get_page(title, follow_redirects=follow,
            yield self.site.get_page(title, follow_redirects=follow,
                                      pageid=row[2])

    def _get_members_via_api(self, limit, follow):
        """Iterate over Pages in the category using the API."""
        params = {"action": "query", "list": "categorymembers",
                  "cmtitle": self._title}
                  "cmtitle": self.title}

        while 1:
            params["cmlimit"] = limit if limit else "max"
            result = self._site.api_query(**params)
            result = self.site.api_query(**params)
            for member in result["query"]["categorymembers"]:
                title = member["title"]
                yield self._site.get_page(title, follow_redirects=follow)
                yield self.site.get_page(title, follow_redirects=follow)

            if "query-continue" in result:
                qcontinue = result["query-continue"]["categorymembers"]
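Both member backends above feed `Category.get_members()`. A brief usage sketch (not part of the diff), with `site` again assumed to be a configured `Site`:

```python
# Hedged usage sketch: iterating category members.
cat = site.get_category("Pending AfC submissions")

# get_members() yields Page objects; use_sql=True routes the listing
# through the categorylinks table instead of list=categorymembers.
for page in cat.get_members(limit=50):
    print page.title
```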

@@ -95,6 +95,45 @@ class Category(Page):

            else:
                break

    def _get_size_via_sql(self, member_type):
        """Return a count of members using the categorylinks table."""
        query = "SELECT COUNT(*) FROM categorylinks WHERE cl_to = ?"
        title = self.title.replace(" ", "_").split(":", 1)[1]
        if member_type == "size":
            result = self.site.sql_query(query, (title,))
        else:
            query += " AND cl_type = ?"
            result = self.site.sql_query(query, (title, member_type[:-1]))
        return list(result)[0][0]

    def _get_size_via_api(self, member_type):
        """Return a count of members using an API categoryinfo query."""
        result = self.site.api_query(action="query", prop="categoryinfo",
                                     titles=self.title)
        info = result["query"]["pages"].values()[0]["categoryinfo"]
        return info[member_type]

    def _get_size(self, member_type):
        """Return a count of members, using whichever service (SQL or the
        API) the Site prefers."""
        services = {
            self.site.SERVICE_API: self._get_size_via_api,
            self.site.SERVICE_SQL: self._get_size_via_sql
        }
        return self.site.delegate(services, (member_type,))

    @property
    def size(self):
        """The total number of members (pages, files, and subcategories)."""
        return self._get_size("size")

    @property
    def pages(self):
        """The number of regular pages in the category."""
        return self._get_size("pages")

    @property
    def files(self):
        """The number of files in the category."""
        return self._get_size("files")

    @property
    def subcats(self):
        """The number of subcategories in the category."""
        return self._get_size("subcats")

    def get_members(self, use_sql=False, limit=None, follow_redirects=None):
        """Iterate over Pages in the category.

@@ -117,7 +117,7 @@ class Page(CopyrightMixIn):

        prefix = self._title.split(":", 1)[0]
        if prefix != title:  # ignore a page that's titled "Category" or "User"
            try:
                self._namespace = self._site.namespace_name_to_id(prefix)
                self._namespace = self.site.namespace_name_to_id(prefix)
            except exceptions.NamespaceNotFoundError:
                self._namespace = 0
        else:

@@ -137,7 +137,7 @@ class Page(CopyrightMixIn):

    def __str__(self):
        """Return a nice string representation of the Page."""
        return '<Page "{0}" of {1}>'.format(self.title, str(self._site))
        return '<Page "{0}" of {1}>'.format(self.title, str(self.site))

    def _assert_validity(self):
        """Used to ensure that our page's title is valid.

@@ -199,7 +199,7 @@ class Page(CopyrightMixIn):

        Assuming the API is sound, this should not raise any exceptions.
        """
        if not result:
            query = self._site.api_query
            query = self.site.api_query
            result = query(action="query", rvprop="user", intoken="edit",
                           prop="info|revisions", rvlimit=1, rvdir="newer",
                           titles=self._title, inprop="protection|url")

@@ -263,7 +263,7 @@ class Page(CopyrightMixIn):

        want to force content reloading.
        """
        if not result:
            query = self._site.api_query
            query = self.site.api_query
            result = query(action="query", prop="revisions", rvlimit=1,
                           rvprop="content|timestamp", titles=self._title)

@@ -310,8 +310,8 @@ class Page(CopyrightMixIn):

        # Try the API query, catching most errors with our handler:
        try:
            result = self._site.api_query(**params)
        except exceptions.SiteAPIError as error:
            result = self.site.api_query(**params)
        except exceptions.APIError as error:
            if not hasattr(error, "code"):
                raise  # We can only handle errors with a code attribute
            result = self._handle_edit_errors(error, params, tries)

@@ -375,12 +375,12 @@ class Page(CopyrightMixIn):

        elif error.code in ["noedit-anon", "cantcreate-anon",
                            "noimageredirect-anon"]:
            if not all(self._site._login_info):
            if not all(self.site._login_info):
                # Insufficient login info:
                raise exceptions.PermissionsError(error.info)
            if tries == 0:
                # We have login info; try to login:
                self._site._login(self._site._login_info)
                self.site._login(self.site._login_info)
                self._token = None  # Need a new token; old one is invalid now
                return self._edit(params=params, tries=1)
            else:

@@ -416,13 +416,13 @@ class Page(CopyrightMixIn):

        log in. Otherwise, raise PermissionsError with details.
        """
        if assertion == "user":
            if not all(self._site._login_info):
            if not all(self.site._login_info):
                # Insufficient login info:
                e = "AssertEdit: user assertion failed, and no login info was provided."
                raise exceptions.PermissionsError(e)
            if tries == 0:
                # We have login info; try to login:
                self._site._login(self._site._login_info)
                self.site._login(self.site._login_info)
                self._token = None  # Need a new token; old one is invalid now
                return self._edit(params=params, tries=1)
            else:

@@ -502,8 +502,8 @@ class Page(CopyrightMixIn):

            return self._fullurl
        else:
            slug = quote(self._title.replace(" ", "_"), safe="/:")
            path = self._site._article_path.replace("$1", slug)
            return ''.join((self._site.url, path))
            path = self.site._article_path.replace("$1", slug)
            return ''.join((self.site.url, path))

    @property
    def namespace(self):

@@ -580,7 +580,7 @@ class Page(CopyrightMixIn):

        otherwise missing or invalid.
        """
        if self._namespace < 0:
            ns = self._site.namespace_id_to_name(self._namespace)
            ns = self.site.namespace_id_to_name(self._namespace)
            e = u"Pages in the {0} namespace can't have talk pages.".format(ns)
            raise exceptions.InvalidPageError(e)

@@ -594,7 +594,7 @@ class Page(CopyrightMixIn):

        except IndexError:
            body = self._title

        new_prefix = self._site.namespace_id_to_name(new_ns)
        new_prefix = self.site.namespace_id_to_name(new_ns)

        # If the new page is in namespace 0, don't do ":Title" (it's correct,
        # but unnecessary), just do "Title":

@@ -605,7 +605,7 @@ class Page(CopyrightMixIn):

        if follow_redirects is None:
            follow_redirects = self._follow_redirects
        return Page(self._site, new_title, follow_redirects)
        return Page(self.site, new_title, follow_redirects)

    def get(self):
        """Return page content, which is cached if you try to call get again.

@@ -616,7 +616,7 @@ class Page(CopyrightMixIn):

        if self._exists == self.PAGE_UNKNOWN:
            # Kill two birds with one stone by doing an API query for both our
            # attributes and our page content:
            query = self._site.api_query
            query = self.site.api_query
            result = query(action="query", rvlimit=1, titles=self._title,
                           prop="info|revisions", inprop="protection|url",
                           intoken="edit", rvprop="content|timestamp")

@@ -680,7 +680,7 @@ class Page(CopyrightMixIn):

        if not self._creator:
            self._load()
            self._assert_existence()
        return self._site.get_user(self._creator)
        return self.site.get_user(self._creator)

    def parse(self):
        """Parse the page content for templates, links, etc.

@@ -82,6 +82,8 @@ class Site(object):

    - :py:meth:`get_category`: returns a Category for the given title
    - :py:meth:`get_user`: returns a User object for the given name
    """
    SERVICE_API = 1
    SERVICE_SQL = 2

    def __init__(self, name=None, project=None, lang=None, base_url=None,
                 article_path=None, script_path=None, sql=None,

@@ -228,7 +230,7 @@ class Site(object):

                e = e.format(error.code)
            else:
                e = "API query failed."
            raise exceptions.SiteAPIError(e)
            raise exceptions.APIError(e)

        result = response.read()
        if response.headers.get("Content-Encoding") == "gzip":

@@ -242,7 +244,7 @@ class Site(object):

        """Given API query params, return the URL to query and POST data."""
        if not self._base_url or self._script_path is None:
            e = "Tried to do an API query, but no API URL is known."
            raise exceptions.SiteAPIError(e)
            raise exceptions.APIError(e)

        url = ''.join((self.url, self._script_path, "/api.php"))
        params["format"] = "json"  # This is the only format we understand

@@ -260,7 +262,7 @@ class Site(object):

            res = loads(result)  # Try to parse as a JSON object
        except ValueError:
            e = "API query failed: JSON could not be decoded."
            raise exceptions.SiteAPIError(e)
            raise exceptions.APIError(e)

        try:
            code = res["error"]["code"]

@@ -271,7 +273,7 @@ class Site(object):

        if code == "maxlag":  # We've been throttled by the server
            if tries >= self._max_retries:
                e = "Maximum number of retries reached ({0})."
                raise exceptions.SiteAPIError(e.format(self._max_retries))
                raise exceptions.APIError(e.format(self._max_retries))
            tries += 1
            msg = 'Server says "{0}"; retrying in {1} seconds ({2}/{3})'
            self._logger.info(msg.format(info, wait, tries, self._max_retries))

@@ -279,7 +281,7 @@ class Site(object):

            return self._api_query(params, tries=tries, wait=wait*2)
        else:  # Some unknown error occurred
            e = 'API query failed: got error "{0}"; server says: "{1}".'
            error = exceptions.SiteAPIError(e.format(code, info))
            error = exceptions.APIError(e.format(code, info))
            error.code, error.info = code, info
            raise error

@@ -522,6 +524,10 @@ class Site(object):

        self._sql_conn = oursql.connect(**args)

    def _get_service_order(self):
        """Return the order in which our services should be tried.

        By default, we prefer a direct SQL connection over the API.
        """
        return [self.SERVICE_SQL, self.SERVICE_API]

    @property
    def name(self):
        """The Site's name (or "wikiid" in the API), like ``"enwiki"``."""

@@ -559,7 +565,7 @@ class Site(object):

        This will first attempt to construct an API url from
        :py:attr:`self._base_url` and :py:attr:`self._script_path`. We need
        both of these, or else we'll raise
        :py:exc:`~earwigbot.exceptions.SiteAPIError`. If
        :py:exc:`~earwigbot.exceptions.APIError`. If
        :py:attr:`self._base_url` is protocol-relative (introduced in MediaWiki
        1.18), we'll choose HTTPS only if :py:attr:`self._user_https` is
        ``True``, otherwise HTTP.

@@ -578,7 +584,7 @@ class Site(object):

        load it as a JSON object, and return it.

        If our request failed for some reason, we'll raise
        :py:exc:`~earwigbot.exceptions.SiteAPIError` with details. If that
        :py:exc:`~earwigbot.exceptions.APIError` with details. If that
        reason was due to maxlag, we'll sleep for a bit and then repeat the
        query until we exceed :py:attr:`self._max_retries`.
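Given the rename, callers of `api_query` now catch `APIError`. A minimal sketch (not part of the diff), assuming a configured `Site` instance named `site`:

```python
# Hedged sketch: calling Site.api_query() and handling the renamed exception.
from earwigbot import exceptions

try:
    res = site.api_query(action="query", meta="siteinfo", siprop="general")
    sitename = res["query"]["general"]["sitename"]
except exceptions.APIError as error:
    # The error carries .code/.info when the server returned a structured
    # error; maxlag throttling is retried internally before this is raised.
    sitename = None
```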

@@ -739,3 +745,16 @@ class Site(object):

        else:
            username = self._get_username()
        return User(self, username)

    def delegate(self, services, args=None, kwargs=None):
        """Delegate a task to either the API or SQL, depending on our
        preferred service order.

        *services* maps service IDs (:py:attr:`SERVICE_API`,
        :py:attr:`SERVICE_SQL`) to functions. The first service in
        :py:meth:`_get_service_order` that appears in *services* is called
        with *args* and *kwargs*; if nothing matches, we raise
        :py:exc:`~earwigbot.exceptions.NoServiceError`.
        """
        if not args:
            args = ()
        if not kwargs:
            kwargs = {}

        order = self._get_service_order()
        for srv in order:
            if srv in services:
                return services[srv](*args, **kwargs)
        raise exceptions.NoServiceError(services)
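For reference, `delegate` is meant to be driven by a dict like the one `Category._get_size` builds above. A hedged sketch of how another method might adopt the same pattern; the `_count_revisions*` names are hypothetical, not part of EarwigBot's API:

```python
# Hedged sketch: adding a delegated operation on a hypothetical wiki object.
def _count_revisions(self):
    services = {
        self.site.SERVICE_API: self._count_revisions_via_api,
        self.site.SERVICE_SQL: self._count_revisions_via_sql,
    }
    # Site.delegate() picks the first backend in _get_service_order()
    # (SQL, then the API) that appears in the dict and calls it.
    return self.site.delegate(services)
```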

@@ -82,7 +82,7 @@ class User(object):

    def __str__(self):
        """Return a nice string representation of the User."""
        return '<User "{0}" of {1}>'.format(self._name, str(self._site))
        return '<User "{0}" of {1}>'.format(self.name, str(self.site))

    def _get_attribute(self, attr):
        """Internally used to get an attribute by name.

@@ -107,8 +107,8 @@ class User(object):

        is not defined. This defines it.
        """
        props = "blockinfo|groups|rights|editcount|registration|emailable|gender"
        result = self._site.api_query(action="query", list="users",
                                      ususers=self._name, usprop=props)
        result = self.site.api_query(action="query", list="users",
                                     ususers=self._name, usprop=props)
        res = result["query"]["users"][0]

        # normalize our username in case it was entered oddly

@@ -275,9 +275,9 @@ class User(object):

        No checks are made to see if it exists or not. Proper site namespace
        conventions are followed.
        """
        prefix = self._site.namespace_id_to_name(constants.NS_USER)
        prefix = self.site.namespace_id_to_name(constants.NS_USER)
        pagename = ':'.join((prefix, self._name))
        return Page(self._site, pagename)
        return Page(self.site, pagename)

    def get_talkpage(self):
        """Return a Page object representing the user's talkpage.

@@ -285,6 +285,6 @@ class User(object):

        No checks are made to see if it exists or not. Proper site namespace
        conventions are followed.
        """
        prefix = self._site.namespace_id_to_name(constants.NS_USER_TALK)
        prefix = self.site.namespace_id_to_name(constants.NS_USER_TALK)
        pagename = ':'.join((prefix, self._name))
        return Page(self._site, pagename)
        return Page(self.site, pagename)