@@ -109,12 +109,9 @@ class AFCStatus(Command):
     def count_submissions(self):
         """Returns the number of open AFC submissions (count of CAT:PEND)."""
-        cat = self.site.get_category("Pending AfC submissions")
-        subs = len(cat.get_members(use_sql=True))
-        # Remove [[Wikipedia:Articles for creation/Redirects]] and
+        # Subtract two for [[Wikipedia:Articles for creation/Redirects]] and
         # [[Wikipedia:Files for upload]], which aren't real submissions:
-        return subs - 2
+        return self.site.get_category("Pending AfC submissions").pages - 2

     def count_redirects(self):
         """Returns the number of open redirect submissions. Calculated as the
@@ -55,8 +55,7 @@ class AFCSubmissions(Command):
         site = self.bot.wiki.get_site()
         category = site.get_category("Pending AfC submissions")
-        limit = number + len(self.ignore_list)
-        members = category.get_members(use_sql=True, limit=limit)
+        members = category.get_members(limit=number + len(self.ignore_list))
         urls = [member.url for member in members if member.title not in self.ignore_list]
         pages = ", ".join(urls[:number])
         self.reply(data, "{0} pending AfC subs: {1}".format(number, pages))
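
The practical effect in both commands: Category.pages is a single delegated
count, where get_members() had to walk the whole category. A rough sketch of
the difference (illustrative only, assuming a configured site object):

    cat = site.get_category("Pending AfC submissions")
    num = len(list(cat.get_members()))  # iterates over every member page
    num = cat.pages                     # one categoryinfo query or COUNT(*)
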
@@ -31,7 +31,9 @@ This module contains all exceptions used by EarwigBot::
     |    +-- BrokenSocketError
     +-- WikiToolsetError
          +-- SiteNotFoundError
-         +-- SiteAPIError
+         +-- NoServiceError
+         +-- APIError
+         +-- SQLError
          +-- LoginError
          +-- NamespaceNotFoundError
          +-- PageNotFoundError
@@ -45,7 +47,6 @@ This module contains all exceptions used by EarwigBot::
          |    +-- ContentTooBigError
          |    +-- SpamDetectedError
          |    +-- FilteredError
-         +-- SQLError
          +-- CopyvioCheckError
               +-- UnknownSearchEngineError
               +-- UnsupportedSearchEngineError
@@ -81,7 +82,13 @@ class SiteNotFoundError(WikiToolsetError):
     Raised by :py:class:`~earwigbot.wiki.sitesdb.SitesDB`.
     """

-class SiteAPIError(WikiToolsetError):
+class NoServiceError(WikiToolsetError):
+    """No service is functioning to handle a specific task.
+
+    Raised by :py:meth:`Site.delegate <earwigbot.wiki.site.Site.delegate>`.
+    """
+
+class APIError(WikiToolsetError):
     """Couldn't connect to a site's API.

     Perhaps the server doesn't exist, our URL is wrong or incomplete, or
@@ -90,6 +97,12 @@ class SiteAPIError(WikiToolsetError):
     Raised by :py:meth:`Site.api_query <earwigbot.wiki.site.Site.api_query>`.
     """

+class SQLError(WikiToolsetError):
+    """Some error involving SQL querying occurred.
+
+    Raised by :py:meth:`Site.sql_query <earwigbot.wiki.site.Site.sql_query>`.
+    """
+
 class LoginError(WikiToolsetError):
     """An error occurred while trying to log in.
@@ -188,12 +201,6 @@ class FilteredError(EditError):
     :py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
     """

-class SQLError(WikiToolsetError):
-    """Some error involving SQL querying occurred.
-
-    Raised by :py:meth:`Site.sql_query <earwigbot.wiki.site.Site.sql_query>`.
-    """
-
 class CopyvioCheckError(WikiToolsetError):
     """An error occurred when checking a page for copyright violations.
@@ -130,7 +130,7 @@ class AFCHistory(Task):
         q_delete = "DELETE FROM page WHERE page_id = ?"
         q_update = "UPDATE page SET page_date = ?, page_status = ? WHERE page_id = ?"
         q_insert = "INSERT INTO page VALUES (?, ?, ?)"
-        members = category.get_members(use_sql=True)
+        members = category.get_members()

         with self.conn.cursor() as cursor:
             for title, pageid in members:
@@ -271,9 +271,7 @@ class AFCStatistics(Task):
         tracked = [i[0] for i in cursor.fetchall()]

         category = self.site.get_category(self.pending_cat)
-        pending = category.get_members(use_sql=True)
-
-        for title, pageid in pending:
+        for title, pageid in category.get_members():
             if title in self.ignore_list:
                 continue
             if pageid not in tracked:
@@ -663,7 +661,7 @@ class AFCStatistics(Task):
             return None, None, None
         try:
             content = self.get_revision_content(revid)
-        except exceptions.SiteAPIError:
+        except exceptions.APIError:
             msg = "API error interrupted SQL query in get_special() for page (id: {0}, chart: {1})"
             self.logger.exception(msg.format(pageid, chart))
             return None, None, None
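
Worth noting for these call sites: the unified get_members() yields Page
objects from both back ends (the SQL path pre-fills *pageid*), so loops that
unpacked (title, pageid) tuples from the old SQL generator would read the
attributes off each page instead; a hedged sketch:

    for page in category.get_members():
        title, pageid = page.title, page.pageid
        if title in self.ignore_list:
            continue
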
@@ -37,6 +37,13 @@ class Category(Page):
     the category namespace; :py:meth:`~earwigbot.wiki.site.Site.get_category`
     is shorthand, accepting category names without the namespace prefix.

+    *Attributes:*
+
+    - :py:attr:`size`:    the total number of members in the category
+    - :py:attr:`pages`:   the number of pages in the category
+    - :py:attr:`files`:   the number of files in the category
+    - :py:attr:`subcats`: the number of subcategories in the category
+
     *Public methods:*

     - :py:meth:`get_members`: iterates over Pages in the category
@@ -49,7 +56,27 @@ class Category(Page):
     def __str__(self):
         """Return a nice string representation of the Category."""
-        return '<Category "{0}" of {1}>'.format(self.title, str(self._site))
+        return '<Category "{0}" of {1}>'.format(self.title, str(self.site))
+
+    def _get_members_via_api(self, limit, follow):
+        """Iterate over Pages in the category using the API."""
+        params = {"action": "query", "list": "categorymembers",
+                  "cmtitle": self.title}
+
+        while 1:
+            params["cmlimit"] = limit if limit else "max"
+            result = self.site.api_query(**params)
+            for member in result["query"]["categorymembers"]:
+                title = member["title"]
+                yield self.site.get_page(title, follow_redirects=follow)
+
+            if "query-continue" in result:
+                qcontinue = result["query-continue"]["categorymembers"]
+                params["cmcontinue"] = qcontinue["cmcontinue"]
+                if limit:
+                    limit -= len(result["query"]["categorymembers"])
+            else:
+                break

     def _get_members_via_sql(self, limit, follow):
         """Iterate over Pages in the category using SQL."""
@@ -60,55 +87,103 @@ class Category(Page):
         if limit:
             query += " LIMIT ?"
-            result = self._site.sql_query(query, (title, limit))
+            result = self.site.sql_query(query, (title, limit))
         else:
-            result = self._site.sql_query(query, (title,))
+            result = self.site.sql_query(query, (title,))

         members = list(result)
         for row in members:
             base = row[0].replace("_", " ").decode("utf8")
-            namespace = self._site.namespace_id_to_name(row[1])
+            namespace = self.site.namespace_id_to_name(row[1])
             if namespace:
                 title = u":".join((namespace, base))
             else:  # Avoid doing a silly (albeit valid) ":Pagename" thing
                 title = base
-            yield self._site.get_page(title, follow_redirects=follow,
+            yield self.site.get_page(title, follow_redirects=follow,
                                      pageid=row[2])

-    def _get_members_via_api(self, limit, follow):
-        """Iterate over Pages in the category using the API."""
-        params = {"action": "query", "list": "categorymembers",
-                  "cmtitle": self._title}
-
-        while 1:
-            params["cmlimit"] = limit if limit else "max"
-            result = self._site.api_query(**params)
-            for member in result["query"]["categorymembers"]:
-                title = member["title"]
-                yield self._site.get_page(title, follow_redirects=follow)
-
-            if "query-continue" in result:
-                qcontinue = result["query-continue"]["categorymembers"]
-                params["cmcontinue"] = qcontinue["cmcontinue"]
-                if limit:
-                    limit -= len(result["query"]["categorymembers"])
-            else:
-                break
-
-    def get_members(self, use_sql=False, limit=None, follow_redirects=None):
+    def _get_size_via_api(self, member_type):
+        """Return the size of the category using the API."""
+        result = self.site.api_query(action="query", prop="categoryinfo",
+                                     titles=self.title)
+        info = result["query"]["pages"].values()[0]["categoryinfo"]
+        return info[member_type]
+
+    def _get_size_via_sql(self, member_type):
+        """Return the size of the category using SQL."""
+        query = "SELECT COUNT(*) FROM categorylinks WHERE cl_to = ?"
+        title = self.title.replace(" ", "_").split(":", 1)[1]
+        if member_type == "size":
+            result = self.site.sql_query(query, (title,))
+        else:
+            query += " AND cl_type = ?"
+            result = self.site.sql_query(query, (title, member_type[:-1]))
+        return list(result)[0][0]
+
+    def _get_size(self, member_type):
+        """Return the size of the category."""
+        services = {
+            self.site.SERVICE_API: self._get_size_via_api,
+            self.site.SERVICE_SQL: self._get_size_via_sql
+        }
+        return self.site.delegate(services, (member_type,))
+
+    @property
+    def size(self):
+        """The total number of members in the category.
+
+        Includes pages, files, and subcats. Equal to :py:attr:`pages` +
+        :py:attr:`files` + :py:attr:`subcats`. This will use either the API or
+        SQL depending on which are enabled and the amount of lag on each. This
+        is handled by :py:meth:`site.delegate()
+        <earwigbot.wiki.site.Site.delegate>`.
+        """
+        return self._get_size("size")
+
+    @property
+    def pages(self):
+        """The number of pages in the category.
+
+        This will use either the API or SQL depending on which are enabled and
+        the amount of lag on each. This is handled by :py:meth:`site.delegate()
+        <earwigbot.wiki.site.Site.delegate>`.
+        """
+        return self._get_size("pages")
+
+    @property
+    def files(self):
+        """The number of files in the category.
+
+        This will use either the API or SQL depending on which are enabled and
+        the amount of lag on each. This is handled by :py:meth:`site.delegate()
+        <earwigbot.wiki.site.Site.delegate>`.
+        """
+        return self._get_size("files")
+
+    @property
+    def subcats(self):
+        """The number of subcategories in the category.
+
+        This will use either the API or SQL depending on which are enabled and
+        the amount of lag on each. This is handled by :py:meth:`site.delegate()
+        <earwigbot.wiki.site.Site.delegate>`.
+        """
+        return self._get_size("subcats")
+
+    def get_members(self, limit=None, follow_redirects=None):
         """Iterate over Pages in the category.

-        If *use_sql* is ``True``, we will use a SQL query instead of the API.
-        Note that pages are retrieved from the API in chunks (by default, in
-        500-page chunks for normal users and 5000-page chunks for bots and
-        admins), so queries may be made as we go along. If *limit* is given, we
-        will provide this many pages, or less if the category is smaller. By
-        default, *limit* is ``None``, meaning we will keep iterating over
-        members until the category is exhausted. *follow_redirects* is passed
-        directly to :py:meth:`site.get_page()
+        If *limit* is given, we will provide this many pages, or less if the
+        category is smaller. By default, *limit* is ``None``, meaning we will
+        keep iterating over members until the category is exhausted.
+        *follow_redirects* is passed directly to :py:meth:`site.get_page()
         <earwigbot.wiki.site.Site.get_page>`; it defaults to ``None``, which
         will use the value passed to our :py:meth:`__init__`.

+        This will use either the API or SQL depending on which are enabled and
+        the amount of lag on each. This is handled by :py:meth:`site.delegate()
+        <earwigbot.wiki.site.Site.delegate>`.
+
         .. note::
            Be careful when iterating over very large categories with no limit.
            If using the API, at best, you will make one query per 5000 pages,
@@ -121,9 +196,10 @@ class Category(Page):
            thousand, in which case the sheer number of titles in memory becomes
            problematic.
         """
+        services = {
+            self.site.SERVICE_API: self._get_members_via_api,
+            self.site.SERVICE_SQL: self._get_members_via_sql
+        }
         if follow_redirects is None:
             follow_redirects = self._follow_redirects
-        if use_sql:
-            return self._get_members_via_sql(limit, follow_redirects)
-        else:
-            return self._get_members_via_api(limit, follow_redirects)
+        return self.site.delegate(services, (limit, follow_redirects))
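
Putting the new Category surface together, typical usage looks like this
(a sketch; the category name is just an example):

    site = bot.wiki.get_site()
    cat = site.get_category("Pending AfC submissions")
    print cat.size, cat.pages, cat.files, cat.subcats  # delegated counts
    for page in cat.get_members(limit=50):             # API or SQL, per lag
        print page.title
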
@@ -117,7 +117,7 @@ class Page(CopyrightMixIn):
         prefix = self._title.split(":", 1)[0]
         if prefix != title:  # ignore a page that's titled "Category" or "User"
             try:
-                self._namespace = self._site.namespace_name_to_id(prefix)
+                self._namespace = self.site.namespace_name_to_id(prefix)
             except exceptions.NamespaceNotFoundError:
                 self._namespace = 0
         else:
@@ -137,7 +137,7 @@ class Page(CopyrightMixIn):
     def __str__(self):
         """Return a nice string representation of the Page."""
-        return '<Page "{0}" of {1}>'.format(self.title, str(self._site))
+        return '<Page "{0}" of {1}>'.format(self.title, str(self.site))

     def _assert_validity(self):
         """Used to ensure that our page's title is valid.
@@ -199,7 +199,7 @@ class Page(CopyrightMixIn):
         Assuming the API is sound, this should not raise any exceptions.
         """
         if not result:
-            query = self._site.api_query
+            query = self.site.api_query
             result = query(action="query", rvprop="user", intoken="edit",
                            prop="info|revisions", rvlimit=1, rvdir="newer",
                            titles=self._title, inprop="protection|url")
@@ -263,7 +263,7 @@ class Page(CopyrightMixIn):
         want to force content reloading.
         """
         if not result:
-            query = self._site.api_query
+            query = self.site.api_query
             result = query(action="query", prop="revisions", rvlimit=1,
                            rvprop="content|timestamp", titles=self._title)
@@ -310,8 +310,8 @@ class Page(CopyrightMixIn):
         # Try the API query, catching most errors with our handler:
         try:
-            result = self._site.api_query(**params)
-        except exceptions.SiteAPIError as error:
+            result = self.site.api_query(**params)
+        except exceptions.APIError as error:
             if not hasattr(error, "code"):
                 raise  # We can only handle errors with a code attribute
             result = self._handle_edit_errors(error, params, tries)
@@ -375,12 +375,12 @@ class Page(CopyrightMixIn):
         elif error.code in ["noedit-anon", "cantcreate-anon",
                             "noimageredirect-anon"]:
-            if not all(self._site._login_info):
+            if not all(self.site._login_info):
                 # Insufficient login info:
                 raise exceptions.PermissionsError(error.info)
             if tries == 0:
                 # We have login info; try to login:
-                self._site._login(self._site._login_info)
+                self.site._login(self.site._login_info)
                 self._token = None  # Need a new token; old one is invalid now
                 return self._edit(params=params, tries=1)
             else:
@@ -416,13 +416,13 @@ class Page(CopyrightMixIn):
         log in. Otherwise, raise PermissionsError with details.
         """
         if assertion == "user":
-            if not all(self._site._login_info):
+            if not all(self.site._login_info):
                 # Insufficient login info:
                 e = "AssertEdit: user assertion failed, and no login info was provided."
                 raise exceptions.PermissionsError(e)
             if tries == 0:
                 # We have login info; try to login:
-                self._site._login(self._site._login_info)
+                self.site._login(self.site._login_info)
                 self._token = None  # Need a new token; old one is invalid now
                 return self._edit(params=params, tries=1)
             else:
@@ -476,7 +476,7 @@ class Page(CopyrightMixIn):
         Makes an API query only if we haven't already made one and the *pageid*
         parameter to :py:meth:`__init__` was left as ``None``, which should be
         true for all cases except when pages are returned by an SQL generator
-        (like :py:meth:`category.get_members(use_sql=True)
+        (like :py:meth:`category.get_members()
         <earwigbot.wiki.category.Category.get_members>`).

         Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
@@ -502,8 +502,8 @@ class Page(CopyrightMixIn):
             return self._fullurl
         else:
             slug = quote(self._title.replace(" ", "_"), safe="/:")
-            path = self._site._article_path.replace("$1", slug)
-            return ''.join((self._site.url, path))
+            path = self.site._article_path.replace("$1", slug)
+            return ''.join((self.site.url, path))

     @property
     def namespace(self):
@@ -580,7 +580,7 @@ class Page(CopyrightMixIn):
         otherwise missing or invalid.
         """
         if self._namespace < 0:
-            ns = self._site.namespace_id_to_name(self._namespace)
+            ns = self.site.namespace_id_to_name(self._namespace)
             e = u"Pages in the {0} namespace can't have talk pages.".format(ns)
             raise exceptions.InvalidPageError(e)
@@ -594,7 +594,7 @@ class Page(CopyrightMixIn):
         except IndexError:
             body = self._title

-        new_prefix = self._site.namespace_id_to_name(new_ns)
+        new_prefix = self.site.namespace_id_to_name(new_ns)

         # If the new page is in namespace 0, don't do ":Title" (it's correct,
         # but unnecessary), just do "Title":
@@ -605,7 +605,7 @@ class Page(CopyrightMixIn):
         if follow_redirects is None:
             follow_redirects = self._follow_redirects
-        return Page(self._site, new_title, follow_redirects)
+        return Page(self.site, new_title, follow_redirects)

     def get(self):
         """Return page content, which is cached if you try to call get again.
@@ -616,7 +616,7 @@ class Page(CopyrightMixIn):
         if self._exists == self.PAGE_UNKNOWN:
             # Kill two birds with one stone by doing an API query for both our
             # attributes and our page content:
-            query = self._site.api_query
+            query = self.site.api_query
             result = query(action="query", rvlimit=1, titles=self._title,
                            prop="info|revisions", inprop="protection|url",
                            intoken="edit", rvprop="content|timestamp")
@@ -680,7 +680,7 @@ class Page(CopyrightMixIn):
         if not self._creator:
             self._load()
             self._assert_existence()
-        return self._site.get_user(self._creator)
+        return self.site.get_user(self._creator)

     def parse(self):
         """Parse the page content for templates, links, etc.
@@ -75,13 +75,17 @@ class Site(object):
     - :py:meth:`api_query`:            does an API query with kwargs as params
     - :py:meth:`sql_query`:            does an SQL query and yields its results
-    - :py:meth:`get_replag`:           estimates the database replication lag
+    - :py:meth:`get_maxlag`:           returns the internal database lag
+    - :py:meth:`get_replag`:           estimates the external database lag
     - :py:meth:`namespace_id_to_name`: returns names associated with an NS id
     - :py:meth:`namespace_name_to_id`: returns the ID associated with a NS name
     - :py:meth:`get_page`:             returns a Page for the given title
     - :py:meth:`get_category`:         returns a Category for the given title
     - :py:meth:`get_user`:             returns a User object for the given name
+    - :py:meth:`delegate`:             controls when the API or SQL is used
     """
+    SERVICE_API = 1
+    SERVICE_SQL = 2

     def __init__(self, name=None, project=None, lang=None, base_url=None,
                  article_path=None, script_path=None, sql=None,
@@ -124,11 +128,13 @@ class Site(object):
         self._max_retries = 6
         self._last_query_time = 0
         self._api_lock = Lock()
+        self._api_info_cache = {"maxlag": 0, "lastcheck": 0}

         # Attributes used for SQL queries:
         self._sql_data = sql
         self._sql_conn = None
         self._sql_lock = Lock()
+        self._sql_info_cache = {"replag": 0, "lastcheck": 0, "usable": None}

         # Attribute used in copyright violation checks (see CopyrightMixIn):
         self._search_config = search_config
@@ -201,7 +207,7 @@ class Site(object):
             args.append(key + "=" + val)
         return "&".join(args)

-    def _api_query(self, params, tries=0, wait=5):
+    def _api_query(self, params, tries=0, wait=5, ignore_maxlag=False):
         """Do an API query with *params* as a dict of parameters.

         See the documentation for :py:meth:`api_query` for full implementation
@@ -215,7 +221,7 @@
             sleep(wait_time)
         self._last_query_time = time()

-        url, data = self._build_api_query(params)
+        url, data = self._build_api_query(params, ignore_maxlag)
         self._logger.debug("{0} -> {1}".format(url, data))

         try:
@@ -228,7 +234,7 @@
                 e = e.format(error.code)
             else:
                 e = "API query failed."
-            raise exceptions.SiteAPIError(e)
+            raise exceptions.APIError(e)

         result = response.read()
         if response.headers.get("Content-Encoding") == "gzip":
@@ -238,17 +244,18 @@
         return self._handle_api_query_result(result, params, tries, wait)

-    def _build_api_query(self, params):
+    def _build_api_query(self, params, ignore_maxlag):
         """Given API query params, return the URL to query and POST data."""
         if not self._base_url or self._script_path is None:
             e = "Tried to do an API query, but no API URL is known."
-            raise exceptions.SiteAPIError(e)
+            raise exceptions.APIError(e)

         url = ''.join((self.url, self._script_path, "/api.php"))
         params["format"] = "json"  # This is the only format we understand
         if self._assert_edit:  # If requested, ensure that we're logged in
             params["assert"] = self._assert_edit
-        if self._maxlag:  # If requested, don't overload the servers
+        if self._maxlag and not ignore_maxlag:
+            # If requested, don't overload the servers:
             params["maxlag"] = self._maxlag

         data = self._urlencode_utf8(params)
@@ -260,7 +267,7 @@
             res = loads(result)  # Try to parse as a JSON object
         except ValueError:
             e = "API query failed: JSON could not be decoded."
-            raise exceptions.SiteAPIError(e)
+            raise exceptions.APIError(e)

         try:
             code = res["error"]["code"]
if code == "maxlag": # We've been throttled by the server | |||
if tries >= self._max_retries: | |||
e = "Maximum number of retries reached ({0})." | |||
raise exceptions.SiteAPIError(e.format(self._max_retries)) | |||
raise exceptions.APIError(e.format(self._max_retries)) | |||
tries += 1 | |||
msg = 'Server says "{0}"; retrying in {1} seconds ({2}/{3})' | |||
self._logger.info(msg.format(info, wait, tries, self._max_retries)) | |||
@@ -279,7 +286,7 @@
             return self._api_query(params, tries=tries, wait=wait*2)
         else:  # Some unknown error occurred
             e = 'API query failed: got error "{0}"; server says: "{1}".'
-            error = exceptions.SiteAPIError(e.format(code, info))
+            error = exceptions.APIError(e.format(code, info))
             error.code, error.info = code, info
             raise error
@@ -522,6 +529,48 @@
             self._sql_conn = oursql.connect(**args)

+    def _get_service_order(self):
+        """Return a preferred order for using services (e.g. the API and SQL).
+
+        A list is returned, starting with the most preferred service and
+        ending with the least preferred one. Currently, there are only two
+        services. SERVICE_API will always be included, since the API is
+        expected to always be usable. In normal circumstances, SERVICE_SQL
+        will be first (with the API second), since using SQL directly is
+        easier on the servers than making web queries with the API.
+        SERVICE_SQL will be second if replag is greater than three minutes (a
+        cached value updated every two minutes at most), *unless* API lag is
+        also very high. SERVICE_SQL will not be included in the list if we
+        cannot form a proper SQL connection.
+        """
+        now = time()
+        if now - self._sql_info_cache["lastcheck"] > 120:
+            self._sql_info_cache["lastcheck"] = now
+            try:
+                self._sql_info_cache["replag"] = sqllag = self.get_replag()
+            except (exceptions.SQLError, oursql.Error):
+                self._sql_info_cache["usable"] = False
+                return [self.SERVICE_API]
+            self._sql_info_cache["usable"] = True
+        else:
+            if not self._sql_info_cache["usable"]:
+                return [self.SERVICE_API]
+            sqllag = self._sql_info_cache["replag"]
+
+        if sqllag > 180:
+            if not self._maxlag:
+                return [self.SERVICE_API, self.SERVICE_SQL]
+            if now - self._api_info_cache["lastcheck"] > 120:
+                self._api_info_cache["lastcheck"] = now
+                try:
+                    self._api_info_cache["maxlag"] = apilag = self.get_maxlag()
+                except exceptions.APIError:
+                    self._api_info_cache["maxlag"] = apilag = 0
+            else:
+                apilag = self._api_info_cache["maxlag"]
+            if sqllag / (180.0 / self._maxlag) < apilag:
+                return [self.SERVICE_SQL, self.SERVICE_API]
+            return [self.SERVICE_API, self.SERVICE_SQL]
+
+        return [self.SERVICE_SQL, self.SERVICE_API]
+
     @property
     def name(self):
         """The Site's name (or "wikiid" in the API), like ``"enwiki"``."""
@@ -559,7 +608,7 @@
         This will first attempt to construct an API url from
         :py:attr:`self._base_url` and :py:attr:`self._script_path`. We need
         both of these, or else we'll raise
-        :py:exc:`~earwigbot.exceptions.SiteAPIError`. If
+        :py:exc:`~earwigbot.exceptions.APIError`. If
         :py:attr:`self._base_url` is protocol-relative (introduced in MediaWiki
         1.18), we'll choose HTTPS only if :py:attr:`self._user_https` is
         ``True``, otherwise HTTP.
@@ -578,7 +627,7 @@
         load it as a JSON object, and return it.

         If our request failed for some reason, we'll raise
-        :py:exc:`~earwigbot.exceptions.SiteAPIError` with details. If that
+        :py:exc:`~earwigbot.exceptions.APIError` with details. If that
         reason was due to maxlag, we'll sleep for a bit and then repeat the
         query until we exceed :py:attr:`self._max_retries`.
@@ -635,8 +684,30 @@
         for result in cur:
             yield result

+    def get_maxlag(self, showall=False):
+        """Return the internal database replication lag in seconds.
+
+        In a typical setup, this function returns the replication lag *within*
+        the WMF's cluster, *not* external replication lag affecting the
+        Toolserver (see :py:meth:`get_replag` for that). This is useful when
+        combined with the ``maxlag`` API query param (added by config), in
+        which case queries will be halted and retried if the lag is too high,
+        usually above five seconds.
+
+        With *showall*, will return a list of the lag for all servers in the
+        cluster, not just the one with the highest lag.
+        """
+        params = {"action": "query", "meta": "siteinfo", "siprop": "dbrepllag"}
+        if showall:
+            params["sishowalldb"] = 1
+        with self._api_lock:
+            result = self._api_query(params, ignore_maxlag=True)
+        if showall:
+            return [server["lag"] for server in result["query"]["dbrepllag"]]
+        return result["query"]["dbrepllag"][0]["lag"]
+
     def get_replag(self):
-        """Return the estimated database replication lag in seconds.
+        """Return the estimated external database replication lag in seconds.

         Requires SQL access. This function only makes sense on a replicated
         database (e.g. the Wikimedia Toolserver) and on a wiki that receives a
@@ -739,3 +810,29 @@
         else:
             username = self._get_username()
         return User(self, username)
+
+    def delegate(self, services, args=None, kwargs=None):
+        """Delegate a task to either the API or SQL depending on conditions.
+
+        *services* should be a dictionary in which the key is the service name
+        (:py:attr:`self.SERVICE_API <SERVICE_API>` or
+        :py:attr:`self.SERVICE_SQL <SERVICE_SQL>`) and the value is the
+        function to call for that service. All functions will be passed the
+        same arguments: the tuple *args* and the dict *kwargs*, both of which
+        are empty by default. The service order is determined by
+        :py:meth:`_get_service_order`.
+
+        Not every service needs an entry in the dictionary. Will raise
+        :py:exc:`~earwigbot.exceptions.NoServiceError` if an appropriate
+        service cannot be found.
+        """
+        if not args:
+            args = ()
+        if not kwargs:
+            kwargs = {}
+
+        order = self._get_service_order()
+        for srv in order:
+            if srv in services:
+                return services[srv](*args, **kwargs)
+        raise exceptions.NoServiceError(services)
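
Delegation from outside follows the same pattern Category uses internally:
map each service constant to a callable and let the site pick. A minimal
sketch with hypothetical helpers (the SQL one ignores namespaces for brevity):

    def exists_via_api(title):
        res = site.api_query(action="query", titles=title)
        return "missing" not in res["query"]["pages"].values()[0]

    def exists_via_sql(title):
        rows = site.sql_query("SELECT 1 FROM page WHERE page_title = ?",
                              (title.replace(" ", "_"),))
        return bool(list(rows))

    services = {site.SERVICE_API: exists_via_api,
                site.SERVICE_SQL: exists_via_sql}
    print site.delegate(services, ("Main Page",))
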
@@ -82,7 +82,7 @@ class User(object):
     def __str__(self):
         """Return a nice string representation of the User."""
-        return '<User "{0}" of {1}>'.format(self._name, str(self._site))
+        return '<User "{0}" of {1}>'.format(self.name, str(self.site))

     def _get_attribute(self, attr):
         """Internally used to get an attribute by name.
@@ -107,8 +107,8 @@ class User(object):
         is not defined. This defines it.
         """
         props = "blockinfo|groups|rights|editcount|registration|emailable|gender"
-        result = self._site.api_query(action="query", list="users",
-                                      ususers=self._name, usprop=props)
+        result = self.site.api_query(action="query", list="users",
+                                     ususers=self._name, usprop=props)
         res = result["query"]["users"][0]

         # normalize our username in case it was entered oddly
@@ -275,9 +275,9 @@ class User(object):
         No checks are made to see if it exists or not. Proper site namespace
         conventions are followed.
         """
-        prefix = self._site.namespace_id_to_name(constants.NS_USER)
+        prefix = self.site.namespace_id_to_name(constants.NS_USER)
         pagename = ':'.join((prefix, self._name))
-        return Page(self._site, pagename)
+        return Page(self.site, pagename)

     def get_talkpage(self):
         """Return a Page object representing the user's talkpage.
@@ -285,6 +285,6 @@ class User(object):
         No checks are made to see if it exists or not. Proper site namespace
         conventions are followed.
         """
-        prefix = self._site.namespace_id_to_name(constants.NS_USER_TALK)
+        prefix = self.site.namespace_id_to_name(constants.NS_USER_TALK)
         pagename = ':'.join((prefix, self._name))
-        return Page(self._site, pagename)
+        return Page(self.site, pagename)