Просмотр исходного кода

Merge branch 'feature/service_delegation' into develop

tags/v0.1^2
Ben Kurtovic 12 лет назад
Родитель
Commit
5217875a1e
9 измененных файлов: 268 добавлений и 94 удалений
  1. +2
    -5
      earwigbot/commands/afc_status.py
  2. +1
    -2
      earwigbot/commands/afc_submissions.py
  3. +16
    -9
      earwigbot/exceptions.py
  4. +1
    -1
      earwigbot/tasks/afc_history.py
  5. +2
    -4
      earwigbot/tasks/afc_statistics.py
  6. +111
    -35
      earwigbot/wiki/category.py
  7. +18
    -18
      earwigbot/wiki/page.py
  8. +110
    -13
      earwigbot/wiki/site.py
  9. +7
    -7
      earwigbot/wiki/user.py

+ 2
- 5
earwigbot/commands/afc_status.py Просмотреть файл

@@ -109,12 +109,9 @@ class AFCStatus(Command):

def count_submissions(self):
"""Returns the number of open AFC submissions (count of CAT:PEND)."""
cat = self.site.get_category("Pending AfC submissions")
subs = len(cat.get_members(use_sql=True))

# Remove [[Wikipedia:Articles for creation/Redirects]] and
# Subtract two for [[Wikipedia:Articles for creation/Redirects]] and
# [[Wikipedia:Files for upload]], which aren't real submissions:
return subs - 2
return self.site.get_category("Pending AfC submissions").pages - 2

def count_redirects(self):
"""Returns the number of open redirect submissions. Calculated as the


+ 1
- 2
earwigbot/commands/afc_submissions.py Просмотреть файл

@@ -55,8 +55,7 @@ class AFCSubmissions(Command):

site = self.bot.wiki.get_site()
category = site.get_category("Pending AfC submissions")
limit = number + len(self.ignore_list)
members = category.get_members(use_sql=True, limit=limit)
members = category.get_members(limit=number + len(self.ignore_list))
urls = [member.url for member in members if member.title not in self.ignore_list]
pages = ", ".join(urls[:number])
self.reply(data, "{0} pending AfC subs: {1}".format(number, pages))

+ 16
- 9
earwigbot/exceptions.py Просмотреть файл

@@ -31,7 +31,9 @@ This module contains all exceptions used by EarwigBot::
| +-- BrokenSocketError
+-- WikiToolsetError
+-- SiteNotFoundError
+-- SiteAPIError
+-- NoServiceError
+-- APIError
+-- SQLError
+-- LoginError
+-- NamespaceNotFoundError
+-- PageNotFoundError
@@ -45,7 +47,6 @@ This module contains all exceptions used by EarwigBot::
| +-- ContentTooBigError
| +-- SpamDetectedError
| +-- FilteredError
+-- SQLError
+-- CopyvioCheckError
+-- UnknownSearchEngineError
+-- UnsupportedSearchEngineError
@@ -81,7 +82,13 @@ class SiteNotFoundError(WikiToolsetError):
Raised by :py:class:`~earwigbot.wiki.sitesdb.SitesDB`.
"""

class SiteAPIError(WikiToolsetError):
class NoServiceError(WikiToolsetError):
    """None of the available services can handle the requested task.

    Raised by :py:meth:`Site.delegate <earwigbot.wiki.site.Site.delegate>`
    when no service in the preferred order has a handler registered for it.
    """

class APIError(WikiToolsetError):
"""Couldn't connect to a site's API.

Perhaps the server doesn't exist, our URL is wrong or incomplete, or
@@ -90,6 +97,12 @@ class SiteAPIError(WikiToolsetError):
Raised by :py:meth:`Site.api_query <earwigbot.wiki.site.Site.api_query>`.
"""

class SQLError(WikiToolsetError):
    """A problem occurred while running a SQL query.

    Raised by :py:meth:`Site.sql_query <earwigbot.wiki.site.Site.sql_query>`.
    """

class LoginError(WikiToolsetError):
"""An error occured while trying to login.

@@ -188,12 +201,6 @@ class FilteredError(EditError):
:py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
"""

class SQLError(WikiToolsetError):
"""Some error involving SQL querying occurred.

Raised by :py:meth:`Site.sql_query <earwigbot.wiki.site.Site.sql_query>`.
"""

class CopyvioCheckError(WikiToolsetError):
"""An error occured when checking a page for copyright violations.



+ 1
- 1
earwigbot/tasks/afc_history.py Просмотреть файл

@@ -130,7 +130,7 @@ class AFCHistory(Task):
q_delete = "DELETE FROM page WHERE page_id = ?"
q_update = "UPDATE page SET page_date = ?, page_status = ? WHERE page_id = ?"
q_insert = "INSERT INTO page VALUES (?, ?, ?)"
members = category.get_members(use_sql=True)
members = category.get_members()

with self.conn.cursor() as cursor:
for title, pageid in members:


+ 2
- 4
earwigbot/tasks/afc_statistics.py Просмотреть файл

@@ -271,9 +271,7 @@ class AFCStatistics(Task):
tracked = [i[0] for i in cursor.fetchall()]

category = self.site.get_category(self.pending_cat)
pending = category.get_members(use_sql=True)

for title, pageid in pending:
for title, pageid in category.get_members():
if title in self.ignore_list:
continue
if pageid not in tracked:
@@ -663,7 +661,7 @@ class AFCStatistics(Task):
return None, None, None
try:
content = self.get_revision_content(revid)
except exceptions.SiteAPIError:
except exceptions.APIError:
msg = "API error interrupted SQL query in get_special() for page (id: {0}, chart: {1})"
self.logger.exception(msg.format(pageid, chart))
return None, None, None


+ 111
- 35
earwigbot/wiki/category.py Просмотреть файл

@@ -37,6 +37,13 @@ class Category(Page):
the category namespace; :py:meth:`~earwigbot.wiki.site.Site.get_category`
is shorthand, accepting category names without the namespace prefix.

*Attributes:*

- :py:attr:`size`: the total number of members in the category
- :py:attr:`pages`: the number of pages in the category
- :py:attr:`files`: the number of files in the category
- :py:attr:`subcats`: the number of subcategories in the category

*Public methods:*

- :py:meth:`get_members`: iterates over Pages in the category
@@ -49,7 +56,27 @@ class Category(Page):

def __str__(self):
"""Return a nice string representation of the Category."""
return '<Category "{0}" of {1}>'.format(self.title, str(self._site))
return '<Category "{0}" of {1}>'.format(self.title, str(self.site))

def _get_members_via_api(self, limit, follow):
    """Iterate over Pages in the category using the API.

    *limit* is the maximum number of pages to yield; a false value (``None``
    or ``0``) means no limit, in which case each request asks the API for
    its maximum chunk size. *follow* is passed to
    :py:meth:`site.get_page() <earwigbot.wiki.site.Site.get_page>` as
    *follow_redirects* for every page yielded.
    """
    params = {"action": "query", "list": "categorymembers",
              "cmtitle": self.title}

    while 1:
        params["cmlimit"] = limit if limit else "max"
        result = self.site.api_query(**params)
        members = result["query"]["categorymembers"]
        for member in members:
            title = member["title"]
            yield self.site.get_page(title, follow_redirects=follow)

        if "query-continue" not in result:
            break  # The category has been exhausted
        if limit:
            limit -= len(members)
            if limit <= 0:
                # We have yielded everything that was asked for. Without
                # this check, a limit of zero would be treated as "no limit"
                # on the next pass and we would keep fetching "max" chunks.
                break
        qcontinue = result["query-continue"]["categorymembers"]
        params["cmcontinue"] = qcontinue["cmcontinue"]

def _get_members_via_sql(self, limit, follow):
"""Iterate over Pages in the category using SQL."""
@@ -60,55 +87,103 @@ class Category(Page):

if limit:
query += " LIMIT ?"
result = self._site.sql_query(query, (title, limit))
result = self.site.sql_query(query, (title, limit))
else:
result = self._site.sql_query(query, (title,))
result = self.site.sql_query(query, (title,))

members = list(result)
for row in members:
base = row[0].replace("_", " ").decode("utf8")
namespace = self._site.namespace_id_to_name(row[1])
namespace = self.site.namespace_id_to_name(row[1])
if namespace:
title = u":".join((namespace, base))
else: # Avoid doing a silly (albeit valid) ":Pagename" thing
title = base
yield self._site.get_page(title, follow_redirects=follow,
yield self.site.get_page(title, follow_redirects=follow,
pageid=row[2])

def _get_members_via_api(self, limit, follow):
"""Iterate over Pages in the category using the API."""
params = {"action": "query", "list": "categorymembers",
"cmtitle": self._title}
def _get_size_via_api(self, member_type):
    """Return the size of the category using the API.

    *member_type* is the ``categoryinfo`` field to read: ``"size"``,
    ``"pages"``, ``"files"``, or ``"subcats"``.
    """
    # categoryinfo is a page property, so the category is selected with
    # "titles" (not the "cmtitle" used by list=categorymembers):
    result = self.site.api_query(action="query", prop="categoryinfo",
                                 titles=self.title)
    # The "pages" mapping is keyed by page ID, which we don't know ahead of
    # time; we asked about one title, so take its only entry:
    page = list(result["query"]["pages"].values())[0]
    return page["categoryinfo"][member_type]

def _get_size_via_sql(self, member_type):
    """Return the size of the category using SQL.

    *member_type* is ``"size"`` to count every member, or ``"pages"``,
    ``"files"``, or ``"subcats"`` to count only members of that type.
    """
    query = "SELECT COUNT(*) FROM categorylinks WHERE cl_to = ?"
    # Use the database form of the title: underscores instead of spaces,
    # without the namespace prefix:
    title = self.title.replace(" ", "_").split(":", 1)[1]
    if member_type == "size":
        result = self.site.sql_query(query, (title,))
    else:
        query += " AND cl_type = ?"
        # Drop the plural "s" from the member type ("pages" -> "page"):
        result = self.site.sql_query(query, (title, member_type[:-1]))
    # COUNT(*) gives a single one-column row; return the number itself
    # rather than the row:
    return list(result)[0][0]

def _get_size(self, member_type):
    """Return the size of the category.

    *member_type* is ``"size"``, ``"pages"``, ``"files"``, or ``"subcats"``.
    The choice between the API and SQL is left to
    :py:meth:`site.delegate() <earwigbot.wiki.site.Site.delegate>`.
    """
    services = {
        self.site.SERVICE_API: self._get_size_via_api,
        self.site.SERVICE_SQL: self._get_size_via_sql
    }
    return self.site.delegate(services, (member_type,))

@property
def size(self):
    """The total number of members in the category.

    This counts pages, files, and subcategories together, so it equals
    :py:attr:`pages` + :py:attr:`files` + :py:attr:`subcats`. Whether the
    API or SQL is used depends on which are enabled and how lagged each is,
    as decided by :py:meth:`site.delegate()
    <earwigbot.wiki.site.Site.delegate>`.
    """
    total = self._get_size("size")
    return total

while 1:
params["cmlimit"] = limit if limit else "max"
result = self._site.api_query(**params)
for member in result["query"]["categorymembers"]:
title = member["title"]
yield self._site.get_page(title, follow_redirects=follow)
@property
def pages(self):
"""The number of pages in the category.

if "query-continue" in result:
qcontinue = result["query-continue"]["categorymembers"]
params["cmcontinue"] = qcontinue["cmcontinue"]
if limit:
limit -= len(result["query"]["categorymembers"])
else:
break
This will use either the API or SQL depending on which are enabled and
the amount of lag on each. This is handled by :py:meth:`site.delegate()
<earwigbot.wiki.site.Site.delegate>`.
"""
return self._get_size("pages")

@property
def files(self):
    """The number of files in the category.

    Whether the API or SQL is used depends on which are enabled and how
    lagged each is, as decided by :py:meth:`site.delegate()
    <earwigbot.wiki.site.Site.delegate>`.
    """
    file_count = self._get_size("files")
    return file_count

def get_members(self, use_sql=False, limit=None, follow_redirects=None):
@property
def subcats(self):
    """The number of subcategories in the category.

    Whether the API or SQL is used depends on which are enabled and how
    lagged each is, as decided by :py:meth:`site.delegate()
    <earwigbot.wiki.site.Site.delegate>`.
    """
    subcat_count = self._get_size("subcats")
    return subcat_count

def get_members(self, limit=None, follow_redirects=None):
"""Iterate over Pages in the category.

If *use_sql* is ``True``, we will use a SQL query instead of the API.
Note that pages are retrieved from the API in chunks (by default, in
500-page chunks for normal users and 5000-page chunks for bots and
admins), so queries may be made as we go along. If *limit* is given, we
will provide this many pages, or less if the category is smaller. By
default, *limit* is ``None``, meaning we will keep iterating over
members until the category is exhausted. *follow_redirects* is passed
directly to :py:meth:`site.get_page()
If *limit* is given, we will provide this many pages, or less if the
category is smaller. By default, *limit* is ``None``, meaning we will
keep iterating over members until the category is exhausted.
*follow_redirects* is passed directly to :py:meth:`site.get_page()
<earwigbot.wiki.site.Site.get_page>`; it defaults to ``None``, which
will use the value passed to our :py:meth:`__init__`.

This will use either the API or SQL depending on which are enabled and
the amount of lag on each. This is handled by :py:meth:`site.delegate()
<earwigbot.wiki.site.Site.delegate>`.

.. note::
Be careful when iterating over very large categories with no limit.
If using the API, at best, you will make one query per 5000 pages,
@@ -121,9 +196,10 @@ class Category(Page):
thousand, in which case the sheer number of titles in memory becomes
problematic.
"""
services = {
self.site.SERVICE_API: self._get_members_via_api,
self.site.SERVICE_SQL: self._get_members_via_sql
}
if follow_redirects is None:
follow_redirects = self._follow_redirects
if use_sql:
return self._get_members_via_sql(limit, follow_redirects)
else:
return self._get_members_via_api(limit, follow_redirects)
return self.site.delegate(services, (follow_redirects,))

+ 18
- 18
earwigbot/wiki/page.py Просмотреть файл

@@ -117,7 +117,7 @@ class Page(CopyrightMixIn):
prefix = self._title.split(":", 1)[0]
if prefix != title: # ignore a page that's titled "Category" or "User"
try:
self._namespace = self._site.namespace_name_to_id(prefix)
self._namespace = self.site.namespace_name_to_id(prefix)
except exceptions.NamespaceNotFoundError:
self._namespace = 0
else:
@@ -137,7 +137,7 @@ class Page(CopyrightMixIn):

def __str__(self):
"""Return a nice string representation of the Page."""
return '<Page "{0}" of {1}>'.format(self.title, str(self._site))
return '<Page "{0}" of {1}>'.format(self.title, str(self.site))

def _assert_validity(self):
"""Used to ensure that our page's title is valid.
@@ -199,7 +199,7 @@ class Page(CopyrightMixIn):
Assuming the API is sound, this should not raise any exceptions.
"""
if not result:
query = self._site.api_query
query = self.site.api_query
result = query(action="query", rvprop="user", intoken="edit",
prop="info|revisions", rvlimit=1, rvdir="newer",
titles=self._title, inprop="protection|url")
@@ -263,7 +263,7 @@ class Page(CopyrightMixIn):
want to force content reloading.
"""
if not result:
query = self._site.api_query
query = self.site.api_query
result = query(action="query", prop="revisions", rvlimit=1,
rvprop="content|timestamp", titles=self._title)

@@ -310,8 +310,8 @@ class Page(CopyrightMixIn):

# Try the API query, catching most errors with our handler:
try:
result = self._site.api_query(**params)
except exceptions.SiteAPIError as error:
result = self.site.api_query(**params)
except exceptions.APIError as error:
if not hasattr(error, "code"):
raise # We can only handle errors with a code attribute
result = self._handle_edit_errors(error, params, tries)
@@ -375,12 +375,12 @@ class Page(CopyrightMixIn):

elif error.code in ["noedit-anon", "cantcreate-anon",
"noimageredirect-anon"]:
if not all(self._site._login_info):
if not all(self.site._login_info):
# Insufficient login info:
raise exceptions.PermissionsError(error.info)
if tries == 0:
# We have login info; try to login:
self._site._login(self._site._login_info)
self.site._login(self.site._login_info)
self._token = None # Need a new token; old one is invalid now
return self._edit(params=params, tries=1)
else:
@@ -416,13 +416,13 @@ class Page(CopyrightMixIn):
log in. Otherwise, raise PermissionsError with details.
"""
if assertion == "user":
if not all(self._site._login_info):
if not all(self.site._login_info):
# Insufficient login info:
e = "AssertEdit: user assertion failed, and no login info was provided."
raise exceptions.PermissionsError(e)
if tries == 0:
# We have login info; try to login:
self._site._login(self._site._login_info)
self.site._login(self.site._login_info)
self._token = None # Need a new token; old one is invalid now
return self._edit(params=params, tries=1)
else:
@@ -476,7 +476,7 @@ class Page(CopyrightMixIn):
Makes an API query only if we haven't already made one and the *pageid*
parameter to :py:meth:`__init__` was left as ``None``, which should be
true for all cases except when pages are returned by an SQL generator
(like :py:meth:`category.get_members(use_sql=True)
(like :py:meth:`category.get_members()
<earwigbot.wiki.category.Category.get_members>`).

Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
@@ -502,8 +502,8 @@ class Page(CopyrightMixIn):
return self._fullurl
else:
slug = quote(self._title.replace(" ", "_"), safe="/:")
path = self._site._article_path.replace("$1", slug)
return ''.join((self._site.url, path))
path = self.site._article_path.replace("$1", slug)
return ''.join((self.site.url, path))

@property
def namespace(self):
@@ -580,7 +580,7 @@ class Page(CopyrightMixIn):
otherwise missing or invalid.
"""
if self._namespace < 0:
ns = self._site.namespace_id_to_name(self._namespace)
ns = self.site.namespace_id_to_name(self._namespace)
e = u"Pages in the {0} namespace can't have talk pages.".format(ns)
raise exceptions.InvalidPageError(e)

@@ -594,7 +594,7 @@ class Page(CopyrightMixIn):
except IndexError:
body = self._title

new_prefix = self._site.namespace_id_to_name(new_ns)
new_prefix = self.site.namespace_id_to_name(new_ns)

# If the new page is in namespace 0, don't do ":Title" (it's correct,
# but unnecessary), just do "Title":
@@ -605,7 +605,7 @@ class Page(CopyrightMixIn):

if follow_redirects is None:
follow_redirects = self._follow_redirects
return Page(self._site, new_title, follow_redirects)
return Page(self.site, new_title, follow_redirects)

def get(self):
"""Return page content, which is cached if you try to call get again.
@@ -616,7 +616,7 @@ class Page(CopyrightMixIn):
if self._exists == self.PAGE_UNKNOWN:
# Kill two birds with one stone by doing an API query for both our
# attributes and our page content:
query = self._site.api_query
query = self.site.api_query
result = query(action="query", rvlimit=1, titles=self._title,
prop="info|revisions", inprop="protection|url",
intoken="edit", rvprop="content|timestamp")
@@ -680,7 +680,7 @@ class Page(CopyrightMixIn):
if not self._creator:
self._load()
self._assert_existence()
return self._site.get_user(self._creator)
return self.site.get_user(self._creator)

def parse(self):
"""Parse the page content for templates, links, etc.


+ 110
- 13
earwigbot/wiki/site.py Просмотреть файл

@@ -75,13 +75,17 @@ class Site(object):

- :py:meth:`api_query`: does an API query with kwargs as params
- :py:meth:`sql_query`: does an SQL query and yields its results
- :py:meth:`get_replag`: estimates the database replication lag
- :py:meth:`get_maxlag`: returns the internal database lag
- :py:meth:`get_replag`: estimates the external database lag
- :py:meth:`namespace_id_to_name`: returns names associated with an NS id
- :py:meth:`namespace_name_to_id`: returns the ID associated with a NS name
- :py:meth:`get_page`: returns a Page for the given title
- :py:meth:`get_category`: returns a Category for the given title
- :py:meth:`get_user`: returns a User object for the given name
- :py:meth:`delegate`: controls when the API or SQL is used
"""
SERVICE_API = 1
SERVICE_SQL = 2

def __init__(self, name=None, project=None, lang=None, base_url=None,
article_path=None, script_path=None, sql=None,
@@ -124,11 +128,13 @@ class Site(object):
self._max_retries = 6
self._last_query_time = 0
self._api_lock = Lock()
self._api_info_cache = {"maxlag": 0, "lastcheck": 0}

# Attributes used for SQL queries:
self._sql_data = sql
self._sql_conn = None
self._sql_lock = Lock()
self._sql_info_cache = {"replag": 0, "lastcheck": 0, "usable": None}

# Attribute used in copyright violation checks (see CopyrightMixIn):
self._search_config = search_config
@@ -201,7 +207,7 @@ class Site(object):
args.append(key + "=" + val)
return "&".join(args)

def _api_query(self, params, tries=0, wait=5):
def _api_query(self, params, tries=0, wait=5, ignore_maxlag=False):
"""Do an API query with *params* as a dict of parameters.

See the documentation for :py:meth:`api_query` for full implementation
@@ -215,7 +221,7 @@ class Site(object):
sleep(wait_time)
self._last_query_time = time()

url, data = self._build_api_query(params)
url, data = self._build_api_query(params, ignore_maxlag)
self._logger.debug("{0} -> {1}".format(url, data))

try:
@@ -228,7 +234,7 @@ class Site(object):
e = e.format(error.code)
else:
e = "API query failed."
raise exceptions.SiteAPIError(e)
raise exceptions.APIError(e)

result = response.read()
if response.headers.get("Content-Encoding") == "gzip":
@@ -238,17 +244,18 @@ class Site(object):

return self._handle_api_query_result(result, params, tries, wait)

def _build_api_query(self, params):
def _build_api_query(self, params, ignore_maxlag):
"""Given API query params, return the URL to query and POST data."""
if not self._base_url or self._script_path is None:
e = "Tried to do an API query, but no API URL is known."
raise exceptions.SiteAPIError(e)
raise exceptions.APIError(e)

url = ''.join((self.url, self._script_path, "/api.php"))
params["format"] = "json" # This is the only format we understand
if self._assert_edit: # If requested, ensure that we're logged in
params["assert"] = self._assert_edit
if self._maxlag: # If requested, don't overload the servers
if self._maxlag and not ignore_maxlag:
# If requested, don't overload the servers:
params["maxlag"] = self._maxlag

data = self._urlencode_utf8(params)
@@ -260,7 +267,7 @@ class Site(object):
res = loads(result) # Try to parse as a JSON object
except ValueError:
e = "API query failed: JSON could not be decoded."
raise exceptions.SiteAPIError(e)
raise exceptions.APIError(e)

try:
code = res["error"]["code"]
@@ -271,7 +278,7 @@ class Site(object):
if code == "maxlag": # We've been throttled by the server
if tries >= self._max_retries:
e = "Maximum number of retries reached ({0})."
raise exceptions.SiteAPIError(e.format(self._max_retries))
raise exceptions.APIError(e.format(self._max_retries))
tries += 1
msg = 'Server says "{0}"; retrying in {1} seconds ({2}/{3})'
self._logger.info(msg.format(info, wait, tries, self._max_retries))
@@ -279,7 +286,7 @@ class Site(object):
return self._api_query(params, tries=tries, wait=wait*2)
else: # Some unknown error occurred
e = 'API query failed: got error "{0}"; server says: "{1}".'
error = exceptions.SiteAPIError(e.format(code, info))
error = exceptions.APIError(e.format(code, info))
error.code, error.info = code, info
raise error

@@ -522,6 +529,48 @@ class Site(object):

self._sql_conn = oursql.connect(**args)

def _get_service_order(self):
    """Return a preferred order for using services (e.g. the API and SQL).

    A list is returned, starting with the most preferred service first and
    ending with the least preferred one. Currently, there are only two
    services. SERVICE_API will always be included since the API is expected
    to be always usable. In normal circumstances, self.SERVICE_SQL will be
    first (with the API second), since using SQL directly is easier on the
    servers than making web queries with the API. self.SERVICE_SQL will be
    second if replag is greater than three minutes (a cached value updated
    every two minutes at most), *unless* API lag is also very high.
    self.SERVICE_SQL will not be included in the list if we cannot form a
    proper SQL connection.
    """
    now = time()
    sql_cache = self._sql_info_cache
    if now - sql_cache["lastcheck"] > 120:
        # The cached replag is stale; measure it again:
        sql_cache["lastcheck"] = now
        try:
            sql_cache["replag"] = sqllag = self.get_replag()
        except (exceptions.SQLError, oursql.Error):
            sql_cache["usable"] = False
            return [self.SERVICE_API]
        sql_cache["usable"] = True
    else:
        if not sql_cache["usable"]:
            return [self.SERVICE_API]
        # The cache is still fresh, so reuse the last measurement:
        sqllag = sql_cache["replag"]

    if sqllag > 180:
        if not self._maxlag:
            return [self.SERVICE_API, self.SERVICE_SQL]
        api_cache = self._api_info_cache
        if now - api_cache["lastcheck"] > 120:
            api_cache["lastcheck"] = now
            try:
                api_cache["maxlag"] = apilag = self.get_maxlag()
            except exceptions.APIError:
                api_cache["maxlag"] = apilag = 0
        else:
            # The cache is still fresh, so reuse the last measurement:
            apilag = api_cache["maxlag"]
        # Scale replag so that 180 seconds of it corresponds to our maxlag
        # setting before comparing it against the API's own lag:
        if sqllag / (180.0 / self._maxlag) < apilag:
            return [self.SERVICE_SQL, self.SERVICE_API]
        return [self.SERVICE_API, self.SERVICE_SQL]

    return [self.SERVICE_SQL, self.SERVICE_API]

@property
def name(self):
"""The Site's name (or "wikiid" in the API), like ``"enwiki"``."""
@@ -559,7 +608,7 @@ class Site(object):
This will first attempt to construct an API url from
:py:attr:`self._base_url` and :py:attr:`self._script_path`. We need
both of these, or else we'll raise
:py:exc:`~earwigbot.exceptions.SiteAPIError`. If
:py:exc:`~earwigbot.exceptions.APIError`. If
:py:attr:`self._base_url` is protocol-relative (introduced in MediaWiki
1.18), we'll choose HTTPS only if :py:attr:`self._user_https` is
``True``, otherwise HTTP.
@@ -578,7 +627,7 @@ class Site(object):
load it as a JSON object, and return it.

If our request failed for some reason, we'll raise
:py:exc:`~earwigbot.exceptions.SiteAPIError` with details. If that
:py:exc:`~earwigbot.exceptions.APIError` with details. If that
reason was due to maxlag, we'll sleep for a bit and then repeat the
query until we exceed :py:attr:`self._max_retries`.

@@ -635,8 +684,30 @@ class Site(object):
for result in cur:
yield result

def get_maxlag(self, showall=False):
    """Return the internal database replication lag in seconds.

    In a typical setup, this is the replication lag *within* the WMF's
    cluster, *not* the external replication lag affecting the Toolserver
    (see :py:meth:`get_replag` for that). It is useful alongside the
    ``maxlag`` API query param (added by config), under which queries are
    halted and retried if the lag is too high, usually above five seconds.

    If *showall* is true, a list with the lag of every server in the cluster
    is returned rather than a single value. The query itself is sent with
    the ``maxlag`` param disabled.
    """
    query = dict(action="query", meta="siteinfo", siprop="dbrepllag")
    if showall:
        query["sishowalldb"] = 1
    with self._api_lock:
        response = self._api_query(query, ignore_maxlag=True)
    servers = response["query"]["dbrepllag"]
    if not showall:
        return servers[0]["lag"]
    return [entry["lag"] for entry in servers]

def get_replag(self):
"""Return the estimated database replication lag in seconds.
"""Return the estimated external database replication lag in seconds.

Requires SQL access. This function only makes sense on a replicated
database (e.g. the Wikimedia Toolserver) and on a wiki that receives a
@@ -739,3 +810,29 @@ class Site(object):
else:
username = self._get_username()
return User(self, username)

def delegate(self, services, args=None, kwargs=None):
    """Delegate a task to either the API or SQL depending on conditions.

    *services* should be a dictionary mapping a service name
    (:py:attr:`self.SERVICE_API <SERVICE_API>` or
    :py:attr:`self.SERVICE_SQL <SERVICE_SQL>`) to the function to call for
    that service. Whichever function is chosen is called with the
    positional arguments in the tuple *args* and the keyword arguments in
    the dict *kwargs*, both of which are empty by default. Services are
    tried in the order given by :py:meth:`_get_service_order`, and the
    first one with an entry in *services* is used.

    Not every service needs an entry in the dictionary. Will raise
    :py:exc:`~earwigbot.exceptions.NoServiceError` if an appropriate
    service cannot be found.
    """
    call_args = args if args else ()
    call_kwargs = kwargs if kwargs else {}

    for service in self._get_service_order():
        if service not in services:
            continue  # No handler registered; try the next preference
        return services[service](*call_args, **call_kwargs)
    raise exceptions.NoServiceError(services)

+ 7
- 7
earwigbot/wiki/user.py Просмотреть файл

@@ -82,7 +82,7 @@ class User(object):

def __str__(self):
"""Return a nice string representation of the User."""
return '<User "{0}" of {1}>'.format(self._name, str(self._site))
return '<User "{0}" of {1}>'.format(self.name, str(self.site))

def _get_attribute(self, attr):
"""Internally used to get an attribute by name.
@@ -107,8 +107,8 @@ class User(object):
is not defined. This defines it.
"""
props = "blockinfo|groups|rights|editcount|registration|emailable|gender"
result = self._site.api_query(action="query", list="users",
ususers=self._name, usprop=props)
result = self.site.api_query(action="query", list="users",
ususers=self._name, usprop=props)
res = result["query"]["users"][0]

# normalize our username in case it was entered oddly
@@ -275,9 +275,9 @@ class User(object):
No checks are made to see if it exists or not. Proper site namespace
conventions are followed.
"""
prefix = self._site.namespace_id_to_name(constants.NS_USER)
prefix = self.site.namespace_id_to_name(constants.NS_USER)
pagename = ':'.join((prefix, self._name))
return Page(self._site, pagename)
return Page(self.site, pagename)

def get_talkpage(self):
"""Return a Page object representing the user's talkpage.
@@ -285,6 +285,6 @@ class User(object):
No checks are made to see if it exists or not. Proper site namespace
conventions are followed.
"""
prefix = self._site.namespace_id_to_name(constants.NS_USER_TALK)
prefix = self.site.namespace_id_to_name(constants.NS_USER_TALK)
pagename = ':'.join((prefix, self._name))
return Page(self._site, pagename)
return Page(self.site, pagename)

Загрузка…
Отмена
Сохранить