ソースを参照

First stages of service delegation.

tags/v0.1^2
Ben Kurtovic 12年前
コミット
6433957ae9
7個のファイルの変更116行の追加54行の削除
  1. +2
    -5
      earwigbot/commands/afc_status.py
  2. +16
    -9
      earwigbot/exceptions.py
  3. +1
    -1
      earwigbot/tasks/afc_statistics.py
  4. +47
    -8
      earwigbot/wiki/category.py
  5. +17
    -17
      earwigbot/wiki/page.py
  6. +26
    -7
      earwigbot/wiki/site.py
  7. +7
    -7
      earwigbot/wiki/user.py

+ 2
- 5
earwigbot/commands/afc_status.py ファイルの表示

@@ -109,12 +109,9 @@ class AFCStatus(Command):

def count_submissions(self):
"""Returns the number of open AFC submissions (count of CAT:PEND)."""
cat = self.site.get_category("Pending AfC submissions")
subs = len(cat.get_members(use_sql=True))

# Remove [[Wikipedia:Articles for creation/Redirects]] and
# Subtract two for [[Wikipedia:Articles for creation/Redirects]] and
# [[Wikipedia:Files for upload]], which aren't real submissions:
return subs - 2
return self.site.get_category("Pending AfC submissions").pages - 2

def count_redirects(self):
"""Returns the number of open redirect submissions. Calculated as the


+ 16
- 9
earwigbot/exceptions.py ファイルの表示

@@ -31,7 +31,9 @@ This module contains all exceptions used by EarwigBot::
| +-- BrokenSocketError
+-- WikiToolsetError
+-- SiteNotFoundError
+-- SiteAPIError
+-- NoServiceError
+-- APIError
+-- SQLError
+-- LoginError
+-- NamespaceNotFoundError
+-- PageNotFoundError
@@ -45,7 +47,6 @@ This module contains all exceptions used by EarwigBot::
| +-- ContentTooBigError
| +-- SpamDetectedError
| +-- FilteredError
+-- SQLError
+-- CopyvioCheckError
+-- UnknownSearchEngineError
+-- UnsupportedSearchEngineError
@@ -81,7 +82,13 @@ class SiteNotFoundError(WikiToolsetError):
Raised by :py:class:`~earwigbot.wiki.sitesdb.SitesDB`.
"""

class SiteAPIError(WikiToolsetError):
class NoServiceError(WikiToolsetError):
"""No service is functioning to handle a specific task.

Raised by :py:meth:`Site.delegate <earwigbot.wiki.site.Site.delegate>`.
"""

class APIError(WikiToolsetError):
"""Couldn't connect to a site's API.

Perhaps the server doesn't exist, our URL is wrong or incomplete, or
@@ -90,6 +97,12 @@ class SiteAPIError(WikiToolsetError):
Raised by :py:meth:`Site.api_query <earwigbot.wiki.site.Site.api_query>`.
"""

class SQLError(WikiToolsetError):
"""Some error involving SQL querying occurred.

Raised by :py:meth:`Site.sql_query <earwigbot.wiki.site.Site.sql_query>`.
"""

class LoginError(WikiToolsetError):
"""An error occured while trying to login.

@@ -188,12 +201,6 @@ class FilteredError(EditError):
:py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
"""

class SQLError(WikiToolsetError):
"""Some error involving SQL querying occurred.

Raised by :py:meth:`Site.sql_query <earwigbot.wiki.site.Site.sql_query>`.
"""

class CopyvioCheckError(WikiToolsetError):
"""An error occured when checking a page for copyright violations.



+ 1
- 1
earwigbot/tasks/afc_statistics.py ファイルの表示

@@ -663,7 +663,7 @@ class AFCStatistics(Task):
return None, None, None
try:
content = self.get_revision_content(revid)
except exceptions.SiteAPIError:
except exceptions.APIError:
msg = "API error interrupted SQL query in get_special() for page (id: {0}, chart: {1})"
self.logger.exception(msg.format(pageid, chart))
return None, None, None


+ 47
- 8
earwigbot/wiki/category.py ファイルの表示

@@ -49,7 +49,7 @@ class Category(Page):

def __str__(self):
"""Return a nice string representation of the Category."""
return '<Category "{0}" of {1}>'.format(self.title, str(self._site))
return '<Category "{0}" of {1}>'.format(self.title, str(self.site))

def _get_members_via_sql(self, limit, follow):
"""Iterate over Pages in the category using SQL."""
@@ -60,32 +60,32 @@ class Category(Page):

if limit:
query += " LIMIT ?"
result = self._site.sql_query(query, (title, limit))
result = self.site.sql_query(query, (title, limit))
else:
result = self._site.sql_query(query, (title,))
result = self.site.sql_query(query, (title,))

members = list(result)
for row in members:
base = row[0].replace("_", " ").decode("utf8")
namespace = self._site.namespace_id_to_name(row[1])
namespace = self.site.namespace_id_to_name(row[1])
if namespace:
title = u":".join((namespace, base))
else: # Avoid doing a silly (albeit valid) ":Pagename" thing
title = base
yield self._site.get_page(title, follow_redirects=follow,
yield self.site.get_page(title, follow_redirects=follow,
pageid=row[2])

def _get_members_via_api(self, limit, follow):
"""Iterate over Pages in the category using the API."""
params = {"action": "query", "list": "categorymembers",
"cmtitle": self._title}
"cmtitle": self.title}

while 1:
params["cmlimit"] = limit if limit else "max"
result = self._site.api_query(**params)
result = self.site.api_query(**params)
for member in result["query"]["categorymembers"]:
title = member["title"]
yield self._site.get_page(title, follow_redirects=follow)
yield self.site.get_page(title, follow_redirects=follow)

if "query-continue" in result:
qcontinue = result["query-continue"]["categorymembers"]
@@ -95,6 +95,45 @@ class Category(Page):
else:
break

def _get_size_via_sql(self, member_type):
    """Return the size of the category using an SQL query.

    *member_type* is ``"size"`` to count every member, or one of
    ``"pages"``, ``"files"``, ``"subcats"`` to count only members whose
    ``cl_type`` is the singular form of that name.
    """
    query = "SELECT COUNT(*) FROM categorylinks WHERE cl_to = ?"
    # Match on the title with underscores and the namespace prefix removed:
    title = self.title.replace(" ", "_").split(":", 1)[1]
    if member_type == "size":
        result = self.site.sql_query(query, (title,))
    else:
        query += " AND cl_type = ?"
        # Strip the plural "s": "pages" -> "page", "subcats" -> "subcat"
        result = self.site.sql_query(query, (title, member_type[:-1]))
    # COUNT(*) gives one row with one column; unwrap both to get the number:
    return list(result)[0][0]

def _get_size_via_api(self, member_type):
    """Return the size of the category using the API.

    *member_type* is the key to read from the page's ``categoryinfo``
    (``"size"``, ``"pages"``, ``"files"``, or ``"subcats"``).
    """
    # prop=categoryinfo is selected with titles=, not cmtitle= (which only
    # applies to list=categorymembers):
    result = self.site.api_query(action="query", prop="categoryinfo",
                                 titles=self.title)
    # Only one title was requested, so take the single page entry; list()
    # keeps this working whether values() is a list or a view:
    info = list(result["query"]["pages"].values())[0]["categoryinfo"]
    return info[member_type]

def _get_size(self, member_type):
    """Return the size of the category, via whichever service is preferred.

    Offers both the API and SQL implementations to
    :py:meth:`Site.delegate`, passing *member_type* through unchanged.
    """
    services = {
        self.site.SERVICE_API: self._get_size_via_api,
        self.site.SERVICE_SQL: self._get_size_via_sql
    }
    return self.site.delegate(services, (member_type,))

@property
def size(self):
"""Number of members in the category, of any type (member type ``"size"``)."""
return self._get_size("size")

@property
def pages(self):
"""Number of page members in the category (member type ``"pages"``)."""
return self._get_size("pages")

@property
def files(self):
"""Number of file members in the category (member type ``"files"``)."""
return self._get_size("files")

@property
def subcats(self):
"""Number of subcategories in the category (member type ``"subcats"``)."""
return self._get_size("subcats")

def get_members(self, use_sql=False, limit=None, follow_redirects=None):
"""Iterate over Pages in the category.



+ 17
- 17
earwigbot/wiki/page.py ファイルの表示

@@ -117,7 +117,7 @@ class Page(CopyrightMixIn):
prefix = self._title.split(":", 1)[0]
if prefix != title: # ignore a page that's titled "Category" or "User"
try:
self._namespace = self._site.namespace_name_to_id(prefix)
self._namespace = self.site.namespace_name_to_id(prefix)
except exceptions.NamespaceNotFoundError:
self._namespace = 0
else:
@@ -137,7 +137,7 @@ class Page(CopyrightMixIn):

def __str__(self):
"""Return a nice string representation of the Page."""
return '<Page "{0}" of {1}>'.format(self.title, str(self._site))
return '<Page "{0}" of {1}>'.format(self.title, str(self.site))

def _assert_validity(self):
"""Used to ensure that our page's title is valid.
@@ -199,7 +199,7 @@ class Page(CopyrightMixIn):
Assuming the API is sound, this should not raise any exceptions.
"""
if not result:
query = self._site.api_query
query = self.site.api_query
result = query(action="query", rvprop="user", intoken="edit",
prop="info|revisions", rvlimit=1, rvdir="newer",
titles=self._title, inprop="protection|url")
@@ -263,7 +263,7 @@ class Page(CopyrightMixIn):
want to force content reloading.
"""
if not result:
query = self._site.api_query
query = self.site.api_query
result = query(action="query", prop="revisions", rvlimit=1,
rvprop="content|timestamp", titles=self._title)

@@ -310,8 +310,8 @@ class Page(CopyrightMixIn):

# Try the API query, catching most errors with our handler:
try:
result = self._site.api_query(**params)
except exceptions.SiteAPIError as error:
result = self.site.api_query(**params)
except exceptions.APIError as error:
if not hasattr(error, "code"):
raise # We can only handle errors with a code attribute
result = self._handle_edit_errors(error, params, tries)
@@ -375,12 +375,12 @@ class Page(CopyrightMixIn):

elif error.code in ["noedit-anon", "cantcreate-anon",
"noimageredirect-anon"]:
if not all(self._site._login_info):
if not all(self.site._login_info):
# Insufficient login info:
raise exceptions.PermissionsError(error.info)
if tries == 0:
# We have login info; try to login:
self._site._login(self._site._login_info)
self.site._login(self.site._login_info)
self._token = None # Need a new token; old one is invalid now
return self._edit(params=params, tries=1)
else:
@@ -416,13 +416,13 @@ class Page(CopyrightMixIn):
log in. Otherwise, raise PermissionsError with details.
"""
if assertion == "user":
if not all(self._site._login_info):
if not all(self.site._login_info):
# Insufficient login info:
e = "AssertEdit: user assertion failed, and no login info was provided."
raise exceptions.PermissionsError(e)
if tries == 0:
# We have login info; try to login:
self._site._login(self._site._login_info)
self.site._login(self.site._login_info)
self._token = None # Need a new token; old one is invalid now
return self._edit(params=params, tries=1)
else:
@@ -502,8 +502,8 @@ class Page(CopyrightMixIn):
return self._fullurl
else:
slug = quote(self._title.replace(" ", "_"), safe="/:")
path = self._site._article_path.replace("$1", slug)
return ''.join((self._site.url, path))
path = self.site._article_path.replace("$1", slug)
return ''.join((self.site.url, path))

@property
def namespace(self):
@@ -580,7 +580,7 @@ class Page(CopyrightMixIn):
otherwise missing or invalid.
"""
if self._namespace < 0:
ns = self._site.namespace_id_to_name(self._namespace)
ns = self.site.namespace_id_to_name(self._namespace)
e = u"Pages in the {0} namespace can't have talk pages.".format(ns)
raise exceptions.InvalidPageError(e)

@@ -594,7 +594,7 @@ class Page(CopyrightMixIn):
except IndexError:
body = self._title

new_prefix = self._site.namespace_id_to_name(new_ns)
new_prefix = self.site.namespace_id_to_name(new_ns)

# If the new page is in namespace 0, don't do ":Title" (it's correct,
# but unnecessary), just do "Title":
@@ -605,7 +605,7 @@ class Page(CopyrightMixIn):

if follow_redirects is None:
follow_redirects = self._follow_redirects
return Page(self._site, new_title, follow_redirects)
return Page(self.site, new_title, follow_redirects)

def get(self):
"""Return page content, which is cached if you try to call get again.
@@ -616,7 +616,7 @@ class Page(CopyrightMixIn):
if self._exists == self.PAGE_UNKNOWN:
# Kill two birds with one stone by doing an API query for both our
# attributes and our page content:
query = self._site.api_query
query = self.site.api_query
result = query(action="query", rvlimit=1, titles=self._title,
prop="info|revisions", inprop="protection|url",
intoken="edit", rvprop="content|timestamp")
@@ -680,7 +680,7 @@ class Page(CopyrightMixIn):
if not self._creator:
self._load()
self._assert_existence()
return self._site.get_user(self._creator)
return self.site.get_user(self._creator)

def parse(self):
"""Parse the page content for templates, links, etc.


+ 26
- 7
earwigbot/wiki/site.py ファイルの表示

@@ -82,6 +82,8 @@ class Site(object):
- :py:meth:`get_category`: returns a Category for the given title
- :py:meth:`get_user`: returns a User object for the given name
"""
# Identifiers for the backends a task can be handed to. They are the keys of
# the ``services`` dict accepted by delegate() and the values returned by
# _get_service_order().
SERVICE_API = 1
SERVICE_SQL = 2

def __init__(self, name=None, project=None, lang=None, base_url=None,
article_path=None, script_path=None, sql=None,
@@ -228,7 +230,7 @@ class Site(object):
e = e.format(error.code)
else:
e = "API query failed."
raise exceptions.SiteAPIError(e)
raise exceptions.APIError(e)

result = response.read()
if response.headers.get("Content-Encoding") == "gzip":
@@ -242,7 +244,7 @@ class Site(object):
"""Given API query params, return the URL to query and POST data."""
if not self._base_url or self._script_path is None:
e = "Tried to do an API query, but no API URL is known."
raise exceptions.SiteAPIError(e)
raise exceptions.APIError(e)

url = ''.join((self.url, self._script_path, "/api.php"))
params["format"] = "json" # This is the only format we understand
@@ -260,7 +262,7 @@ class Site(object):
res = loads(result) # Try to parse as a JSON object
except ValueError:
e = "API query failed: JSON could not be decoded."
raise exceptions.SiteAPIError(e)
raise exceptions.APIError(e)

try:
code = res["error"]["code"]
@@ -271,7 +273,7 @@ class Site(object):
if code == "maxlag": # We've been throttled by the server
if tries >= self._max_retries:
e = "Maximum number of retries reached ({0})."
raise exceptions.SiteAPIError(e.format(self._max_retries))
raise exceptions.APIError(e.format(self._max_retries))
tries += 1
msg = 'Server says "{0}"; retrying in {1} seconds ({2}/{3})'
self._logger.info(msg.format(info, wait, tries, self._max_retries))
@@ -279,7 +281,7 @@ class Site(object):
return self._api_query(params, tries=tries, wait=wait*2)
else: # Some unknown error occurred
e = 'API query failed: got error "{0}"; server says: "{1}".'
error = exceptions.SiteAPIError(e.format(code, info))
error = exceptions.APIError(e.format(code, info))
error.code, error.info = code, info
raise error

@@ -522,6 +524,10 @@ class Site(object):

self._sql_conn = oursql.connect(**args)

def _get_service_order(self):
"""Return the list of services to try, most preferred first.

This is always ``[SERVICE_SQL, SERVICE_API]``, i.e. SQL is preferred
over the API.
"""
return [self.SERVICE_SQL, self.SERVICE_API]

@property
def name(self):
"""The Site's name (or "wikiid" in the API), like ``"enwiki"``."""
@@ -559,7 +565,7 @@ class Site(object):
This will first attempt to construct an API url from
:py:attr:`self._base_url` and :py:attr:`self._script_path`. We need
both of these, or else we'll raise
:py:exc:`~earwigbot.exceptions.SiteAPIError`. If
:py:exc:`~earwigbot.exceptions.APIError`. If
:py:attr:`self._base_url` is protocol-relative (introduced in MediaWiki
1.18), we'll choose HTTPS only if :py:attr:`self._user_https` is
``True``, otherwise HTTP.
@@ -578,7 +584,7 @@ class Site(object):
load it as a JSON object, and return it.

If our request failed for some reason, we'll raise
:py:exc:`~earwigbot.exceptions.SiteAPIError` with details. If that
:py:exc:`~earwigbot.exceptions.APIError` with details. If that
reason was due to maxlag, we'll sleep for a bit and then repeat the
query until we exceed :py:attr:`self._max_retries`.

@@ -739,3 +745,16 @@ class Site(object):
else:
username = self._get_username()
return User(self, username)

def delegate(self, services, args=None, kwargs=None):
    """Run a task with the most preferred service that can handle it.

    *services* maps service identifiers (such as :py:attr:`SERVICE_API` and
    :py:attr:`SERVICE_SQL`) to callables. The identifiers returned by
    :py:meth:`_get_service_order` are checked in order, and the first one
    present in *services* is called with *args* (a sequence of positional
    arguments) and *kwargs* (a dict of keyword arguments). Its return
    value is returned as-is; nothing else is tried if that callable
    raises.

    If none of the ordered identifiers is in *services*,
    :py:exc:`~earwigbot.exceptions.NoServiceError` is raised.
    """
    positional = args or ()
    keywords = kwargs or {}

    for service in self._get_service_order():
        if service not in services:
            continue  # Caller offered no implementation for this service
        return services[service](*positional, **keywords)
    raise exceptions.NoServiceError(services)

+ 7
- 7
earwigbot/wiki/user.py ファイルの表示

@@ -82,7 +82,7 @@ class User(object):

def __str__(self):
"""Return a nice string representation of the User."""
return '<User "{0}" of {1}>'.format(self._name, str(self._site))
return '<User "{0}" of {1}>'.format(self.name, str(self.site))

def _get_attribute(self, attr):
"""Internally used to get an attribute by name.
@@ -107,8 +107,8 @@ class User(object):
is not defined. This defines it.
"""
props = "blockinfo|groups|rights|editcount|registration|emailable|gender"
result = self._site.api_query(action="query", list="users",
ususers=self._name, usprop=props)
result = self.site.api_query(action="query", list="users",
ususers=self._name, usprop=props)
res = result["query"]["users"][0]

# normalize our username in case it was entered oddly
@@ -275,9 +275,9 @@ class User(object):
No checks are made to see if it exists or not. Proper site namespace
conventions are followed.
"""
prefix = self._site.namespace_id_to_name(constants.NS_USER)
prefix = self.site.namespace_id_to_name(constants.NS_USER)
pagename = ':'.join((prefix, self._name))
return Page(self._site, pagename)
return Page(self.site, pagename)

def get_talkpage(self):
"""Return a Page object representing the user's talkpage.
@@ -285,6 +285,6 @@ class User(object):
No checks are made to see if it exists or not. Proper site namespace
conventions are followed.
"""
prefix = self._site.namespace_id_to_name(constants.NS_USER_TALK)
prefix = self.site.namespace_id_to_name(constants.NS_USER_TALK)
pagename = ':'.join((prefix, self._name))
return Page(self._site, pagename)
return Page(self.site, pagename)

読み込み中…
キャンセル
保存