Browse Source

First stages of service delegation.

tags/v0.1^2
Ben Kurtovic 12 years ago
parent
commit
6433957ae9
7 changed files with 116 additions and 54 deletions
  1. +2
    -5
      earwigbot/commands/afc_status.py
  2. +16
    -9
      earwigbot/exceptions.py
  3. +1
    -1
      earwigbot/tasks/afc_statistics.py
  4. +47
    -8
      earwigbot/wiki/category.py
  5. +17
    -17
      earwigbot/wiki/page.py
  6. +26
    -7
      earwigbot/wiki/site.py
  7. +7
    -7
      earwigbot/wiki/user.py

+ 2
- 5
earwigbot/commands/afc_status.py View File

@@ -109,12 +109,9 @@ class AFCStatus(Command):


def count_submissions(self): def count_submissions(self):
"""Returns the number of open AFC submissions (count of CAT:PEND).""" """Returns the number of open AFC submissions (count of CAT:PEND)."""
cat = self.site.get_category("Pending AfC submissions")
subs = len(cat.get_members(use_sql=True))

# Remove [[Wikipedia:Articles for creation/Redirects]] and
# Subtract two for [[Wikipedia:Articles for creation/Redirects]] and
# [[Wikipedia:Files for upload]], which aren't real submissions: # [[Wikipedia:Files for upload]], which aren't real submissions:
return subs - 2
return self.site.get_category("Pending AfC submissions").pages - 2


def count_redirects(self): def count_redirects(self):
"""Returns the number of open redirect submissions. Calculated as the """Returns the number of open redirect submissions. Calculated as the


+ 16
- 9
earwigbot/exceptions.py View File

@@ -31,7 +31,9 @@ This module contains all exceptions used by EarwigBot::
| +-- BrokenSocketError | +-- BrokenSocketError
+-- WikiToolsetError +-- WikiToolsetError
+-- SiteNotFoundError +-- SiteNotFoundError
+-- SiteAPIError
+-- NoServiceError
+-- APIError
+-- SQLError
+-- LoginError +-- LoginError
+-- NamespaceNotFoundError +-- NamespaceNotFoundError
+-- PageNotFoundError +-- PageNotFoundError
@@ -45,7 +47,6 @@ This module contains all exceptions used by EarwigBot::
| +-- ContentTooBigError | +-- ContentTooBigError
| +-- SpamDetectedError | +-- SpamDetectedError
| +-- FilteredError | +-- FilteredError
+-- SQLError
+-- CopyvioCheckError +-- CopyvioCheckError
+-- UnknownSearchEngineError +-- UnknownSearchEngineError
+-- UnsupportedSearchEngineError +-- UnsupportedSearchEngineError
@@ -81,7 +82,13 @@ class SiteNotFoundError(WikiToolsetError):
Raised by :py:class:`~earwigbot.wiki.sitesdb.SitesDB`. Raised by :py:class:`~earwigbot.wiki.sitesdb.SitesDB`.
""" """


class SiteAPIError(WikiToolsetError):
class NoServiceError(WikiToolsetError):
    """None of the candidate services could be used for a task.

    Raised by :py:meth:`Site.delegate <earwigbot.wiki.site.Site.delegate>`
    when it runs out of services to try.
    """

class APIError(WikiToolsetError):
"""Couldn't connect to a site's API. """Couldn't connect to a site's API.


Perhaps the server doesn't exist, our URL is wrong or incomplete, or Perhaps the server doesn't exist, our URL is wrong or incomplete, or
@@ -90,6 +97,12 @@ class SiteAPIError(WikiToolsetError):
Raised by :py:meth:`Site.api_query <earwigbot.wiki.site.Site.api_query>`. Raised by :py:meth:`Site.api_query <earwigbot.wiki.site.Site.api_query>`.
""" """


class SQLError(WikiToolsetError):
"""Some error involving SQL querying occurred.

Raised by :py:meth:`Site.sql_query <earwigbot.wiki.site.Site.sql_query>`.
"""

class LoginError(WikiToolsetError): class LoginError(WikiToolsetError):
"""An error occured while trying to login. """An error occured while trying to login.


@@ -188,12 +201,6 @@ class FilteredError(EditError):
:py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`. :py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
""" """


class SQLError(WikiToolsetError):
"""Some error involving SQL querying occurred.

Raised by :py:meth:`Site.sql_query <earwigbot.wiki.site.Site.sql_query>`.
"""

class CopyvioCheckError(WikiToolsetError): class CopyvioCheckError(WikiToolsetError):
"""An error occured when checking a page for copyright violations. """An error occured when checking a page for copyright violations.




+ 1
- 1
earwigbot/tasks/afc_statistics.py View File

@@ -663,7 +663,7 @@ class AFCStatistics(Task):
return None, None, None return None, None, None
try: try:
content = self.get_revision_content(revid) content = self.get_revision_content(revid)
except exceptions.SiteAPIError:
except exceptions.APIError:
msg = "API error interrupted SQL query in get_special() for page (id: {0}, chart: {1})" msg = "API error interrupted SQL query in get_special() for page (id: {0}, chart: {1})"
self.logger.exception(msg.format(pageid, chart)) self.logger.exception(msg.format(pageid, chart))
return None, None, None return None, None, None


+ 47
- 8
earwigbot/wiki/category.py View File

@@ -49,7 +49,7 @@ class Category(Page):


def __str__(self): def __str__(self):
"""Return a nice string representation of the Category.""" """Return a nice string representation of the Category."""
return '<Category "{0}" of {1}>'.format(self.title, str(self._site))
return '<Category "{0}" of {1}>'.format(self.title, str(self.site))


def _get_members_via_sql(self, limit, follow): def _get_members_via_sql(self, limit, follow):
"""Iterate over Pages in the category using SQL.""" """Iterate over Pages in the category using SQL."""
@@ -60,32 +60,32 @@ class Category(Page):


if limit: if limit:
query += " LIMIT ?" query += " LIMIT ?"
result = self._site.sql_query(query, (title, limit))
result = self.site.sql_query(query, (title, limit))
else: else:
result = self._site.sql_query(query, (title,))
result = self.site.sql_query(query, (title,))


members = list(result) members = list(result)
for row in members: for row in members:
base = row[0].replace("_", " ").decode("utf8") base = row[0].replace("_", " ").decode("utf8")
namespace = self._site.namespace_id_to_name(row[1])
namespace = self.site.namespace_id_to_name(row[1])
if namespace: if namespace:
title = u":".join((namespace, base)) title = u":".join((namespace, base))
else: # Avoid doing a silly (albeit valid) ":Pagename" thing else: # Avoid doing a silly (albeit valid) ":Pagename" thing
title = base title = base
yield self._site.get_page(title, follow_redirects=follow,
yield self.site.get_page(title, follow_redirects=follow,
pageid=row[2]) pageid=row[2])


def _get_members_via_api(self, limit, follow): def _get_members_via_api(self, limit, follow):
"""Iterate over Pages in the category using the API.""" """Iterate over Pages in the category using the API."""
params = {"action": "query", "list": "categorymembers", params = {"action": "query", "list": "categorymembers",
"cmtitle": self._title}
"cmtitle": self.title}


while 1: while 1:
params["cmlimit"] = limit if limit else "max" params["cmlimit"] = limit if limit else "max"
result = self._site.api_query(**params)
result = self.site.api_query(**params)
for member in result["query"]["categorymembers"]: for member in result["query"]["categorymembers"]:
title = member["title"] title = member["title"]
yield self._site.get_page(title, follow_redirects=follow)
yield self.site.get_page(title, follow_redirects=follow)


if "query-continue" in result: if "query-continue" in result:
qcontinue = result["query-continue"]["categorymembers"] qcontinue = result["query-continue"]["categorymembers"]
@@ -95,6 +95,45 @@ class Category(Page):
else: else:
break break


def _get_size_via_sql(self, member_type):
    """Return the number of members of this category, counted via SQL.

    *member_type* is ``"size"`` to count every ``categorylinks`` row that
    points at this category, or a plural kind name (``"pages"``,
    ``"files"`` or ``"subcats"``) to count only rows of that kind.

    Returns the count itself (the single column of the single result row),
    not the row.
    """
    query = "SELECT COUNT(*) FROM categorylinks WHERE cl_to = ?"
    # Match on the title without its namespace prefix, using underscores
    # in place of spaces.
    title = self.title.replace(" ", "_").split(":", 1)[1]
    if member_type == "size":
        result = self.site.sql_query(query, (title,))
    else:
        query += " AND cl_type = ?"
        # Strip the trailing "s" to get the singular kind name that is
        # compared against cl_type ("pages" -> "page", and so on).
        result = self.site.sql_query(query, (title, member_type[:-1]))
    # COUNT(*) yields one row holding one column; unwrap both levels so the
    # caller receives a number it can do arithmetic on.
    return list(result)[0][0]

def _get_size_via_api(self, member_type):
    """Return one ``categoryinfo`` counter for this category via the API.

    *member_type* is the key to read from the ``categoryinfo`` mapping in
    the API response (``"size"``, ``"pages"``, ``"files"`` or
    ``"subcats"``).
    """
    # prop=categoryinfo selects its target pages through "titles".
    result = self.site.api_query(action="query", prop="categoryinfo",
                                 titles=self.title)
    pages = result["query"]["pages"]
    # A single title was requested, so take the one entry. Wrapping in
    # list() keeps this working whether values() is a list or a view.
    info = list(pages.values())[0]["categoryinfo"]
    return info[member_type]

def _get_size(self, member_type):
    """Return a member count for this category from an available service.

    Offers both the API and the SQL implementation to
    :py:meth:`Site.delegate <earwigbot.wiki.site.Site.delegate>`, which
    picks one and calls it with *member_type*.
    """
    services = {
        self.site.SERVICE_API: self._get_size_via_api,
        self.site.SERVICE_SQL: self._get_size_via_sql,
    }
    return self.site.delegate(services, (member_type,))

@property
def size(self):
    """The category's ``"size"`` count, as returned by :py:meth:`_get_size`.

    This is the count taken without restricting to one kind of member.
    """
    total = self._get_size("size")
    return total

@property
def pages(self):
    """The category's ``"pages"`` count, as returned by :py:meth:`_get_size`."""
    page_count = self._get_size("pages")
    return page_count

@property
def files(self):
    """The category's ``"files"`` count, as returned by :py:meth:`_get_size`."""
    file_count = self._get_size("files")
    return file_count

@property
def subcats(self):
    """The category's ``"subcats"`` count, as returned by :py:meth:`_get_size`."""
    subcat_count = self._get_size("subcats")
    return subcat_count

def get_members(self, use_sql=False, limit=None, follow_redirects=None): def get_members(self, use_sql=False, limit=None, follow_redirects=None):
"""Iterate over Pages in the category. """Iterate over Pages in the category.




+ 17
- 17
earwigbot/wiki/page.py View File

@@ -117,7 +117,7 @@ class Page(CopyrightMixIn):
prefix = self._title.split(":", 1)[0] prefix = self._title.split(":", 1)[0]
if prefix != title: # ignore a page that's titled "Category" or "User" if prefix != title: # ignore a page that's titled "Category" or "User"
try: try:
self._namespace = self._site.namespace_name_to_id(prefix)
self._namespace = self.site.namespace_name_to_id(prefix)
except exceptions.NamespaceNotFoundError: except exceptions.NamespaceNotFoundError:
self._namespace = 0 self._namespace = 0
else: else:
@@ -137,7 +137,7 @@ class Page(CopyrightMixIn):


def __str__(self): def __str__(self):
"""Return a nice string representation of the Page.""" """Return a nice string representation of the Page."""
return '<Page "{0}" of {1}>'.format(self.title, str(self._site))
return '<Page "{0}" of {1}>'.format(self.title, str(self.site))


def _assert_validity(self): def _assert_validity(self):
"""Used to ensure that our page's title is valid. """Used to ensure that our page's title is valid.
@@ -199,7 +199,7 @@ class Page(CopyrightMixIn):
Assuming the API is sound, this should not raise any exceptions. Assuming the API is sound, this should not raise any exceptions.
""" """
if not result: if not result:
query = self._site.api_query
query = self.site.api_query
result = query(action="query", rvprop="user", intoken="edit", result = query(action="query", rvprop="user", intoken="edit",
prop="info|revisions", rvlimit=1, rvdir="newer", prop="info|revisions", rvlimit=1, rvdir="newer",
titles=self._title, inprop="protection|url") titles=self._title, inprop="protection|url")
@@ -263,7 +263,7 @@ class Page(CopyrightMixIn):
want to force content reloading. want to force content reloading.
""" """
if not result: if not result:
query = self._site.api_query
query = self.site.api_query
result = query(action="query", prop="revisions", rvlimit=1, result = query(action="query", prop="revisions", rvlimit=1,
rvprop="content|timestamp", titles=self._title) rvprop="content|timestamp", titles=self._title)


@@ -310,8 +310,8 @@ class Page(CopyrightMixIn):


# Try the API query, catching most errors with our handler: # Try the API query, catching most errors with our handler:
try: try:
result = self._site.api_query(**params)
except exceptions.SiteAPIError as error:
result = self.site.api_query(**params)
except exceptions.APIError as error:
if not hasattr(error, "code"): if not hasattr(error, "code"):
raise # We can only handle errors with a code attribute raise # We can only handle errors with a code attribute
result = self._handle_edit_errors(error, params, tries) result = self._handle_edit_errors(error, params, tries)
@@ -375,12 +375,12 @@ class Page(CopyrightMixIn):


elif error.code in ["noedit-anon", "cantcreate-anon", elif error.code in ["noedit-anon", "cantcreate-anon",
"noimageredirect-anon"]: "noimageredirect-anon"]:
if not all(self._site._login_info):
if not all(self.site._login_info):
# Insufficient login info: # Insufficient login info:
raise exceptions.PermissionsError(error.info) raise exceptions.PermissionsError(error.info)
if tries == 0: if tries == 0:
# We have login info; try to login: # We have login info; try to login:
self._site._login(self._site._login_info)
self.site._login(self.site._login_info)
self._token = None # Need a new token; old one is invalid now self._token = None # Need a new token; old one is invalid now
return self._edit(params=params, tries=1) return self._edit(params=params, tries=1)
else: else:
@@ -416,13 +416,13 @@ class Page(CopyrightMixIn):
log in. Otherwise, raise PermissionsError with details. log in. Otherwise, raise PermissionsError with details.
""" """
if assertion == "user": if assertion == "user":
if not all(self._site._login_info):
if not all(self.site._login_info):
# Insufficient login info: # Insufficient login info:
e = "AssertEdit: user assertion failed, and no login info was provided." e = "AssertEdit: user assertion failed, and no login info was provided."
raise exceptions.PermissionsError(e) raise exceptions.PermissionsError(e)
if tries == 0: if tries == 0:
# We have login info; try to login: # We have login info; try to login:
self._site._login(self._site._login_info)
self.site._login(self.site._login_info)
self._token = None # Need a new token; old one is invalid now self._token = None # Need a new token; old one is invalid now
return self._edit(params=params, tries=1) return self._edit(params=params, tries=1)
else: else:
@@ -502,8 +502,8 @@ class Page(CopyrightMixIn):
return self._fullurl return self._fullurl
else: else:
slug = quote(self._title.replace(" ", "_"), safe="/:") slug = quote(self._title.replace(" ", "_"), safe="/:")
path = self._site._article_path.replace("$1", slug)
return ''.join((self._site.url, path))
path = self.site._article_path.replace("$1", slug)
return ''.join((self.site.url, path))


@property @property
def namespace(self): def namespace(self):
@@ -580,7 +580,7 @@ class Page(CopyrightMixIn):
otherwise missing or invalid. otherwise missing or invalid.
""" """
if self._namespace < 0: if self._namespace < 0:
ns = self._site.namespace_id_to_name(self._namespace)
ns = self.site.namespace_id_to_name(self._namespace)
e = u"Pages in the {0} namespace can't have talk pages.".format(ns) e = u"Pages in the {0} namespace can't have talk pages.".format(ns)
raise exceptions.InvalidPageError(e) raise exceptions.InvalidPageError(e)


@@ -594,7 +594,7 @@ class Page(CopyrightMixIn):
except IndexError: except IndexError:
body = self._title body = self._title


new_prefix = self._site.namespace_id_to_name(new_ns)
new_prefix = self.site.namespace_id_to_name(new_ns)


# If the new page is in namespace 0, don't do ":Title" (it's correct, # If the new page is in namespace 0, don't do ":Title" (it's correct,
# but unnecessary), just do "Title": # but unnecessary), just do "Title":
@@ -605,7 +605,7 @@ class Page(CopyrightMixIn):


if follow_redirects is None: if follow_redirects is None:
follow_redirects = self._follow_redirects follow_redirects = self._follow_redirects
return Page(self._site, new_title, follow_redirects)
return Page(self.site, new_title, follow_redirects)


def get(self): def get(self):
"""Return page content, which is cached if you try to call get again. """Return page content, which is cached if you try to call get again.
@@ -616,7 +616,7 @@ class Page(CopyrightMixIn):
if self._exists == self.PAGE_UNKNOWN: if self._exists == self.PAGE_UNKNOWN:
# Kill two birds with one stone by doing an API query for both our # Kill two birds with one stone by doing an API query for both our
# attributes and our page content: # attributes and our page content:
query = self._site.api_query
query = self.site.api_query
result = query(action="query", rvlimit=1, titles=self._title, result = query(action="query", rvlimit=1, titles=self._title,
prop="info|revisions", inprop="protection|url", prop="info|revisions", inprop="protection|url",
intoken="edit", rvprop="content|timestamp") intoken="edit", rvprop="content|timestamp")
@@ -680,7 +680,7 @@ class Page(CopyrightMixIn):
if not self._creator: if not self._creator:
self._load() self._load()
self._assert_existence() self._assert_existence()
return self._site.get_user(self._creator)
return self.site.get_user(self._creator)


def parse(self): def parse(self):
"""Parse the page content for templates, links, etc. """Parse the page content for templates, links, etc.


+ 26
- 7
earwigbot/wiki/site.py View File

@@ -82,6 +82,8 @@ class Site(object):
- :py:meth:`get_category`: returns a Category for the given title - :py:meth:`get_category`: returns a Category for the given title
- :py:meth:`get_user`: returns a User object for the given name - :py:meth:`get_user`: returns a User object for the given name
""" """
SERVICE_API = 1
SERVICE_SQL = 2


def __init__(self, name=None, project=None, lang=None, base_url=None, def __init__(self, name=None, project=None, lang=None, base_url=None,
article_path=None, script_path=None, sql=None, article_path=None, script_path=None, sql=None,
@@ -228,7 +230,7 @@ class Site(object):
e = e.format(error.code) e = e.format(error.code)
else: else:
e = "API query failed." e = "API query failed."
raise exceptions.SiteAPIError(e)
raise exceptions.APIError(e)


result = response.read() result = response.read()
if response.headers.get("Content-Encoding") == "gzip": if response.headers.get("Content-Encoding") == "gzip":
@@ -242,7 +244,7 @@ class Site(object):
"""Given API query params, return the URL to query and POST data.""" """Given API query params, return the URL to query and POST data."""
if not self._base_url or self._script_path is None: if not self._base_url or self._script_path is None:
e = "Tried to do an API query, but no API URL is known." e = "Tried to do an API query, but no API URL is known."
raise exceptions.SiteAPIError(e)
raise exceptions.APIError(e)


url = ''.join((self.url, self._script_path, "/api.php")) url = ''.join((self.url, self._script_path, "/api.php"))
params["format"] = "json" # This is the only format we understand params["format"] = "json" # This is the only format we understand
@@ -260,7 +262,7 @@ class Site(object):
res = loads(result) # Try to parse as a JSON object res = loads(result) # Try to parse as a JSON object
except ValueError: except ValueError:
e = "API query failed: JSON could not be decoded." e = "API query failed: JSON could not be decoded."
raise exceptions.SiteAPIError(e)
raise exceptions.APIError(e)


try: try:
code = res["error"]["code"] code = res["error"]["code"]
@@ -271,7 +273,7 @@ class Site(object):
if code == "maxlag": # We've been throttled by the server if code == "maxlag": # We've been throttled by the server
if tries >= self._max_retries: if tries >= self._max_retries:
e = "Maximum number of retries reached ({0})." e = "Maximum number of retries reached ({0})."
raise exceptions.SiteAPIError(e.format(self._max_retries))
raise exceptions.APIError(e.format(self._max_retries))
tries += 1 tries += 1
msg = 'Server says "{0}"; retrying in {1} seconds ({2}/{3})' msg = 'Server says "{0}"; retrying in {1} seconds ({2}/{3})'
self._logger.info(msg.format(info, wait, tries, self._max_retries)) self._logger.info(msg.format(info, wait, tries, self._max_retries))
@@ -279,7 +281,7 @@ class Site(object):
return self._api_query(params, tries=tries, wait=wait*2) return self._api_query(params, tries=tries, wait=wait*2)
else: # Some unknown error occurred else: # Some unknown error occurred
e = 'API query failed: got error "{0}"; server says: "{1}".' e = 'API query failed: got error "{0}"; server says: "{1}".'
error = exceptions.SiteAPIError(e.format(code, info))
error = exceptions.APIError(e.format(code, info))
error.code, error.info = code, info error.code, error.info = code, info
raise error raise error


@@ -522,6 +524,10 @@ class Site(object):


self._sql_conn = oursql.connect(**args) self._sql_conn = oursql.connect(**args)


def _get_service_order(self):
    """Return the service identifiers in the order they should be tried.

    The list currently puts :py:attr:`SERVICE_SQL` first and
    :py:attr:`SERVICE_API` second.
    """
    first_choice, second_choice = self.SERVICE_SQL, self.SERVICE_API
    return [first_choice, second_choice]

@property @property
def name(self): def name(self):
"""The Site's name (or "wikiid" in the API), like ``"enwiki"``.""" """The Site's name (or "wikiid" in the API), like ``"enwiki"``."""
@@ -559,7 +565,7 @@ class Site(object):
This will first attempt to construct an API url from This will first attempt to construct an API url from
:py:attr:`self._base_url` and :py:attr:`self._script_path`. We need :py:attr:`self._base_url` and :py:attr:`self._script_path`. We need
both of these, or else we'll raise both of these, or else we'll raise
:py:exc:`~earwigbot.exceptions.SiteAPIError`. If
:py:exc:`~earwigbot.exceptions.APIError`. If
:py:attr:`self._base_url` is protocol-relative (introduced in MediaWiki :py:attr:`self._base_url` is protocol-relative (introduced in MediaWiki
1.18), we'll choose HTTPS only if :py:attr:`self._user_https` is 1.18), we'll choose HTTPS only if :py:attr:`self._user_https` is
``True``, otherwise HTTP. ``True``, otherwise HTTP.
@@ -578,7 +584,7 @@ class Site(object):
load it as a JSON object, and return it. load it as a JSON object, and return it.


If our request failed for some reason, we'll raise If our request failed for some reason, we'll raise
:py:exc:`~earwigbot.exceptions.SiteAPIError` with details. If that
:py:exc:`~earwigbot.exceptions.APIError` with details. If that
reason was due to maxlag, we'll sleep for a bit and then repeat the reason was due to maxlag, we'll sleep for a bit and then repeat the
query until we exceed :py:attr:`self._max_retries`. query until we exceed :py:attr:`self._max_retries`.


@@ -739,3 +745,16 @@ class Site(object):
else: else:
username = self._get_username() username = self._get_username()
return User(self, username) return User(self, username)

def delegate(self, services, args=None, kwargs=None):
    """Hand a task to the first usable service and return its result.

    *services* maps service identifiers (such as :py:attr:`SERVICE_API`
    and :py:attr:`SERVICE_SQL`) to callables. The identifiers are tried in
    the order given by :py:meth:`_get_service_order`; the first one that
    has an entry in *services* is called with ``*args`` and ``**kwargs``
    (both treated as empty when omitted or falsy). Not every service needs
    an entry.

    Raises :py:exc:`~earwigbot.exceptions.NoServiceError` if none of the
    ordered identifiers appears in *services*.
    """
    call_args = args if args else ()
    call_kwargs = kwargs if kwargs else {}

    for service in self._get_service_order():
        if service not in services:
            continue
        handler = services[service]
        return handler(*call_args, **call_kwargs)
    raise exceptions.NoServiceError(services)

+ 7
- 7
earwigbot/wiki/user.py View File

@@ -82,7 +82,7 @@ class User(object):


def __str__(self): def __str__(self):
"""Return a nice string representation of the User.""" """Return a nice string representation of the User."""
return '<User "{0}" of {1}>'.format(self._name, str(self._site))
return '<User "{0}" of {1}>'.format(self.name, str(self.site))


def _get_attribute(self, attr): def _get_attribute(self, attr):
"""Internally used to get an attribute by name. """Internally used to get an attribute by name.
@@ -107,8 +107,8 @@ class User(object):
is not defined. This defines it. is not defined. This defines it.
""" """
props = "blockinfo|groups|rights|editcount|registration|emailable|gender" props = "blockinfo|groups|rights|editcount|registration|emailable|gender"
result = self._site.api_query(action="query", list="users",
ususers=self._name, usprop=props)
result = self.site.api_query(action="query", list="users",
ususers=self._name, usprop=props)
res = result["query"]["users"][0] res = result["query"]["users"][0]


# normalize our username in case it was entered oddly # normalize our username in case it was entered oddly
@@ -275,9 +275,9 @@ class User(object):
No checks are made to see if it exists or not. Proper site namespace No checks are made to see if it exists or not. Proper site namespace
conventions are followed. conventions are followed.
""" """
prefix = self._site.namespace_id_to_name(constants.NS_USER)
prefix = self.site.namespace_id_to_name(constants.NS_USER)
pagename = ':'.join((prefix, self._name)) pagename = ':'.join((prefix, self._name))
return Page(self._site, pagename)
return Page(self.site, pagename)


def get_talkpage(self): def get_talkpage(self):
"""Return a Page object representing the user's talkpage. """Return a Page object representing the user's talkpage.
@@ -285,6 +285,6 @@ class User(object):
No checks are made to see if it exists or not. Proper site namespace No checks are made to see if it exists or not. Proper site namespace
conventions are followed. conventions are followed.
""" """
prefix = self._site.namespace_id_to_name(constants.NS_USER_TALK)
prefix = self.site.namespace_id_to_name(constants.NS_USER_TALK)
pagename = ':'.join((prefix, self._name)) pagename = ':'.join((prefix, self._name))
return Page(self._site, pagename)
return Page(self.site, pagename)

Loading…
Cancel
Save