
First stages of service delegation.

Ben Kurtovic, 12 years ago
commit 6433957ae9 (tags/v0.1^2)
7 changed files with 116 additions and 54 deletions:

  1. earwigbot/commands/afc_status.py    +2   -5
  2. earwigbot/exceptions.py             +16  -9
  3. earwigbot/tasks/afc_statistics.py   +1   -1
  4. earwigbot/wiki/category.py          +47  -8
  5. earwigbot/wiki/page.py              +17  -17
  6. earwigbot/wiki/site.py              +26  -7
  7. earwigbot/wiki/user.py              +7   -7

earwigbot/commands/afc_status.py (+2, -5)

@@ -109,12 +109,9 @@ class AFCStatus(Command):

def count_submissions(self):
"""Returns the number of open AFC submissions (count of CAT:PEND)."""
cat = self.site.get_category("Pending AfC submissions")
subs = len(cat.get_members(use_sql=True))

# Remove [[Wikipedia:Articles for creation/Redirects]] and
# Subtract two for [[Wikipedia:Articles for creation/Redirects]] and
# [[Wikipedia:Files for upload]], which aren't real submissions:
return subs - 2
return self.site.get_category("Pending AfC submissions").pages - 2

def count_redirects(self):
"""Returns the number of open redirect submissions. Calculated as the


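The net effect of the hunk above: count_submissions() no longer fetches every member of CAT:PEND over SQL just to count them; it reads the new Category.pages property and subtracts the two housekeeping pages. A minimal, runnable sketch of that logic, using a hypothetical stand-in for the real Category object:

# Illustrative sketch only: FakeCategory and its count are made-up stand-ins
# for a real earwigbot Category and its categoryinfo "pages" value.
class FakeCategory(object):
    pages = 742  # hypothetical size of CAT:PEND

def count_submissions(cat):
    # [[Wikipedia:Articles for creation/Redirects]] and
    # [[Wikipedia:Files for upload]] sit in the category but aren't real
    # submissions, so subtract two from the raw page count:
    return cat.pages - 2

print(count_submissions(FakeCategory()))  # -> 740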
earwigbot/exceptions.py (+16, -9)

@@ -31,7 +31,9 @@ This module contains all exceptions used by EarwigBot::
| +-- BrokenSocketError
+-- WikiToolsetError
+-- SiteNotFoundError
+-- SiteAPIError
+-- NoServiceError
+-- APIError
+-- SQLError
+-- LoginError
+-- NamespaceNotFoundError
+-- PageNotFoundError
@@ -45,7 +47,6 @@ This module contains all exceptions used by EarwigBot::
| +-- ContentTooBigError
| +-- SpamDetectedError
| +-- FilteredError
+-- SQLError
+-- CopyvioCheckError
+-- UnknownSearchEngineError
+-- UnsupportedSearchEngineError
@@ -81,7 +82,13 @@ class SiteNotFoundError(WikiToolsetError):
Raised by :py:class:`~earwigbot.wiki.sitesdb.SitesDB`.
"""

class SiteAPIError(WikiToolsetError):
class NoServiceError(WikiToolsetError):
"""No service is functioning to handle a specific task.

Raised by :py:meth:`Site.delegate <earwigbot.wiki.site.Site.delegate>`.
"""

class APIError(WikiToolsetError):
"""Couldn't connect to a site's API.

Perhaps the server doesn't exist, our URL is wrong or incomplete, or
@@ -90,6 +97,12 @@ class SiteAPIError(WikiToolsetError):
Raised by :py:meth:`Site.api_query <earwigbot.wiki.site.Site.api_query>`.
"""

class SQLError(WikiToolsetError):
"""Some error involving SQL querying occurred.

Raised by :py:meth:`Site.sql_query <earwigbot.wiki.site.Site.sql_query>`.
"""

class LoginError(WikiToolsetError):
"""An error occured while trying to login.

@@ -188,12 +201,6 @@ class FilteredError(EditError):
:py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
"""

class SQLError(WikiToolsetError):
"""Some error involving SQL querying occurred.

Raised by :py:meth:`Site.sql_query <earwigbot.wiki.site.Site.sql_query>`.
"""

class CopyvioCheckError(WikiToolsetError):
"""An error occured when checking a page for copyright violations.



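Taken together, these hunks rename SiteAPIError to APIError, move SQLError up beside it in the hierarchy, and introduce NoServiceError for the new delegation machinery. Below is a self-contained sketch of how calling code can tell the reorganized exceptions apart; the stand-in classes only mirror the hierarchy, the real ones live in earwigbot.exceptions:

# Minimal stand-ins mirroring the reorganized hierarchy, for illustration only;
# real code would import these names from earwigbot.exceptions instead.
class WikiToolsetError(Exception):
    pass

class NoServiceError(WikiToolsetError):
    pass

class APIError(WikiToolsetError):
    pass

class SQLError(WikiToolsetError):
    pass

def fetch_count():
    # Pretend the API call failed; a real Site.api_query() raises APIError.
    raise APIError("API query failed: JSON could not be decoded.")

try:
    fetch_count()
except (APIError, SQLError) as error:
    # Both query-layer failures are WikiToolsetErrors and can be handled alike.
    print("query failed: {0}".format(error))
except NoServiceError:
    # Raised by Site.delegate() when no service can handle the task.
    print("no service available")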
earwigbot/tasks/afc_statistics.py (+1, -1)

@@ -663,7 +663,7 @@ class AFCStatistics(Task):
return None, None, None
try:
content = self.get_revision_content(revid)
except exceptions.SiteAPIError:
except exceptions.APIError:
msg = "API error interrupted SQL query in get_special() for page (id: {0}, chart: {1})"
self.logger.exception(msg.format(pageid, chart))
return None, None, None


earwigbot/wiki/category.py (+47, -8)

@@ -49,7 +49,7 @@ class Category(Page):

def __str__(self):
"""Return a nice string representation of the Category."""
return '<Category "{0}" of {1}>'.format(self.title, str(self._site))
return '<Category "{0}" of {1}>'.format(self.title, str(self.site))

def _get_members_via_sql(self, limit, follow):
"""Iterate over Pages in the category using SQL."""
@@ -60,32 +60,32 @@ class Category(Page):

if limit:
query += " LIMIT ?"
result = self._site.sql_query(query, (title, limit))
result = self.site.sql_query(query, (title, limit))
else:
result = self._site.sql_query(query, (title,))
result = self.site.sql_query(query, (title,))

members = list(result)
for row in members:
base = row[0].replace("_", " ").decode("utf8")
namespace = self._site.namespace_id_to_name(row[1])
namespace = self.site.namespace_id_to_name(row[1])
if namespace:
title = u":".join((namespace, base))
else: # Avoid doing a silly (albeit valid) ":Pagename" thing
title = base
yield self._site.get_page(title, follow_redirects=follow,
yield self.site.get_page(title, follow_redirects=follow,
pageid=row[2])

def _get_members_via_api(self, limit, follow):
"""Iterate over Pages in the category using the API."""
params = {"action": "query", "list": "categorymembers",
"cmtitle": self._title}
"cmtitle": self.title}

while 1:
params["cmlimit"] = limit if limit else "max"
result = self._site.api_query(**params)
result = self.site.api_query(**params)
for member in result["query"]["categorymembers"]:
title = member["title"]
yield self._site.get_page(title, follow_redirects=follow)
yield self.site.get_page(title, follow_redirects=follow)

if "query-continue" in result:
qcontinue = result["query-continue"]["categorymembers"]
@@ -95,6 +95,45 @@ class Category(Page):
else:
break

def _get_size_via_sql(self, member_type):
query = "SELECT COUNT(*) FROM categorylinks WHERE cl_to = ?"
title = self.title.replace(" ", "_").split(":", 1)[1]
if member_type == "size":
result = self.site.sql_query(query, (title,))
else:
query += " AND cl_type = ?"
result = self.site.sql_query(query, (title, member_type[:-1]))
return list(result)[0][0]

def _get_size_via_api(self, member_type):
result = self.site.api_query(action="query", prop="categoryinfo",
titles=self.title)
info = result["query"]["pages"].values()[0]["categoryinfo"]
return info[member_type]

def _get_size(self, member_type):
services = {
self.site.SERVICE_API: self._get_size_via_api,
self.site.SERVICE_SQL: self._get_size_via_sql
}
return self.site.delegate(services, (member_type,))

@property
def size(self):
return self._get_size("size")

@property
def pages(self):
return self._get_size("pages")

@property
def files(self):
return self._get_size("files")

@property
def subcats(self):
return self._get_size("subcats")

def get_members(self, use_sql=False, limit=None, follow_redirects=None):
"""Iterate over Pages in the category.



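The new properties turn common category metrics into one attribute access each, with the backend picked by Site.delegate(). Here is a self-contained sketch of how the API-side helper reads a categoryinfo result, using a canned response dict as a stand-in for a live query (all numbers are hypothetical):

# Illustrative only: canned_result is a made-up stand-in for what a real
# action=query&prop=categoryinfo response looks like.
canned_result = {
    "query": {
        "pages": {
            "12345": {
                "title": "Category:Pending AfC submissions",
                "categoryinfo": {"size": 744, "pages": 740, "files": 0,
                                 "subcats": 4},
            }
        }
    }
}

def get_size(result, member_type):
    # Same extraction the helper performs: take the lone page entry and read
    # the requested counter out of its "categoryinfo" block.
    info = list(result["query"]["pages"].values())[0]["categoryinfo"]
    return info[member_type]

print(get_size(canned_result, "pages"))    # -> 740
print(get_size(canned_result, "subcats"))  # -> 4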
earwigbot/wiki/page.py (+17, -17)

@@ -117,7 +117,7 @@ class Page(CopyrightMixIn):
prefix = self._title.split(":", 1)[0]
if prefix != title: # ignore a page that's titled "Category" or "User"
try:
self._namespace = self._site.namespace_name_to_id(prefix)
self._namespace = self.site.namespace_name_to_id(prefix)
except exceptions.NamespaceNotFoundError:
self._namespace = 0
else:
@@ -137,7 +137,7 @@ class Page(CopyrightMixIn):

def __str__(self):
"""Return a nice string representation of the Page."""
return '<Page "{0}" of {1}>'.format(self.title, str(self._site))
return '<Page "{0}" of {1}>'.format(self.title, str(self.site))

def _assert_validity(self):
"""Used to ensure that our page's title is valid.
@@ -199,7 +199,7 @@ class Page(CopyrightMixIn):
Assuming the API is sound, this should not raise any exceptions.
"""
if not result:
query = self._site.api_query
query = self.site.api_query
result = query(action="query", rvprop="user", intoken="edit",
prop="info|revisions", rvlimit=1, rvdir="newer",
titles=self._title, inprop="protection|url")
@@ -263,7 +263,7 @@ class Page(CopyrightMixIn):
want to force content reloading.
"""
if not result:
query = self._site.api_query
query = self.site.api_query
result = query(action="query", prop="revisions", rvlimit=1,
rvprop="content|timestamp", titles=self._title)

@@ -310,8 +310,8 @@ class Page(CopyrightMixIn):

# Try the API query, catching most errors with our handler:
try:
result = self._site.api_query(**params)
except exceptions.SiteAPIError as error:
result = self.site.api_query(**params)
except exceptions.APIError as error:
if not hasattr(error, "code"):
raise # We can only handle errors with a code attribute
result = self._handle_edit_errors(error, params, tries)
@@ -375,12 +375,12 @@ class Page(CopyrightMixIn):

elif error.code in ["noedit-anon", "cantcreate-anon",
"noimageredirect-anon"]:
if not all(self._site._login_info):
if not all(self.site._login_info):
# Insufficient login info:
raise exceptions.PermissionsError(error.info)
if tries == 0:
# We have login info; try to login:
self._site._login(self._site._login_info)
self.site._login(self.site._login_info)
self._token = None # Need a new token; old one is invalid now
return self._edit(params=params, tries=1)
else:
@@ -416,13 +416,13 @@ class Page(CopyrightMixIn):
log in. Otherwise, raise PermissionsError with details.
"""
if assertion == "user":
if not all(self._site._login_info):
if not all(self.site._login_info):
# Insufficient login info:
e = "AssertEdit: user assertion failed, and no login info was provided."
raise exceptions.PermissionsError(e)
if tries == 0:
# We have login info; try to login:
self._site._login(self._site._login_info)
self.site._login(self.site._login_info)
self._token = None # Need a new token; old one is invalid now
return self._edit(params=params, tries=1)
else:
@@ -502,8 +502,8 @@ class Page(CopyrightMixIn):
return self._fullurl
else:
slug = quote(self._title.replace(" ", "_"), safe="/:")
path = self._site._article_path.replace("$1", slug)
return ''.join((self._site.url, path))
path = self.site._article_path.replace("$1", slug)
return ''.join((self.site.url, path))

@property
def namespace(self):
@@ -580,7 +580,7 @@ class Page(CopyrightMixIn):
otherwise missing or invalid.
"""
if self._namespace < 0:
ns = self._site.namespace_id_to_name(self._namespace)
ns = self.site.namespace_id_to_name(self._namespace)
e = u"Pages in the {0} namespace can't have talk pages.".format(ns)
raise exceptions.InvalidPageError(e)

@@ -594,7 +594,7 @@ class Page(CopyrightMixIn):
except IndexError:
body = self._title

new_prefix = self._site.namespace_id_to_name(new_ns)
new_prefix = self.site.namespace_id_to_name(new_ns)

# If the new page is in namespace 0, don't do ":Title" (it's correct,
# but unnecessary), just do "Title":
@@ -605,7 +605,7 @@ class Page(CopyrightMixIn):

if follow_redirects is None:
follow_redirects = self._follow_redirects
return Page(self._site, new_title, follow_redirects)
return Page(self.site, new_title, follow_redirects)

def get(self):
"""Return page content, which is cached if you try to call get again.
@@ -616,7 +616,7 @@ class Page(CopyrightMixIn):
if self._exists == self.PAGE_UNKNOWN:
# Kill two birds with one stone by doing an API query for both our
# attributes and our page content:
query = self._site.api_query
query = self.site.api_query
result = query(action="query", rvlimit=1, titles=self._title,
prop="info|revisions", inprop="protection|url",
intoken="edit", rvprop="content|timestamp")
@@ -680,7 +680,7 @@ class Page(CopyrightMixIn):
if not self._creator:
self._load()
self._assert_existence()
return self._site.get_user(self._creator)
return self.site.get_user(self._creator)

def parse(self):
"""Parse the page content for templates, links, etc.


earwigbot/wiki/site.py (+26, -7)

@@ -82,6 +82,8 @@ class Site(object):
- :py:meth:`get_category`: returns a Category for the given title
- :py:meth:`get_user`: returns a User object for the given name
"""
SERVICE_API = 1
SERVICE_SQL = 2

def __init__(self, name=None, project=None, lang=None, base_url=None,
article_path=None, script_path=None, sql=None,
@@ -228,7 +230,7 @@ class Site(object):
e = e.format(error.code)
else:
e = "API query failed."
raise exceptions.SiteAPIError(e)
raise exceptions.APIError(e)

result = response.read()
if response.headers.get("Content-Encoding") == "gzip":
@@ -242,7 +244,7 @@ class Site(object):
"""Given API query params, return the URL to query and POST data."""
if not self._base_url or self._script_path is None:
e = "Tried to do an API query, but no API URL is known."
raise exceptions.SiteAPIError(e)
raise exceptions.APIError(e)

url = ''.join((self.url, self._script_path, "/api.php"))
params["format"] = "json" # This is the only format we understand
@@ -260,7 +262,7 @@ class Site(object):
res = loads(result) # Try to parse as a JSON object
except ValueError:
e = "API query failed: JSON could not be decoded."
raise exceptions.SiteAPIError(e)
raise exceptions.APIError(e)

try:
code = res["error"]["code"]
@@ -271,7 +273,7 @@ class Site(object):
if code == "maxlag": # We've been throttled by the server
if tries >= self._max_retries:
e = "Maximum number of retries reached ({0})."
raise exceptions.SiteAPIError(e.format(self._max_retries))
raise exceptions.APIError(e.format(self._max_retries))
tries += 1
msg = 'Server says "{0}"; retrying in {1} seconds ({2}/{3})'
self._logger.info(msg.format(info, wait, tries, self._max_retries))
@@ -279,7 +281,7 @@ class Site(object):
return self._api_query(params, tries=tries, wait=wait*2)
else: # Some unknown error occurred
e = 'API query failed: got error "{0}"; server says: "{1}".'
error = exceptions.SiteAPIError(e.format(code, info))
error = exceptions.APIError(e.format(code, info))
error.code, error.info = code, info
raise error

@@ -522,6 +524,10 @@ class Site(object):

self._sql_conn = oursql.connect(**args)

def _get_service_order(self):
"""DOCSTRING """
return [self.SERVICE_SQL, self.SERVICE_API]

@property
def name(self):
"""The Site's name (or "wikiid" in the API), like ``"enwiki"``."""
@@ -559,7 +565,7 @@ class Site(object):
This will first attempt to construct an API url from
:py:attr:`self._base_url` and :py:attr:`self._script_path`. We need
both of these, or else we'll raise
:py:exc:`~earwigbot.exceptions.SiteAPIError`. If
:py:exc:`~earwigbot.exceptions.APIError`. If
:py:attr:`self._base_url` is protocol-relative (introduced in MediaWiki
1.18), we'll choose HTTPS only if :py:attr:`self._user_https` is
``True``, otherwise HTTP.
@@ -578,7 +584,7 @@ class Site(object):
load it as a JSON object, and return it.

If our request failed for some reason, we'll raise
:py:exc:`~earwigbot.exceptions.SiteAPIError` with details. If that
:py:exc:`~earwigbot.exceptions.APIError` with details. If that
reason was due to maxlag, we'll sleep for a bit and then repeat the
query until we exceed :py:attr:`self._max_retries`.

@@ -739,3 +745,16 @@ class Site(object):
else:
username = self._get_username()
return User(self, username)

def delegate(self, services, args=None, kwargs=None):
""" DOCSTRING"""
if not args:
args = ()
if not kwargs:
kwargs = {}

order = self._get_service_order()
for srv in order:
if srv in services:
return services[srv](*args, **kwargs)
raise exceptions.NoServiceError(services)
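
delegate() is the heart of the new mechanism: callers pass a mapping from service constants to callables, and the first service in _get_service_order() that has an entry is invoked. A minimal, runnable sketch of the pattern; FakeSite is a hypothetical stand-in, not the real earwigbot.wiki.site.Site:

# Hypothetical, self-contained sketch of the delegation pattern added here.
class NoServiceError(Exception):
    pass

class FakeSite(object):
    SERVICE_API = 1
    SERVICE_SQL = 2

    def _get_service_order(self):
        # Mirrors this commit: always try SQL first, then fall back to the API.
        return [self.SERVICE_SQL, self.SERVICE_API]

    def delegate(self, services, args=None, kwargs=None):
        if not args:
            args = ()
        if not kwargs:
            kwargs = {}
        for service in self._get_service_order():
            if service in services:
                return services[service](*args, **kwargs)
        raise NoServiceError(services)

# A caller builds the same kind of mapping Category._get_size() does; the SQL
# handler is omitted here, so delegate() falls through to the API callable:
site = FakeSite()
count = site.delegate(
    {site.SERVICE_API: lambda member_type: 740},  # 740 is a made-up count
    ("pages",),
)
print(count)  # -> 740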

earwigbot/wiki/user.py (+7, -7)

@@ -82,7 +82,7 @@ class User(object):

def __str__(self):
"""Return a nice string representation of the User."""
return '<User "{0}" of {1}>'.format(self._name, str(self._site))
return '<User "{0}" of {1}>'.format(self.name, str(self.site))

def _get_attribute(self, attr):
"""Internally used to get an attribute by name.
@@ -107,8 +107,8 @@ class User(object):
is not defined. This defines it.
"""
props = "blockinfo|groups|rights|editcount|registration|emailable|gender"
result = self._site.api_query(action="query", list="users",
ususers=self._name, usprop=props)
result = self.site.api_query(action="query", list="users",
ususers=self._name, usprop=props)
res = result["query"]["users"][0]

# normalize our username in case it was entered oddly
@@ -275,9 +275,9 @@ class User(object):
No checks are made to see if it exists or not. Proper site namespace
conventions are followed.
"""
prefix = self._site.namespace_id_to_name(constants.NS_USER)
prefix = self.site.namespace_id_to_name(constants.NS_USER)
pagename = ':'.join((prefix, self._name))
return Page(self._site, pagename)
return Page(self.site, pagename)

def get_talkpage(self):
"""Return a Page object representing the user's talkpage.
@@ -285,6 +285,6 @@ class User(object):
No checks are made to see if it exists or not. Proper site namespace
conventions are followed.
"""
prefix = self._site.namespace_id_to_name(constants.NS_USER_TALK)
prefix = self.site.namespace_id_to_name(constants.NS_USER_TALK)
pagename = ':'.join((prefix, self._name))
return Page(self._site, pagename)
return Page(self.site, pagename)
