소스 검색

Merge branch 'feature/service_delegation' into develop

tags/v0.1^2
Ben Kurtovic 12 년 전
부모
커밋
5217875a1e
9개의 변경된 파일, 268줄 추가, 94줄 삭제
  1. +2
    -5
      earwigbot/commands/afc_status.py
  2. +1
    -2
      earwigbot/commands/afc_submissions.py
  3. +16
    -9
      earwigbot/exceptions.py
  4. +1
    -1
      earwigbot/tasks/afc_history.py
  5. +2
    -4
      earwigbot/tasks/afc_statistics.py
  6. +111
    -35
      earwigbot/wiki/category.py
  7. +18
    -18
      earwigbot/wiki/page.py
  8. +110
    -13
      earwigbot/wiki/site.py
  9. +7
    -7
      earwigbot/wiki/user.py

+ 2
- 5
earwigbot/commands/afc_status.py 파일 보기

@@ -109,12 +109,9 @@ class AFCStatus(Command):

def count_submissions(self):
    """Return the number of open AFC submissions (the size of CAT:PEND)."""
    # Subtract two for [[Wikipedia:Articles for creation/Redirects]] and
    # [[Wikipedia:Files for upload]], which aren't real submissions:
    return self.site.get_category("Pending AfC submissions").pages - 2

def count_redirects(self):
"""Returns the number of open redirect submissions. Calculated as the


+ 1
- 2
earwigbot/commands/afc_submissions.py 파일 보기

@@ -55,8 +55,7 @@ class AFCSubmissions(Command):

site = self.bot.wiki.get_site()
category = site.get_category("Pending AfC submissions")
limit = number + len(self.ignore_list)
members = category.get_members(use_sql=True, limit=limit)
members = category.get_members(limit=number + len(self.ignore_list))
urls = [member.url for member in members if member.title not in self.ignore_list]
pages = ", ".join(urls[:number])
self.reply(data, "{0} pending AfC subs: {1}".format(number, pages))

+ 16
- 9
earwigbot/exceptions.py 파일 보기

@@ -31,7 +31,9 @@ This module contains all exceptions used by EarwigBot::
| +-- BrokenSocketError
+-- WikiToolsetError
+-- SiteNotFoundError
+-- SiteAPIError
+-- NoServiceError
+-- APIError
+-- SQLError
+-- LoginError
+-- NamespaceNotFoundError
+-- PageNotFoundError
@@ -45,7 +47,6 @@ This module contains all exceptions used by EarwigBot::
| +-- ContentTooBigError
| +-- SpamDetectedError
| +-- FilteredError
+-- SQLError
+-- CopyvioCheckError
+-- UnknownSearchEngineError
+-- UnsupportedSearchEngineError
@@ -81,7 +82,13 @@ class SiteNotFoundError(WikiToolsetError):
Raised by :py:class:`~earwigbot.wiki.sitesdb.SitesDB`.
"""

class NoServiceError(WikiToolsetError):
    """No service is functioning to handle a specific task.

    Raised by :py:meth:`Site.delegate <earwigbot.wiki.site.Site.delegate>`.
    """

class APIError(WikiToolsetError):
"""Couldn't connect to a site's API.

Perhaps the server doesn't exist, our URL is wrong or incomplete, or
@@ -90,6 +97,12 @@ class SiteAPIError(WikiToolsetError):
Raised by :py:meth:`Site.api_query <earwigbot.wiki.site.Site.api_query>`.
"""

class SQLError(WikiToolsetError):
    """An error occurred while performing an SQL query.

    Raised by :py:meth:`Site.sql_query <earwigbot.wiki.site.Site.sql_query>`.
    """

class LoginError(WikiToolsetError):
"""An error occured while trying to login.

@@ -188,12 +201,6 @@ class FilteredError(EditError):
:py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
"""

class SQLError(WikiToolsetError):
"""Some error involving SQL querying occurred.

Raised by :py:meth:`Site.sql_query <earwigbot.wiki.site.Site.sql_query>`.
"""

class CopyvioCheckError(WikiToolsetError):
"""An error occured when checking a page for copyright violations.



+ 1
- 1
earwigbot/tasks/afc_history.py 파일 보기

@@ -130,7 +130,7 @@ class AFCHistory(Task):
q_delete = "DELETE FROM page WHERE page_id = ?"
q_update = "UPDATE page SET page_date = ?, page_status = ? WHERE page_id = ?"
q_insert = "INSERT INTO page VALUES (?, ?, ?)"
members = category.get_members(use_sql=True)
members = category.get_members()

with self.conn.cursor() as cursor:
for title, pageid in members:


+ 2
- 4
earwigbot/tasks/afc_statistics.py 파일 보기

@@ -271,9 +271,7 @@ class AFCStatistics(Task):
tracked = [i[0] for i in cursor.fetchall()]

category = self.site.get_category(self.pending_cat)
pending = category.get_members(use_sql=True)

for title, pageid in pending:
for title, pageid in category.get_members():
if title in self.ignore_list:
continue
if pageid not in tracked:
@@ -663,7 +661,7 @@ class AFCStatistics(Task):
return None, None, None
try:
content = self.get_revision_content(revid)
except exceptions.SiteAPIError:
except exceptions.APIError:
msg = "API error interrupted SQL query in get_special() for page (id: {0}, chart: {1})"
self.logger.exception(msg.format(pageid, chart))
return None, None, None


+ 111
- 35
earwigbot/wiki/category.py 파일 보기

@@ -37,6 +37,13 @@ class Category(Page):
the category namespace; :py:meth:`~earwigbot.wiki.site.Site.get_category`
is shorthand, accepting category names without the namespace prefix.

*Attributes:*

- :py:attr:`size`: the total number of members in the category
- :py:attr:`pages`: the number of pages in the category
- :py:attr:`files`: the number of files in the category
- :py:attr:`subcats`: the number of subcategories in the category

*Public methods:*

- :py:meth:`get_members`: iterates over Pages in the category
@@ -49,7 +56,27 @@ class Category(Page):

def __str__(self):
    """Return a nice string representation of the Category."""
    return '<Category "{0}" of {1}>'.format(self.title, str(self.site))

def _get_members_via_api(self, limit, follow):
    """Iterate over Pages in the category using the API.

    Queries ``list=categorymembers`` in chunks, following ``cmcontinue``
    until the category is exhausted or *limit* members have been yielded.
    *follow* is passed to :py:meth:`site.get_page` as *follow_redirects*.
    """
    params = {"action": "query", "list": "categorymembers",
              "cmtitle": self.title}

    while 1:
        params["cmlimit"] = limit if limit else "max"
        result = self.site.api_query(**params)
        for member in result["query"]["categorymembers"]:
            yield self.site.get_page(member["title"],
                                     follow_redirects=follow)

        if "query-continue" not in result:
            break
        qcontinue = result["query-continue"]["categorymembers"]
        params["cmcontinue"] = qcontinue["cmcontinue"]
        if limit:
            limit -= len(result["query"]["categorymembers"])
            # Bug fix: without this check, once *limit* reached zero the
            # next pass requested cmlimit="max" and yielded the whole
            # remainder of the category instead of stopping:
            if limit <= 0:
                break

def _get_members_via_sql(self, limit, follow):
"""Iterate over Pages in the category using SQL."""
@@ -60,55 +87,103 @@ class Category(Page):

if limit:
query += " LIMIT ?"
result = self._site.sql_query(query, (title, limit))
result = self.site.sql_query(query, (title, limit))
else:
result = self._site.sql_query(query, (title,))
result = self.site.sql_query(query, (title,))

members = list(result)
for row in members:
base = row[0].replace("_", " ").decode("utf8")
namespace = self._site.namespace_id_to_name(row[1])
namespace = self.site.namespace_id_to_name(row[1])
if namespace:
title = u":".join((namespace, base))
else: # Avoid doing a silly (albeit valid) ":Pagename" thing
title = base
yield self._site.get_page(title, follow_redirects=follow,
yield self.site.get_page(title, follow_redirects=follow,
pageid=row[2])

def _get_members_via_api(self, limit, follow):
"""Iterate over Pages in the category using the API."""
params = {"action": "query", "list": "categorymembers",
"cmtitle": self._title}
def _get_size_via_api(self, member_type):
    """Return the size of the category using the API.

    *member_type* is one of ``"size"``, ``"pages"``, ``"files"``, or
    ``"subcats"``, matching the keys of the API's categoryinfo property.
    """
    # Bug fix: the original body was swapped with _get_size_via_sql (it
    # ran an SQL query here); also, prop=categoryinfo takes a "titles"
    # parameter, not "cmtitle".
    result = self.site.api_query(action="query", prop="categoryinfo",
                                 titles=self.title)
    info = result["query"]["pages"].values()[0]["categoryinfo"]
    return info[member_type]

def _get_size_via_sql(self, member_type):
    """Return the size of the category using SQL.

    *member_type* is ``"size"`` to count all members, or ``"pages"`` /
    ``"files"`` / ``"subcats"`` to count only rows of that cl_type.
    """
    # Bug fix: the original body was swapped with _get_size_via_api (it
    # ran an API query here).
    query = "SELECT COUNT(*) FROM categorylinks WHERE cl_to = ?"
    title = self.title.replace(" ", "_").split(":", 1)[1]
    if member_type == "size":
        result = self.site.sql_query(query, (title,))
    else:
        query += " AND cl_type = ?"
        # "pages" -> "page", "files" -> "file", "subcats" -> "subcat":
        result = self.site.sql_query(query, (title, member_type[:-1]))
    # COUNT(*) yields a single one-column row; return the bare count, not
    # the row tuple:
    return list(result)[0][0]

def _get_size(self, member_type):
    """Return the size of the category, delegating to the API or SQL."""
    # Bug fix: the original referenced self._size_via_api and
    # self._size_via_sql, which do not exist (the methods are named
    # _get_size_via_api / _get_size_via_sql), raising AttributeError.
    services = {
        self.site.SERVICE_API: self._get_size_via_api,
        self.site.SERVICE_SQL: self._get_size_via_sql
    }
    return self.site.delegate(services, (member_type,))

@property
def size(self):
    """The total number of members in the category.

    This counts pages, files, and subcategories together, so it equals
    :py:attr:`pages` + :py:attr:`files` + :py:attr:`subcats`. Whether the
    API or SQL is used depends on which services are enabled and how
    lagged each is; see :py:meth:`site.delegate()
    <earwigbot.wiki.site.Site.delegate>`.
    """
    return self._get_size("size")

@property
def pages(self):
    """The number of pages in the category.

    Whether the API or SQL is used depends on which services are enabled
    and how lagged each is; see :py:meth:`site.delegate()
    <earwigbot.wiki.site.Site.delegate>`.
    """
    return self._get_size("pages")

@property
def files(self):
    """The number of files in the category.

    Whether the API or SQL is used depends on which services are enabled
    and how lagged each is; see :py:meth:`site.delegate()
    <earwigbot.wiki.site.Site.delegate>`.
    """
    return self._get_size("files")

@property
def subcats(self):
    """The number of subcategories in the category.

    Whether the API or SQL is used depends on which services are enabled
    and how lagged each is; see :py:meth:`site.delegate()
    <earwigbot.wiki.site.Site.delegate>`.
    """
    return self._get_size("subcats")

def get_members(self, limit=None, follow_redirects=None):
    """Iterate over Pages in the category.

    If *limit* is given, we will provide this many pages, or less if the
    category is smaller. By default, *limit* is ``None``, meaning we will
    keep iterating over members until the category is exhausted.
    *follow_redirects* is passed directly to :py:meth:`site.get_page()
    <earwigbot.wiki.site.Site.get_page>`; it defaults to ``None``, which
    will use the value passed to our :py:meth:`__init__`.

    This will use either the API or SQL depending on which are enabled
    and the amount of lag on each. This is handled by
    :py:meth:`site.delegate() <earwigbot.wiki.site.Site.delegate>`.

    .. note::
        Be careful when iterating over very large categories with no
        limit: the sheer number of titles held in memory can become
        problematic.
    """
    services = {
        self.site.SERVICE_API: self._get_members_via_api,
        self.site.SERVICE_SQL: self._get_members_via_sql
    }
    if follow_redirects is None:
        follow_redirects = self._follow_redirects
    # Bug fix: both service methods take (limit, follow); the original
    # delegated only (follow_redirects,), dropping *limit* entirely and
    # binding follow_redirects to the services' limit parameter.
    return self.site.delegate(services, (limit, follow_redirects))

+ 18
- 18
earwigbot/wiki/page.py 파일 보기

@@ -117,7 +117,7 @@ class Page(CopyrightMixIn):
prefix = self._title.split(":", 1)[0]
if prefix != title: # ignore a page that's titled "Category" or "User"
try:
self._namespace = self._site.namespace_name_to_id(prefix)
self._namespace = self.site.namespace_name_to_id(prefix)
except exceptions.NamespaceNotFoundError:
self._namespace = 0
else:
@@ -137,7 +137,7 @@ class Page(CopyrightMixIn):

def __str__(self):
"""Return a nice string representation of the Page."""
return '<Page "{0}" of {1}>'.format(self.title, str(self._site))
return '<Page "{0}" of {1}>'.format(self.title, str(self.site))

def _assert_validity(self):
"""Used to ensure that our page's title is valid.
@@ -199,7 +199,7 @@ class Page(CopyrightMixIn):
Assuming the API is sound, this should not raise any exceptions.
"""
if not result:
query = self._site.api_query
query = self.site.api_query
result = query(action="query", rvprop="user", intoken="edit",
prop="info|revisions", rvlimit=1, rvdir="newer",
titles=self._title, inprop="protection|url")
@@ -263,7 +263,7 @@ class Page(CopyrightMixIn):
want to force content reloading.
"""
if not result:
query = self._site.api_query
query = self.site.api_query
result = query(action="query", prop="revisions", rvlimit=1,
rvprop="content|timestamp", titles=self._title)

@@ -310,8 +310,8 @@ class Page(CopyrightMixIn):

# Try the API query, catching most errors with our handler:
try:
result = self._site.api_query(**params)
except exceptions.SiteAPIError as error:
result = self.site.api_query(**params)
except exceptions.APIError as error:
if not hasattr(error, "code"):
raise # We can only handle errors with a code attribute
result = self._handle_edit_errors(error, params, tries)
@@ -375,12 +375,12 @@ class Page(CopyrightMixIn):

elif error.code in ["noedit-anon", "cantcreate-anon",
"noimageredirect-anon"]:
if not all(self._site._login_info):
if not all(self.site._login_info):
# Insufficient login info:
raise exceptions.PermissionsError(error.info)
if tries == 0:
# We have login info; try to login:
self._site._login(self._site._login_info)
self.site._login(self.site._login_info)
self._token = None # Need a new token; old one is invalid now
return self._edit(params=params, tries=1)
else:
@@ -416,13 +416,13 @@ class Page(CopyrightMixIn):
log in. Otherwise, raise PermissionsError with details.
"""
if assertion == "user":
if not all(self._site._login_info):
if not all(self.site._login_info):
# Insufficient login info:
e = "AssertEdit: user assertion failed, and no login info was provided."
raise exceptions.PermissionsError(e)
if tries == 0:
# We have login info; try to login:
self._site._login(self._site._login_info)
self.site._login(self.site._login_info)
self._token = None # Need a new token; old one is invalid now
return self._edit(params=params, tries=1)
else:
@@ -476,7 +476,7 @@ class Page(CopyrightMixIn):
Makes an API query only if we haven't already made one and the *pageid*
parameter to :py:meth:`__init__` was left as ``None``, which should be
true for all cases except when pages are returned by an SQL generator
(like :py:meth:`category.get_members(use_sql=True)
(like :py:meth:`category.get_members()
<earwigbot.wiki.category.Category.get_members>`).

Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
@@ -502,8 +502,8 @@ class Page(CopyrightMixIn):
return self._fullurl
else:
slug = quote(self._title.replace(" ", "_"), safe="/:")
path = self._site._article_path.replace("$1", slug)
return ''.join((self._site.url, path))
path = self.site._article_path.replace("$1", slug)
return ''.join((self.site.url, path))

@property
def namespace(self):
@@ -580,7 +580,7 @@ class Page(CopyrightMixIn):
otherwise missing or invalid.
"""
if self._namespace < 0:
ns = self._site.namespace_id_to_name(self._namespace)
ns = self.site.namespace_id_to_name(self._namespace)
e = u"Pages in the {0} namespace can't have talk pages.".format(ns)
raise exceptions.InvalidPageError(e)

@@ -594,7 +594,7 @@ class Page(CopyrightMixIn):
except IndexError:
body = self._title

new_prefix = self._site.namespace_id_to_name(new_ns)
new_prefix = self.site.namespace_id_to_name(new_ns)

# If the new page is in namespace 0, don't do ":Title" (it's correct,
# but unnecessary), just do "Title":
@@ -605,7 +605,7 @@ class Page(CopyrightMixIn):

if follow_redirects is None:
follow_redirects = self._follow_redirects
return Page(self._site, new_title, follow_redirects)
return Page(self.site, new_title, follow_redirects)

def get(self):
"""Return page content, which is cached if you try to call get again.
@@ -616,7 +616,7 @@ class Page(CopyrightMixIn):
if self._exists == self.PAGE_UNKNOWN:
# Kill two birds with one stone by doing an API query for both our
# attributes and our page content:
query = self._site.api_query
query = self.site.api_query
result = query(action="query", rvlimit=1, titles=self._title,
prop="info|revisions", inprop="protection|url",
intoken="edit", rvprop="content|timestamp")
@@ -680,7 +680,7 @@ class Page(CopyrightMixIn):
if not self._creator:
self._load()
self._assert_existence()
return self._site.get_user(self._creator)
return self.site.get_user(self._creator)

def parse(self):
"""Parse the page content for templates, links, etc.


+ 110
- 13
earwigbot/wiki/site.py 파일 보기

@@ -75,13 +75,17 @@ class Site(object):

- :py:meth:`api_query`: does an API query with kwargs as params
- :py:meth:`sql_query`: does an SQL query and yields its results
- :py:meth:`get_replag`: estimates the database replication lag
- :py:meth:`get_maxlag`: returns the internal database lag
- :py:meth:`get_replag`: estimates the external database lag
- :py:meth:`namespace_id_to_name`: returns names associated with an NS id
- :py:meth:`namespace_name_to_id`: returns the ID associated with a NS name
- :py:meth:`get_page`: returns a Page for the given title
- :py:meth:`get_category`: returns a Category for the given title
- :py:meth:`get_user`: returns a User object for the given name
- :py:meth:`delegate`: controls when the API or SQL is used
"""
SERVICE_API = 1
SERVICE_SQL = 2

def __init__(self, name=None, project=None, lang=None, base_url=None,
article_path=None, script_path=None, sql=None,
@@ -124,11 +128,13 @@ class Site(object):
self._max_retries = 6
self._last_query_time = 0
self._api_lock = Lock()
self._api_info_cache = {"maxlag": 0, "lastcheck": 0}

# Attributes used for SQL queries:
self._sql_data = sql
self._sql_conn = None
self._sql_lock = Lock()
self._sql_info_cache = {"replag": 0, "lastcheck": 0, "usable": None}

# Attribute used in copyright violation checks (see CopyrightMixIn):
self._search_config = search_config
@@ -201,7 +207,7 @@ class Site(object):
args.append(key + "=" + val)
return "&".join(args)

def _api_query(self, params, tries=0, wait=5):
def _api_query(self, params, tries=0, wait=5, ignore_maxlag=False):
"""Do an API query with *params* as a dict of parameters.

See the documentation for :py:meth:`api_query` for full implementation
@@ -215,7 +221,7 @@ class Site(object):
sleep(wait_time)
self._last_query_time = time()

url, data = self._build_api_query(params)
url, data = self._build_api_query(params, ignore_maxlag)
self._logger.debug("{0} -> {1}".format(url, data))

try:
@@ -228,7 +234,7 @@ class Site(object):
e = e.format(error.code)
else:
e = "API query failed."
raise exceptions.SiteAPIError(e)
raise exceptions.APIError(e)

result = response.read()
if response.headers.get("Content-Encoding") == "gzip":
@@ -238,17 +244,18 @@ class Site(object):

return self._handle_api_query_result(result, params, tries, wait)

def _build_api_query(self, params):
def _build_api_query(self, params, ignore_maxlag):
"""Given API query params, return the URL to query and POST data."""
if not self._base_url or self._script_path is None:
e = "Tried to do an API query, but no API URL is known."
raise exceptions.SiteAPIError(e)
raise exceptions.APIError(e)

url = ''.join((self.url, self._script_path, "/api.php"))
params["format"] = "json" # This is the only format we understand
if self._assert_edit: # If requested, ensure that we're logged in
params["assert"] = self._assert_edit
if self._maxlag: # If requested, don't overload the servers
if self._maxlag and not ignore_maxlag:
# If requested, don't overload the servers:
params["maxlag"] = self._maxlag

data = self._urlencode_utf8(params)
@@ -260,7 +267,7 @@ class Site(object):
res = loads(result) # Try to parse as a JSON object
except ValueError:
e = "API query failed: JSON could not be decoded."
raise exceptions.SiteAPIError(e)
raise exceptions.APIError(e)

try:
code = res["error"]["code"]
@@ -271,7 +278,7 @@ class Site(object):
if code == "maxlag": # We've been throttled by the server
if tries >= self._max_retries:
e = "Maximum number of retries reached ({0})."
raise exceptions.SiteAPIError(e.format(self._max_retries))
raise exceptions.APIError(e.format(self._max_retries))
tries += 1
msg = 'Server says "{0}"; retrying in {1} seconds ({2}/{3})'
self._logger.info(msg.format(info, wait, tries, self._max_retries))
@@ -279,7 +286,7 @@ class Site(object):
return self._api_query(params, tries=tries, wait=wait*2)
else: # Some unknown error occurred
e = 'API query failed: got error "{0}"; server says: "{1}".'
error = exceptions.SiteAPIError(e.format(code, info))
error = exceptions.APIError(e.format(code, info))
error.code, error.info = code, info
raise error

@@ -522,6 +529,48 @@ class Site(object):

self._sql_conn = oursql.connect(**args)

def _get_service_order(self):
    """Return a preferred order for using services (e.g. the API and SQL).

    A list is returned, most preferred service first. SERVICE_API is
    always included, since the API is expected to be always usable. In
    normal circumstances SERVICE_SQL comes first, since direct SQL is
    easier on the servers than API web queries, but it is demoted behind
    the API when replag exceeds three minutes (a cached value, rechecked
    at most every two minutes) unless API lag is also very high.
    SERVICE_SQL is omitted entirely if we cannot form a proper SQL
    connection.
    """
    now = time()
    if now - self._sql_info_cache["lastcheck"] > 120:
        self._sql_info_cache["lastcheck"] = now
        try:
            self._sql_info_cache["replag"] = sqllag = self.get_replag()
        except (exceptions.SQLError, oursql.Error):
            self._sql_info_cache["usable"] = False
            return [self.SERVICE_API]
        self._sql_info_cache["usable"] = True
    else:
        if not self._sql_info_cache["usable"]:
            return [self.SERVICE_API]
        # Bug fix: read the cached replag; the original left *sqllag*
        # unbound on this path, raising NameError below.
        sqllag = self._sql_info_cache["replag"]

    if sqllag > 180:
        # Heavy external replag; prefer the API unless it is lagging too.
        if not self._maxlag:
            return [self.SERVICE_API, self.SERVICE_SQL]
        if now - self._api_info_cache["lastcheck"] > 120:
            self._api_info_cache["lastcheck"] = now
            try:
                self._api_info_cache["maxlag"] = apilag = self.get_maxlag()
            except exceptions.APIError:
                self._api_info_cache["maxlag"] = apilag = 0
        else:
            # Bug fix: same NameError for *apilag* with a fresh cache.
            apilag = self._api_info_cache["maxlag"]
        if sqllag / (180.0 / self._maxlag) < apilag:
            return [self.SERVICE_SQL, self.SERVICE_API]
        return [self.SERVICE_API, self.SERVICE_SQL]

    return [self.SERVICE_SQL, self.SERVICE_API]

@property
def name(self):
"""The Site's name (or "wikiid" in the API), like ``"enwiki"``."""
@@ -559,7 +608,7 @@ class Site(object):
This will first attempt to construct an API url from
:py:attr:`self._base_url` and :py:attr:`self._script_path`. We need
both of these, or else we'll raise
:py:exc:`~earwigbot.exceptions.SiteAPIError`. If
:py:exc:`~earwigbot.exceptions.APIError`. If
:py:attr:`self._base_url` is protocol-relative (introduced in MediaWiki
1.18), we'll choose HTTPS only if :py:attr:`self._user_https` is
``True``, otherwise HTTP.
@@ -578,7 +627,7 @@ class Site(object):
load it as a JSON object, and return it.

If our request failed for some reason, we'll raise
:py:exc:`~earwigbot.exceptions.SiteAPIError` with details. If that
:py:exc:`~earwigbot.exceptions.APIError` with details. If that
reason was due to maxlag, we'll sleep for a bit and then repeat the
query until we exceed :py:attr:`self._max_retries`.

@@ -635,8 +684,30 @@ class Site(object):
for result in cur:
yield result

def get_maxlag(self, showall=False):
    """Return the internal database replication lag in seconds.

    In a typical setup this is the replication lag *within* the WMF's
    cluster, not the external lag affecting the Toolserver (see
    :py:meth:`get_replag` for that). It is useful combined with the
    ``maxlag`` API query parameter (added by config), which halts and
    retries queries when lag is too high, usually above five seconds.

    With *showall*, return a list with the lag of every server in the
    cluster, not just the most-lagged one.
    """
    params = {"action": "query", "meta": "siteinfo", "siprop": "dbrepllag"}
    if showall:
        params["sishowalldb"] = 1
    with self._api_lock:
        # ignore_maxlag: asking "how lagged are we?" must not itself be
        # rejected for lag.
        result = self._api_query(params, ignore_maxlag=True)
    lags = result["query"]["dbrepllag"]
    if showall:
        return [server["lag"] for server in lags]
    return lags[0]["lag"]

def get_replag(self):
"""Return the estimated database replication lag in seconds.
"""Return the estimated external database replication lag in seconds.

Requires SQL access. This function only makes sense on a replicated
database (e.g. the Wikimedia Toolserver) and on a wiki that receives a
@@ -739,3 +810,29 @@ class Site(object):
else:
username = self._get_username()
return User(self, username)

def delegate(self, services, args=None, kwargs=None):
    """Delegate a task to either the API or SQL depending on conditions.

    *services* maps a service name (:py:attr:`self.SERVICE_API
    <SERVICE_API>` or :py:attr:`self.SERVICE_SQL <SERVICE_SQL>`) to the
    function implementing the task for that service. The chosen function
    is called with the tuple *args* and the dict *kwargs*, both empty by
    default. The order in which services are tried is determined by
    :py:meth:`_get_service_order`.

    Not every service needs an entry in the dictionary. Raises
    :py:exc:`~earwigbot.exceptions.NoServiceError` if no appropriate
    service can be found.
    """
    args = args or ()
    kwargs = kwargs or {}
    for service in self._get_service_order():
        try:
            func = services[service]
        except KeyError:
            continue
        return func(*args, **kwargs)
    raise exceptions.NoServiceError(services)

+ 7
- 7
earwigbot/wiki/user.py 파일 보기

@@ -82,7 +82,7 @@ class User(object):

def __str__(self):
    """Return a nice string representation of the User."""
    return '<User "{0}" of {1}>'.format(self.name, str(self.site))

def _get_attribute(self, attr):
"""Internally used to get an attribute by name.
@@ -107,8 +107,8 @@ class User(object):
is not defined. This defines it.
"""
props = "blockinfo|groups|rights|editcount|registration|emailable|gender"
result = self._site.api_query(action="query", list="users",
ususers=self._name, usprop=props)
result = self.site.api_query(action="query", list="users",
ususers=self._name, usprop=props)
res = result["query"]["users"][0]

# normalize our username in case it was entered oddly
@@ -275,9 +275,9 @@ class User(object):
No checks are made to see if it exists or not. Proper site namespace
conventions are followed.
"""
prefix = self._site.namespace_id_to_name(constants.NS_USER)
prefix = self.site.namespace_id_to_name(constants.NS_USER)
pagename = ':'.join((prefix, self._name))
return Page(self._site, pagename)
return Page(self.site, pagename)

def get_talkpage(self):
    """Return a Page object representing the user's talkpage.

    No checks are made to see if it exists or not. Proper site namespace
    conventions are followed.
    """
    prefix = self.site.namespace_id_to_name(constants.NS_USER_TALK)
    pagename = ':'.join((prefix, self._name))
    return Page(self.site, pagename)

불러오는 중...
취소
저장