소스 검색

Code cleanup and typing

tags/v0.4
Ben Kurtovic 2 달 전
부모
커밋
6a7e6dcad9
10개의 변경된 파일1327개의 추가작업 그리고 920개의 파일을 삭제
  1. +1
    -1
      CHANGELOG
  2. +30
    -15
      earwigbot/__init__.py
  3. +3
    -0
      earwigbot/exceptions.py
  4. +27
    -4
      earwigbot/tasks/wikiproject_tagger.py
  5. +90
    -66
      earwigbot/wiki/category.py
  6. +49
    -5
      earwigbot/wiki/constants.py
  7. +290
    -225
      earwigbot/wiki/page.py
  8. +542
    -378
      earwigbot/wiki/site.py
  9. +166
    -123
      earwigbot/wiki/sitesdb.py
  10. +129
    -103
      earwigbot/wiki/user.py

+ 1
- 1
CHANGELOG 파일 보기

@@ -1,6 +1,6 @@
v0.4 (unreleased):

- Migrated to Python 3 (3.11+).
- Migrated to Python 3 (3.11+). Substantial code cleanup.
- Migrated from oursql to pymysql.
- Copyvios: Configurable proxy support for specific domains.
- Copyvios: Parser-directed URL redirection.


+ 30
- 15
earwigbot/__init__.py 파일 보기

@@ -1,4 +1,4 @@
# Copyright (C) 2009-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -20,15 +20,16 @@

"""
`EarwigBot <https://github.com/earwig/earwigbot>`_ is a Python robot that edits
Wikipedia and interacts with people over IRC.
Wikipedia and interacts over IRC.

See :file:`README.rst` for an overview, or the :file:`docs/` directory for
details. This documentation is also available `online
<https://packages.python.org/earwigbot>`_.
See :file:`README.rst` for an overview, or the :file:`docs/` directory for details.
This documentation is also available `online <https://packages.python.org/earwigbot>`_.
"""

import typing

__author__ = "Ben Kurtovic"
__copyright__ = "Copyright (C) 2009-2019 Ben Kurtovic"
__copyright__ = "Copyright (C) 2009-2024 Ben Kurtovic"
__license__ = "MIT License"
__version__ = "0.4.dev0"
__email__ = "ben.kurtovic@gmail.com"
@@ -57,12 +58,26 @@ from earwigbot import lazy

importer = lazy.LazyImporter()

bot = importer.new("earwigbot.bot")
commands = importer.new("earwigbot.commands")
config = importer.new("earwigbot.config")
exceptions = importer.new("earwigbot.exceptions")
irc = importer.new("earwigbot.irc")
managers = importer.new("earwigbot.managers")
tasks = importer.new("earwigbot.tasks")
util = importer.new("earwigbot.util")
wiki = importer.new("earwigbot.wiki")
if typing.TYPE_CHECKING:
from earwigbot import (
bot,
commands,
config,
exceptions,
irc,
managers,
tasks,
util,
wiki,
)

else:
bot = importer.new("earwigbot.bot")
commands = importer.new("earwigbot.commands")
config = importer.new("earwigbot.config")
exceptions = importer.new("earwigbot.exceptions")
irc = importer.new("earwigbot.irc")
managers = importer.new("earwigbot.managers")
tasks = importer.new("earwigbot.tasks")
util = importer.new("earwigbot.util")
wiki = importer.new("earwigbot.wiki")

+ 3
- 0
earwigbot/exceptions.py 파일 보기

@@ -107,6 +107,9 @@ class APIError(ServiceError):
Raised by :py:meth:`Site.api_query <earwigbot.wiki.site.Site.api_query>`.
"""

code: str
info: str


class SQLError(ServiceError):
"""Some error involving SQL querying occurred.


+ 27
- 4
earwigbot/tasks/wikiproject_tagger.py 파일 보기

@@ -43,13 +43,14 @@ JobKwargs = TypedDict(
"nocreate": NotRequired[bool],
"recursive": NotRequired[bool | int],
"tag-categories": NotRequired[bool],
"not-in-category": NotRequired[str],
"site": NotRequired[str],
"dry-run": NotRequired[bool],
},
)


@dataclass
@dataclass(frozen=True)
class Job:
"""
Represents a single wikiproject-tagging task.
@@ -68,11 +69,20 @@ class Job:
only_with: set[str] | None
nocreate: bool
tag_categories: bool
not_in_category: str | None
dry_run: bool

counter: int = 0
_counter: list[int] = [0] # Wrap to allow frozen updates
processed_cats: set[str] = field(default_factory=set)
processed_pages: set[str] = field(default_factory=set)
skip_pages: set[str] = field(default_factory=set)

@property
def counter(self) -> int:
return self._counter[0]

def add_to_counter(self, value: int) -> None:
self._counter[0] += value


class ShutoffEnabled(Exception):
@@ -90,7 +100,7 @@ class WikiProjectTagger(Task):
Usage: :command:`earwigbot -t wikiproject_tagger PATH --banner BANNER
[--category CAT | --file FILE] [--summary SUM] [--update] [--append PARAMS]
[--autoassess [CLASSES]] [--only-with BANNER] [--nocreate] [--recursive [NUM]]
[--site SITE] [--dry-run]`
[--not-in-category CAT] [--site SITE] [--dry-run]`

.. glossary::

@@ -126,6 +136,8 @@ class WikiProjectTagger(Task):
``NUM`` isn't provided, go infinitely (this can be dangerous)
``--tag-categories``
also tag category pages
``--not-in-category CAT``
skip talk pages that are already members of this category
``--site SITE``
the ID of the site to tag pages on, defaulting to the default site
``--dry-run``
@@ -189,6 +201,7 @@ class WikiProjectTagger(Task):
nocreate = kwargs.get("nocreate", False)
recursive = kwargs.get("recursive", 0)
tag_categories = kwargs.get("tag-categories", False)
not_in_category = kwargs.get("not-in-category")
dry_run = kwargs.get("dry-run", False)
banner, names = self.get_names(site, banner)
if not names:
@@ -210,6 +223,7 @@ class WikiProjectTagger(Task):
only_with=only_with,
nocreate=nocreate,
tag_categories=tag_categories,
not_in_category=not_in_category,
dry_run=dry_run,
)

@@ -224,6 +238,11 @@ class WikiProjectTagger(Task):
"""
Run a tagging *job* on a given *site*.
"""
if job.not_in_category:
skip_category = site.get_category(job.not_in_category)
for page in skip_category.get_members():
job.skip_pages.add(page.title)

if "category" in kwargs:
title = kwargs["category"]
title = self.guess_namespace(site, title, constants.NS_CATEGORY)
@@ -322,6 +341,10 @@ class WikiProjectTagger(Task):
if not page.is_talkpage:
page = page.toggle_talk()

if page.title in job.skip_pages:
self.logger.debug(f"Skipping page, in category to skip: [[{page.title}]]")
return

if page.title in job.processed_pages:
self.logger.debug(f"Skipping page, already processed: [[{page.title}]]")
return
@@ -330,7 +353,7 @@ class WikiProjectTagger(Task):
if job.counter % 10 == 0: # Do a shutoff check every ten pages
if self.shutoff_enabled(page.site):
raise ShutoffEnabled()
job.counter += 1
job.add_to_counter(1)

try:
code = page.parse()


+ 90
- 66
earwigbot/wiki/category.py 파일 보기

@@ -1,4 +1,4 @@
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -18,6 +18,9 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from collections.abc import Iterator

from earwigbot.wiki.constants import Service
from earwigbot.wiki.page import Page

__all__ = ["Category"]
@@ -27,14 +30,14 @@ class Category(Page):
"""
**EarwigBot: Wiki Toolset: Category**

Represents a category on a given :py:class:`~earwigbot.wiki.site.Site`, a
subclass of :py:class:`~earwigbot.wiki.page.Page`. Provides additional
methods, but :py:class:`~earwigbot.wiki.page.Page`'s own methods should
work fine on :py:class:`Category` objects. :py:meth:`site.get_page()
<earwigbot.wiki.site.Site.get_page>` will return a :py:class:`Category`
instead of a :py:class:`~earwigbot.wiki.page.Page` if the given title is in
the category namespace; :py:meth:`~earwigbot.wiki.site.Site.get_category`
is shorthand, accepting category names without the namespace prefix.
Represents a category on a given :py:class:`~earwigbot.wiki.site.Site`, a subclass
of :py:class:`~earwigbot.wiki.page.Page`. Provides additional methods, but
:py:class:`~earwigbot.wiki.page.Page`'s own methods should work fine on
:py:class:`Category` objects. :py:meth:`site.get_page()
<earwigbot.wiki.site.Site.get_page>` will return a :py:class:`Category` instead of
a :py:class:`~earwigbot.wiki.page.Page` if the given title is in the category
namespace; :py:meth:`~earwigbot.wiki.site.Site.get_category` is shorthand,
accepting category names without the namespace prefix.

*Attributes:*

@@ -48,22 +51,30 @@ class Category(Page):
- :py:meth:`get_members`: iterates over Pages in the category
"""

def __repr__(self):
"""Return the canonical string representation of the Category."""
def __repr__(self) -> str:
"""
Return the canonical string representation of the Category.
"""
res = "Category(title={0!r}, follow_redirects={1!r}, site={2!r})"
return res.format(self._title, self._follow_redirects, self._site)

def __str__(self):
"""Return a nice string representation of the Category."""
def __str__(self) -> str:
"""
Return a nice string representation of the Category.
"""
return f'<Category "{self.title}" of {str(self.site)}>'

def __iter__(self):
"""Iterate over all members of the category."""
def __iter__(self) -> Iterator[Page]:
"""
Iterate over all members of the category.
"""
return self.get_members()

def _get_members_via_api(self, limit, follow):
"""Iterate over Pages in the category using the API."""
params = {
def _get_members_via_api(self, limit: int | None, follow: bool) -> Iterator[Page]:
"""
Iterate over Pages in the category using the API.
"""
params: dict[str, str | int] = {
"action": "query",
"list": "categorymembers",
"cmtitle": self.title,
@@ -84,8 +95,10 @@ class Category(Page):
else:
break

def _get_members_via_sql(self, limit, follow):
"""Iterate over Pages in the category using SQL."""
def _get_members_via_sql(self, limit: int | None, follow: bool) -> Iterator[Page]:
"""
Iterate over Pages in the category using SQL.
"""
query = """SELECT page_title, page_namespace, page_id FROM page
JOIN categorylinks ON page_id = cl_from
WHERE cl_to = ?"""
@@ -107,16 +120,20 @@ class Category(Page):
title = base
yield self.site.get_page(title, follow_redirects=follow, pageid=row[2])

def _get_size_via_api(self, member_type):
"""Return the size of the category using the API."""
def _get_size_via_api(self, member_type: str) -> int:
"""
Return the size of the category using the API.
"""
result = self.site.api_query(
action="query", prop="categoryinfo", titles=self.title
)
info = list(result["query"]["pages"].values())[0]["categoryinfo"]
return info[member_type]

def _get_size_via_sql(self, member_type):
"""Return the size of the category using SQL."""
def _get_size_via_sql(self, member_type: str) -> int:
"""
Return the size of the category using SQL.
"""
query = "SELECT COUNT(*) FROM categorylinks WHERE cl_to = ?"
title = self.title.replace(" ", "_").split(":", 1)[1]
if member_type == "size":
@@ -126,49 +143,54 @@ class Category(Page):
result = self.site.sql_query(query, (title, member_type[:-1]))
return list(result)[0][0]

def _get_size(self, member_type):
"""Return the size of the category."""
def _get_size(self, member_type: str) -> int:
"""
Return the size of the category.
"""
services = {
self.site.SERVICE_API: self._get_size_via_api,
self.site.SERVICE_SQL: self._get_size_via_sql,
Service.API: self._get_size_via_api,
Service.SQL: self._get_size_via_sql,
}
return self.site.delegate(services, (member_type,))
return self.site.delegate(services, member_type)

@property
def size(self):
"""The total number of members in the category.
def size(self) -> int:
"""
The total number of members in the category.

Includes pages, files, and subcats. Equal to :py:attr:`pages` +
:py:attr:`files` + :py:attr:`subcats`. This will use either the API or
SQL depending on which are enabled and the amount of lag on each. This
is handled by :py:meth:`site.delegate()
<earwigbot.wiki.site.Site.delegate>`.
:py:attr:`files` + :py:attr:`subcats`. This will use either the API or SQL
depending on which are enabled and the amount of lag on each. This is handled
by :py:meth:`site.delegate() <earwigbot.wiki.site.Site.delegate>`.
"""
return self._get_size("size")

@property
def pages(self):
"""The number of pages in the category.
def pages(self) -> int:
"""
The number of pages in the category.

This will use either the API or SQL depending on which are enabled and
the amount of lag on each. This is handled by :py:meth:`site.delegate()
This will use either the API or SQL depending on which are enabled and the
amount of lag on each. This is handled by :py:meth:`site.delegate()
<earwigbot.wiki.site.Site.delegate>`.
"""
return self._get_size("pages")

@property
def files(self):
"""The number of files in the category.
def files(self) -> int:
"""
The number of files in the category.

This will use either the API or SQL depending on which are enabled and
the amount of lag on each. This is handled by :py:meth:`site.delegate()
This will use either the API or SQL depending on which are enabled and the
amount of lag on each. This is handled by :py:meth:`site.delegate()
<earwigbot.wiki.site.Site.delegate>`.
"""
return self._get_size("files")

@property
def subcats(self):
"""The number of subcategories in the category.
def subcats(self) -> int:
"""
The number of subcategories in the category.

This will use either the API or SQL depending on which are enabled and
the amount of lag on each. This is handled by :py:meth:`site.delegate()
@@ -176,36 +198,38 @@ class Category(Page):
"""
return self._get_size("subcats")

def get_members(self, limit=None, follow_redirects=None):
"""Iterate over Pages in the category.
def get_members(
self, limit: int | None = None, follow_redirects: bool | None = None
) -> Iterator[Page]:
"""
Iterate over Pages in the category.

If *limit* is given, we will provide this many pages, or less if the
category is smaller. By default, *limit* is ``None``, meaning we will
keep iterating over members until the category is exhausted.
*follow_redirects* is passed directly to :py:meth:`site.get_page()
<earwigbot.wiki.site.Site.get_page>`; it defaults to ``None``, which
will use the value passed to our :py:meth:`__init__`.
If *limit* is given, we will provide this many pages, or less if the category
is smaller. By default, *limit* is ``None``, meaning we will keep iterating
over members until the category is exhausted. *follow_redirects* is passed
directly to :py:meth:`site.get_page() <earwigbot.wiki.site.Site.get_page>`;
it defaults to ``None``, which will use the value passed to our
:py:meth:`__init__`.

This will use either the API or SQL depending on which are enabled and
the amount of lag on each. This is handled by :py:meth:`site.delegate()
This will use either the API or SQL depending on which are enabled and the
amount of lag on each. This is handled by :py:meth:`site.delegate()
<earwigbot.wiki.site.Site.delegate>`.

.. note::
Be careful when iterating over very large categories with no limit.
If using the API, at best, you will make one query per 5000 pages,
which can add up significantly for categories with hundreds of
thousands of members. As for SQL, note that *all page titles are
stored internally* as soon as the query is made, so the site-wide
SQL lock can be freed and unrelated queries can be made without
requiring a separate connection to be opened. This is generally not
an issue unless your category's size approaches several hundred
Be careful when iterating over very large categories with no limit. If using
the API, at best, you will make one query per 5000 pages, which can add up
significantly for categories with hundreds of thousands of members. As for
SQL, note that *all page titles are stored internally* as soon as the query
is made, so the site-wide SQL lock can be freed and unrelated queries can be
made without requiring a separate connection to be opened. This is generally
not an issue unless your category's size approaches several hundred
thousand, in which case the sheer number of titles in memory becomes
problematic.
"""
services = {
self.site.SERVICE_API: self._get_members_via_api,
self.site.SERVICE_SQL: self._get_members_via_sql,
Service.API: self._get_members_via_api,
Service.SQL: self._get_members_via_sql,
}
if follow_redirects is None:
follow_redirects = self._follow_redirects
return self.site.delegate(services, (limit, follow_redirects))
return self.site.delegate(services, limit, follow_redirects)

+ 49
- 5
earwigbot/wiki/constants.py 파일 보기

@@ -31,14 +31,50 @@ Import directly with ``from earwigbot.wiki import constants`` or
:py:mod:`earwigbot.wiki` directly (e.g. ``earwigbot.wiki.USER_AGENT``).
"""

__all__ = [
"NS_CATEGORY_TALK",
"NS_CATEGORY",
"NS_DRAFT_TALK",
"NS_DRAFT",
"NS_FILE_TALK",
"NS_FILE",
"NS_HELP_TALK",
"NS_HELP",
"NS_MAIN",
"NS_MEDIA",
"NS_MEDIAWIKI_TALK",
"NS_MEDIAWIKI",
"NS_MODULE_TALK",
"NS_MODULE",
"NS_PORTAL_TALK",
"NS_PORTAL",
"NS_PROJECT_TALK",
"NS_PROJECT",
"NS_SPECIAL",
"NS_TALK",
"NS_TEMPLATE_TALK",
"NS_TEMPLATE",
"NS_USER_TALK",
"NS_USER",
"USER_AGENT",
]

import platform
from enum import Enum

import earwigbot

# Default User Agent when making API queries:
from platform import python_version as _p
USER_AGENT = (
f"EarwigBot/{earwigbot.__version__} "
f"(Python/{platform.python_version()}; https://github.com/earwig/earwigbot)"
)

from earwigbot import __version__ as _v

USER_AGENT = "EarwigBot/{0} (Python/{1}; https://github.com/earwig/earwigbot)"
USER_AGENT = USER_AGENT.format(_v, _p())
del _v, _p
class Service(Enum):
API = 1
SQL = 2


# Default namespace IDs:
NS_MAIN = 0
@@ -57,5 +93,13 @@ NS_HELP = 12
NS_HELP_TALK = 13
NS_CATEGORY = 14
NS_CATEGORY_TALK = 15

NS_PORTAL = 100
NS_PORTAL_TALK = 101
NS_DRAFT = 118
NS_DRAFT_TALK = 119
NS_MODULE = 828
NS_MODULE_TALK = 829

NS_SPECIAL = -1
NS_MEDIA = -2

+ 290
- 225
earwigbot/wiki/page.py 파일 보기

@@ -1,4 +1,4 @@
# Copyright (C) 2009-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -18,17 +18,27 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import annotations

import hashlib
import re
from hashlib import md5
from logging import NullHandler, getLogger
from time import gmtime, strftime
from urllib.parse import quote
import time
import typing
import urllib.parse
from collections.abc import Iterable
from logging import Logger, NullHandler, getLogger
from typing import Any

import mwparserfromhell

from earwigbot import exceptions
from earwigbot.exceptions import APIError
from earwigbot.wiki.copyvios import CopyvioMixIn

if typing.TYPE_CHECKING:
from earwigbot.wiki.site import Site
from earwigbot.wiki.user import User

__all__ = ["Page"]


@@ -36,10 +46,10 @@ class Page(CopyvioMixIn):
"""
**EarwigBot: Wiki Toolset: Page**

Represents a page on a given :py:class:`~earwigbot.wiki.site.Site`. Has
methods for getting information about the page, getting page content, and
so on. :py:class:`~earwigbot.wiki.category.Category` is a subclass of
:py:class:`Page` with additional methods.
Represents a page on a given :py:class:`~earwigbot.wiki.site.Site`. Has methods for
getting information about the page, getting page content, and so on.
:py:class:`~earwigbot.wiki.category.Category` is a subclass of :py:class:`Page`
with additional methods.

*Attributes:*

@@ -59,20 +69,19 @@ class Page(CopyvioMixIn):
- :py:meth:`reload`: forcibly reloads the page's attributes
- :py:meth:`toggle_talk`: returns a content page's talk page, or vice versa
- :py:meth:`get`: returns the page's content
- :py:meth:`get_redirect_target`: returns the page's destination if it is a
redirect
- :py:meth:`get_creator`: returns a User object representing the first
person to edit the page
- :py:meth:`get_redirect_target`: returns the page's destination if it is a redirect
- :py:meth:`get_creator`: returns a User object representing the first person to
edit the page
- :py:meth:`parse`: parses the page content for templates, links, etc
- :py:meth:`edit`: replaces the page's content or creates a new page
- :py:meth:`add_section`: adds a new section at the bottom of the page
- :py:meth:`check_exclusion`: checks whether or not we are allowed to edit
the page, per ``{{bots}}``/``{{nobots}}``
- :py:meth:`check_exclusion`: checks whether or not we are allowed to edit the
page, per ``{{bots}}``/``{{nobots}}``

- :py:meth:`~earwigbot.wiki.copyvios.CopyrightMixIn.copyvio_check`:
checks the page for copyright violations
- :py:meth:`~earwigbot.wiki.copyvios.CopyrightMixIn.copyvio_compare`:
checks the page like :py:meth:`copyvio_check`, but against a specific URL
- :py:meth:`~earwigbot.wiki.copyvios.CopyrightMixIn.copyvio_check`: checks the page
for copyright violations
- :py:meth:`~earwigbot.wiki.copyvios.CopyrightMixIn.copyvio_compare`: checks the
page like :py:meth:`copyvio_check`, but against a specific URL
"""

PAGE_UNKNOWN = 0
@@ -80,18 +89,26 @@ class Page(CopyvioMixIn):
PAGE_MISSING = 2
PAGE_EXISTS = 3

def __init__(self, site, title, follow_redirects=False, pageid=None, logger=None):
"""Constructor for new Page instances.
def __init__(
self,
site: Site,
title: str,
follow_redirects: bool = False,
pageid: int | None = None,
logger: Logger | None = None,
) -> None:
"""
Constructor for new Page instances.

Takes four arguments: a Site object, the Page's title (or pagename),
whether or not to follow redirects (optional, defaults to False), and
a page ID to supplement the title (optional, defaults to None - i.e.,
we will have to query the API to get it).
Takes four arguments: a Site object, the Page's title (or pagename), whether or
not to follow redirects (optional, defaults to False), and a page ID to
supplement the title (optional, defaults to None - i.e., we will have to query
the API to get it).

As with User, site.get_page() is preferred.

__init__() will not do any API queries, but it will use basic namespace
logic to determine our namespace ID and if we are a talkpage.
__init__() will not do any API queries, but it will use basic namespace logic
to determine our namespace ID and if we are a talkpage.
"""
super().__init__(site)
self._site = site
@@ -108,16 +125,16 @@ class Page(CopyvioMixIn):

# Attributes to be loaded through the API:
self._exists = self.PAGE_UNKNOWN
self._is_redirect = None
self._lastrevid = None
self._protection = None
self._fullurl = None
self._content = None
self._creator = None
self._is_redirect: bool | None = None
self._lastrevid: int | None = None
self._protection: dict | None = None
self._fullurl: str | None = None
self._content: str | None = None
self._creator: str | None = None

# Attributes used for editing/deleting/protecting/etc:
self._basetimestamp = None
self._starttimestamp = None
self._basetimestamp: str | None = None
self._starttimestamp: str | None = None

# Try to determine the page's namespace using our site's namespace
# converter:
@@ -137,54 +154,60 @@ class Page(CopyvioMixIn):
else:
self._is_talkpage = self._namespace % 2 == 1

def __repr__(self):
"""Return the canonical string representation of the Page."""
def __repr__(self) -> str:
"""
Return the canonical string representation of the Page.
"""
res = "Page(title={0!r}, follow_redirects={1!r}, site={2!r})"
return res.format(self._title, self._follow_redirects, self._site)

def __str__(self):
"""Return a nice string representation of the Page."""
def __str__(self) -> str:
"""
Return a nice string representation of the Page.
"""
return f'<Page "{self.title}" of {str(self.site)}>'

def _assert_validity(self):
"""Used to ensure that our page's title is valid.
def _assert_validity(self) -> None:
"""
Used to ensure that our page's title is valid.

If this method is called when our page is not valid (and after
_load_attributes() has been called), InvalidPageError will be raised.

Note that validity != existence. If a page's title is invalid (e.g, it
contains "[") it will always be invalid, and cannot be edited.
Note that validity != existence. If a page's title is invalid (e.g, it contains
"[") it will always be invalid, and cannot be edited.
"""
if self._exists == self.PAGE_INVALID:
e = f"Page '{self._title}' is invalid."
raise exceptions.InvalidPageError(e)

def _assert_existence(self):
"""Used to ensure that our page exists.
def _assert_existence(self) -> None:
"""
Used to ensure that our page exists.

If this method is called when our page doesn't exist (and after
_load_attributes() has been called), PageNotFoundError will be raised.
It will also call _assert_validity() beforehand.
_load_attributes() has been called), PageNotFoundError will be raised. It will
also call _assert_validity() beforehand.
"""
self._assert_validity()
if self._exists == self.PAGE_MISSING:
e = f"Page '{self._title}' does not exist."
raise exceptions.PageNotFoundError(e)

def _load(self):
"""Call _load_attributes() and follows redirects if we're supposed to.
def _load(self) -> None:
"""
Call _load_attributes() and follow redirects if we're supposed to.

This method will only follow redirects if follow_redirects=True was
passed to __init__() (perhaps indirectly passed by site.get_page()).
It avoids the API's &redirects param in favor of manual following,
so we can act more realistically (we don't follow double redirects, and
circular redirects don't break us).
This method will only follow redirects if follow_redirects=True was passed to
__init__() (perhaps indirectly passed by site.get_page()). It avoids the API's
&redirects param in favor of manual following, so we can act more realistically
(we don't follow double redirects, and circular redirects don't break us).

This will raise RedirectError if we have a problem following, but that
is a bug and should NOT happen.
This will raise RedirectError if we have a problem following, but that is a bug
and should NOT happen.

If we're following a redirect, this will make a grand total of three
API queries. It's a lot, but each one is quite small.
If we're following a redirect, this will make a grand total of three API
queries. It's a lot, but each one is quite small.
"""
self._load_attributes()

@@ -194,14 +217,14 @@ class Page(CopyvioMixIn):
self._content = None # reset the content we just loaded
self._load_attributes()

def _load_attributes(self, result=None):
"""Load various data from the API in a single query.
def _load_attributes(self, result: dict | None = None) -> None:
"""
Load various data from the API in a single query.

Loads self._title, ._exists, ._is_redirect, ._pageid, ._fullurl,
._protection, ._namespace, ._is_talkpage, ._creator, ._lastrevid, and
._starttimestamp using the API. It will do a query of its own unless
*result* is provided, in which case we'll pretend *result* is what the
query returned.
Loads self._title, ._exists, ._is_redirect, ._pageid, ._fullurl, ._protection,
._namespace, ._is_talkpage, ._creator, ._lastrevid, and ._starttimestamp using
the API. It will do a query of its own unless *result* is provided, in which
case we'll pretend *result* is what the query returned.

Assuming the API is sound, this should not raise any exceptions.
"""
@@ -217,6 +240,7 @@ class Page(CopyvioMixIn):
titles=self._title,
)

assert result is not None
if "interwiki" in result["query"]:
self._title = result["query"]["interwiki"][0]["title"]
self._exists = self.PAGE_INVALID
@@ -242,7 +266,7 @@ class Page(CopyvioMixIn):

self._fullurl = res["fullurl"]
self._protection = res["protection"]
self._starttimestamp = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime())
self._starttimestamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

# We've determined the namespace and talkpage status in __init__()
# based on the title, but now we can be sure:
@@ -256,15 +280,15 @@ class Page(CopyvioMixIn):
except KeyError:
pass

def _load_content(self, result=None):
"""Load current page content from the API.
def _load_content(self, result: dict | None = None) -> None:
"""
Load current page content from the API.

If *result* is provided, we'll pretend that is the result of an API
query and try to get content from that. Otherwise, we'll do an API
query on our own.
If *result* is provided, we'll pretend that is the result of an API query and
try to get content from that. Otherwise, we'll do an API query on our own.

Don't call this directly, ever; use reload() followed by get() if you
want to force content reloading.
Don't call this directly, ever; use reload() followed by get() if you want to
force content reloading.
"""
if not result:
query = self.site.api_query
@@ -277,6 +301,7 @@ class Page(CopyvioMixIn):
titles=self._title,
)

assert result is not None
res = list(result["query"]["pages"].values())[0]
try:
revision = res["revisions"][0]
@@ -291,32 +316,32 @@ class Page(CopyvioMixIn):

def _edit(
self,
params=None,
text=None,
summary=None,
minor=None,
bot=None,
force=None,
section=None,
captcha_id=None,
captcha_word=None,
**kwargs,
):
"""Edit the page!

If *params* is given, we'll use it as our API query parameters.
Otherwise, we'll build params using the given kwargs via
_build_edit_params().

We'll then try to do the API query, and catch any errors the API raises
in _handle_edit_errors(). We'll then throw these back as subclasses of
EditError.
params: dict[str, Any] | None = None,
text: str | None = None,
summary: str | None = None,
minor: bool | None = None,
bot: bool | None = None,
force: bool | None = None,
section: int | str | None = None,
captcha_id: str | None = None,
captcha_word: str | None = None,
**kwargs: Any,
) -> None:
"""
Edit the page!

If *params* is given, we'll use it as our API query parameters. Otherwise,
we'll build params using the given kwargs via _build_edit_params().

We'll then try to do the API query, and catch any errors the API raises in
_handle_edit_errors(). We'll then throw these back as subclasses of EditError.
"""
# Weed out invalid pages before we get too far:
self._assert_validity()

# Build our API query string:
if not params:
assert text is not None, "Edit text must be provided when params are unset"
params = self._build_edit_params(
text,
summary,
@@ -351,26 +376,26 @@ class Page(CopyvioMixIn):

def _build_edit_params(
self,
text,
summary,
minor,
bot,
force,
section,
captcha_id,
captcha_word,
kwargs,
):
"""Given some keyword arguments, build an API edit query string."""
unitxt = text.encode("utf8") if isinstance(text, str) else text
hashed = md5(unitxt).hexdigest() # Checksum to ensure text is correct
text: str,
summary: str | None,
minor: bool | None,
bot: bool | None,
force: bool | None,
section: int | str | None,
captcha_id: str | None,
captcha_word: str | None,
kwargs: dict[str, Any],
) -> dict[str, Any]:
"""
Given some keyword arguments, build an API edit query string.
"""
params = {
"action": "edit",
"title": self._title,
"text": text,
"token": self.site.get_token(),
"summary": summary,
"md5": hashed,
"md5": hashlib.md5(text.encode("utf-8")).hexdigest(),
}

if section:
@@ -403,12 +428,15 @@ class Page(CopyvioMixIn):
params[key] = val
return params

def _handle_edit_errors(self, error, params, retry=True):
"""If our edit fails due to some error, try to handle it.
def _handle_edit_errors(
self, error: APIError, params: dict[str, Any], retry: bool = True
) -> dict:
"""
If our edit fails due to some error, try to handle it.

We'll either raise an appropriate exception (for example, if the page
is protected), or we'll try to fix it (for example, if the token is
invalid, we'll try to get a new one).
We'll either raise an appropriate exception (for example, if the page is
protected), or we'll try to fix it (for example, if the token is invalid, we'll
try to get a new one).
"""
perms = [
"noedit",
@@ -447,27 +475,31 @@ class Page(CopyvioMixIn):
raise exceptions.EditError(": ".join((error.code, error.info)))

@property
def site(self):
"""The page's corresponding Site object."""
def site(self) -> Site:
"""
The page's corresponding Site object.
"""
return self._site

@property
def title(self):
"""The page's title, or "pagename".
def title(self) -> str:
"""
The page's title, or "pagename".

This won't do any API queries on its own. Any other attributes or
methods that do API queries will reload the title, however, like
:py:attr:`exists` and :py:meth:`get`, potentially "normalizing" it or
following redirects if :py:attr:`self._follow_redirects` is ``True``.
This won't do any API queries on its own. Any other attributes or methods that
do API queries will reload the title, however, like :py:attr:`exists` and
:py:meth:`get`, potentially "normalizing" it or following redirects if
:py:attr:`self._follow_redirects` is ``True``.
"""
return self._title

@property
def exists(self):
"""Whether or not the page exists.
def exists(self) -> int:
"""
Whether or not the page exists.

This will be a number; its value does not matter, but it will equal
one of :py:attr:`self.PAGE_INVALID <PAGE_INVALID>`,
This will be a number; its value does not matter, but it will equal one of
:py:attr:`self.PAGE_INVALID <PAGE_INVALID>`,
:py:attr:`self.PAGE_MISSING <PAGE_MISSING>`, or
:py:attr:`self.PAGE_EXISTS <PAGE_EXISTS>`.

@@ -478,55 +510,60 @@ class Page(CopyvioMixIn):
return self._exists

@property
def pageid(self):
"""An integer ID representing the page.
def pageid(self) -> int:
"""
An integer ID representing the page.

Makes an API query only if we haven't already made one and the *pageid*
parameter to :py:meth:`__init__` was left as ``None``, which should be
true for all cases except when pages are returned by an SQL generator
(like :py:meth:`category.get_members()
parameter to :py:meth:`__init__` was left as ``None``, which should be true for
all cases except when pages are returned by an SQL generator (like
:py:meth:`category.get_members()
<earwigbot.wiki.category.Category.get_members>`).

Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
invalid or the page does not exist, respectively.
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is invalid
or the page does not exist, respectively.
"""
if self._pageid:
return self._pageid
if self._exists == self.PAGE_UNKNOWN:
self._load()
self._assert_existence() # Missing pages do not have IDs
assert self._pageid is not None, "Page exists but does not have an ID"
return self._pageid

@property
def url(self):
"""The page's URL.
def url(self) -> str:
"""
The page's URL.

Like :py:meth:`title`, this won't do any API queries on its own. If the
API was never queried for this page, we will attempt to determine the
URL ourselves based on the title.
Like :py:meth:`title`, this won't do any API queries on its own. If the API was
never queried for this page, we will attempt to determine the URL ourselves
based on the title.
"""
if self._fullurl:
return self._fullurl
else:
encoded = self._title.encode("utf8").replace(" ", "_")
slug = quote(encoded, safe="/:").decode("utf8")
path = self.site._article_path.replace("$1", slug)
encoded = self._title.replace(" ", "_")
slug = urllib.parse.quote(encoded, safe="/:")
path = self.site.article_path.replace("$1", slug)
return "".join((self.site.url, path))

@property
def namespace(self):
"""The page's namespace ID (an integer).
def namespace(self) -> int:
"""
The page's namespace ID (an integer).

Like :py:meth:`title`, this won't do any API queries on its own. If the
API was never queried for this page, we will attempt to determine the
namespace ourselves based on the title.
Like :py:meth:`title`, this won't do any API queries on its own. If the API was
never queried for this page, we will attempt to determine the namespace
ourselves based on the title.
"""
return self._namespace

@property
def lastrevid(self):
"""The ID of the page's most recent revision.
def lastrevid(self) -> int | None:
"""
The ID of the page's most recent revision.

Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
@@ -538,14 +575,15 @@ class Page(CopyvioMixIn):
return self._lastrevid

@property
def protection(self):
"""The page's current protection status.
def protection(self) -> dict | None:
"""
The page's current protection status.

Makes an API query only if we haven't already made one.

Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` if the page
name is invalid. Won't raise an error if the page is missing because
those can still be create-protected.
Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` if the page name is
invalid. Won't raise an error if the page is missing because those can still be
create-protected.
"""
if self._exists == self.PAGE_UNKNOWN:
self._load()
@@ -553,17 +591,18 @@ class Page(CopyvioMixIn):
return self._protection

@property
def is_talkpage(self):
"""``True`` if the page is a talkpage, otherwise ``False``.
def is_talkpage(self) -> bool:
"""
``True`` if the page is a talkpage, otherwise ``False``.

Like :py:meth:`title`, this won't do any API queries on its own. If the
API was never queried for this page, we will attempt to determine
whether it is a talkpage ourselves based on its namespace.
Like :py:meth:`title`, this won't do any API queries on its own. If the API was
never queried for this page, we will attempt to determine whether it is a
talkpage ourselves based on its namespace.
"""
return self._is_talkpage

@property
def is_redirect(self):
def is_redirect(self) -> bool:
"""``True`` if the page is a redirect, otherwise ``False``.

Makes an API query only if we haven't already made one.
@@ -572,34 +611,36 @@ class Page(CopyvioMixIn):
"""
if self._exists == self.PAGE_UNKNOWN:
self._load()
assert self._is_redirect is not None
return self._is_redirect

def reload(self):
"""Forcibly reload the page's attributes.
def reload(self) -> None:
"""
Forcibly reload the page's attributes.

Emphasis on *reload*: this is only necessary if there is reason to
believe they have changed.
Emphasis on *reload*: this is only necessary if there is reason to believe they
have changed.
"""
self._load()
if self._content is not None:
# Only reload content if it has already been loaded:
self._load_content()

def toggle_talk(self, follow_redirects=None):
"""Return a content page's talk page, or vice versa.
def toggle_talk(self, follow_redirects: bool | None = None) -> Page:
"""
Return a content page's talk page, or vice versa.

The title of the new page is determined by namespace logic, not API
queries. We won't make any API queries on our own.
The title of the new page is determined by namespace logic, not API queries.
We won't make any API queries on our own.

If *follow_redirects* is anything other than ``None`` (the default), it
will be passed to the new :py:class:`~earwigbot.wiki.page.Page`
object's :py:meth:`__init__`. Otherwise, we'll use the value passed to
our own :py:meth:`__init__`.
If *follow_redirects* is anything other than ``None`` (the default), it will be
passed to the new :py:class:`~earwigbot.wiki.page.Page` object's
:py:meth:`__init__`. Otherwise, we'll use the value passed to our own
:py:meth:`__init__`.

Will raise :py:exc:`~earwigbot.exceptions.InvalidPageError` if we try
to get the talk page of a special page (in the ``Special:`` or
``Media:`` namespaces), but we won't raise an exception if our page is
otherwise missing or invalid.
Will raise :py:exc:`~earwigbot.exceptions.InvalidPageError` if we try to get
the talk page of a special page (in the ``Special:`` or ``Media:`` namespaces),
but we won't raise an exception if our page is otherwise missing or invalid.
"""
if self._namespace < 0:
ns = self.site.namespace_id_to_name(self._namespace)
@@ -629,11 +670,12 @@ class Page(CopyvioMixIn):
follow_redirects = self._follow_redirects
return Page(self.site, new_title, follow_redirects)

def get(self):
"""Return page content, which is cached if you try to call get again.
def get(self) -> str:
"""
Return page content, which is cached if you try to call get again.

Raises InvalidPageError or PageNotFoundError if the page name is
invalid or the page does not exist, respectively.
Raises InvalidPageError or PageNotFoundError if the page name is invalid or the
page does not exist, respectively.
"""
if self._exists == self.PAGE_UNKNOWN:
# Kill two birds with one stone by doing an API query for both our
@@ -659,6 +701,7 @@ class Page(CopyvioMixIn):
self._exists = self.PAGE_UNKNOWN # Force another API query
self.get()

assert self._content is not None
return self._content

# Make sure we're dealing with a real page here. This may be outdated
@@ -669,16 +712,17 @@ class Page(CopyvioMixIn):
if self._content is None:
self._load_content()

assert self._content is not None
return self._content

def get_redirect_target(self):
"""If the page is a redirect, return its destination.
def get_redirect_target(self) -> str:
"""
If the page is a redirect, return its destination.

Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
invalid or the page does not exist, respectively. Raises
:py:exc:`~earwigbot.exceptions.RedirectError` if the page is not a
redirect.
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is invalid
or the page does not exist, respectively. Raises
:py:exc:`~earwigbot.exceptions.RedirectError` if the page is not a redirect.
"""
re_redirect = r"^\s*\#\s*redirect\s*\[\[(.*?)\]\]"
content = self.get()
@@ -688,19 +732,20 @@ class Page(CopyvioMixIn):
e = "The page does not appear to have a redirect target."
raise exceptions.RedirectError(e)

def get_creator(self):
"""Return the User object for the first person to edit the page.
def get_creator(self) -> User:
"""
Return the User object for the first person to edit the page.

Makes an API query only if we haven't already made one. Normally, we
can get the creator along with everything else (except content) in
:py:meth:`_load_attributes`. However, due to a limitation in the API
(can't get the editor of one revision and the content of another at
both ends of the history), if our other attributes were only loaded
through :py:meth:`get`, we'll have to do another API query.
Makes an API query only if we haven't already made one. Normally, we can get
the creator along with everything else (except content) in
:py:meth:`_load_attributes`. However, due to a limitation in the API (can't get
the editor of one revision and the content of another at both ends of the
history), if our other attributes were only loaded through :py:meth:`get`,
we'll have to do another API query.

Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
invalid or the page does not exist, respectively.
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is invalid
or the page does not exist, respectively.
"""
if self._exists == self.PAGE_UNKNOWN:
self._load()
@@ -710,41 +755,59 @@ class Page(CopyvioMixIn):
self._assert_existence()
return self.site.get_user(self._creator)

def parse(self):
"""Parse the page content for templates, links, etc.
def parse(self) -> mwparserfromhell.wikicode.Wikicode:
"""
Parse the page content for templates, links, etc.

Actual parsing is handled by :py:mod:`mwparserfromhell`. Raises
:py:exc:`~earwigbot.exceptions.InvalidPageError` or
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
invalid or the page does not exist, respectively.
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is invalid
or the page does not exist, respectively.
"""
return mwparserfromhell.parse(self.get())

def edit(self, text, summary, minor=False, bot=True, force=False, **kwargs):
"""Replace the page's content or creates a new page.
def edit(
self,
text: str,
summary: str | None,
minor: bool = False,
bot: bool = True,
force: bool = False,
**kwargs: Any,
) -> None:
"""
Replace the page's content or creates a new page.

*text* is the new page content, with *summary* as the edit summary.
If *minor* is ``True``, the edit will be marked as minor. If *bot* is
``True``, the edit will be marked as a bot edit, but only if we
actually have a bot flag.
*text* is the new page content, with *summary* as the edit summary. If *minor*
is ``True``, the edit will be marked as minor. If *bot* is ``True``, the edit
will be marked as a bot edit, but only if we actually have a bot flag.

Use *force* to push the new content even if there's an edit conflict or
the page was deleted/recreated between getting our edit token and
editing our page. Be careful with this!
Use *force* to push the new content even if there's an edit conflict or the
page was deleted/recreated between getting our edit token and editing our page.
Be careful with this!
"""
self._edit(
text=text, summary=summary, minor=minor, bot=bot, force=force, **kwargs
)

def add_section(self, text, title, minor=False, bot=True, force=False, **kwargs):
"""Add a new section to the bottom of the page.
def add_section(
self,
text: str,
title: str,
minor: bool = False,
bot: bool = True,
force: bool = False,
**kwargs: Any,
) -> None:
"""
Add a new section to the bottom of the page.

The arguments for this are the same as those for :py:meth:`edit`, but
instead of providing a summary, you provide a section title. Likewise,
raised exceptions are the same as :py:meth:`edit`'s.
The arguments for this are the same as those for :py:meth:`edit`, but instead
of providing a summary, you provide a section title. Likewise, raised
exceptions are the same as :py:meth:`edit`'s.

This should create the page if it does not already exist, with just the
new section as content.
This should create the page if it does not already exist, with just the new
section as content.
"""
self._edit(
text=text,
@@ -756,25 +819,27 @@ class Page(CopyvioMixIn):
**kwargs,
)

def check_exclusion(self, username=None, optouts=None):
"""Check whether or not we are allowed to edit the page.
def check_exclusion(
self, username: str | None = None, optouts: Iterable[str] | None = None
) -> bool:
"""
Check whether or not we are allowed to edit the page.

Return ``True`` if we *are* allowed to edit this page, and ``False`` if
we aren't.

*username* is used to determine whether we are part of a specific list
of allowed or disallowed bots (e.g. ``{{bots|allow=EarwigBot}}`` or
``{{bots|deny=FooBot,EarwigBot}}``). It's ``None`` by default, which
will swipe our username from :py:meth:`site.get_user()
*username* is used to determine whether we are part of a specific list of
allowed or disallowed bots (e.g. ``{{bots|allow=EarwigBot}}`` or
``{{bots|deny=FooBot,EarwigBot}}``). It's ``None`` by default, which will swipe
our username from :py:meth:`site.get_user()
<earwigbot.wiki.site.Site.get_user>`.\
:py:attr:`~earwigbot.wiki.user.User.name`.

*optouts* is a list of messages to consider this check as part of for
the purpose of opt-out; it defaults to ``None``, which ignores the
parameter completely. For example, if *optouts* is ``["nolicense"]``,
we'll return ``False`` on ``{{bots|optout=nolicense}}`` or
``{{bots|optout=all}}``, but `True` on
``{{bots|optout=orfud,norationale,replaceable}}``.
*optouts* is a list of messages to consider this check as part of for the
purpose of opt-out; it defaults to ``None``, which ignores the parameter
completely. For example, if *optouts* is ``["nolicense"]``, we'll return
``False`` on ``{{bots|optout=nolicense}}`` or ``{{bots|optout=all}}``, but
`True` on ``{{bots|optout=orfud,norationale,replaceable}}``.
"""

def parse_param(template, param):


+ 542
- 378
earwigbot/wiki/site.py
파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
파일 보기


+ 166
- 123
earwigbot/wiki/sitesdb.py 파일 보기

@@ -18,78 +18,102 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import annotations

import errno
import sqlite3 as sqlite
import stat
import typing
from collections import OrderedDict
from http.cookiejar import LoadError, LWPCookieJar
from dataclasses import dataclass
from http.cookiejar import CookieJar, LoadError, LWPCookieJar
from os import chmod, path
from platform import python_version

from earwigbot import __version__
from earwigbot.exceptions import SiteNotFoundError
from earwigbot.wiki.copyvios.exclusions import ExclusionsDB
from earwigbot.wiki.site import Site
from earwigbot.wiki.site import Site, SqlConnInfo

if typing.TYPE_CHECKING:
from earwigbot.bot import Bot

__all__ = ["SitesDB"]


@dataclass(frozen=True)
class _SiteInfoFromDB:
name: str
project: str
lang: str
base_url: str
article_path: str
script_path: str
sql: SqlConnInfo
namespaces: dict[int, list[str]]


class SitesDB:
"""
**EarwigBot: Wiki Toolset: Sites Database Manager**

This class controls the :file:`sites.db` file, which stores information
about all wiki sites known to the bot. Three public methods act as bridges
between the bot's config files and :py:class:`~earwigbot.wiki.site.Site`
objects:
This class controls the :file:`sites.db` file, which stores information about all
wiki sites known to the bot. Three public methods act as bridges between the bot's
config files and :py:class:`~earwigbot.wiki.site.Site` objects:

- :py:meth:`get_site`: returns a Site object corresponding to a site
- :py:meth:`add_site`: stores a site in the database
- :py:meth:`remove_site`: removes a site from the database

There's usually no need to use this class directly. All public methods
here are available as :py:meth:`bot.wiki.get_site`,
:py:meth:`bot.wiki.add_site`, and :py:meth:`bot.wiki.remove_site`, which
use a :file:`sites.db` file located in the same directory as our
:file:`config.yml` file. Lower-level access can be achieved by importing
the manager class (``from earwigbot.wiki import SitesDB``).
There's usually no need to use this class directly. All public methods here are
available as :py:meth:`bot.wiki.get_site`, :py:meth:`bot.wiki.add_site`, and
:py:meth:`bot.wiki.remove_site`, which use a :file:`sites.db` file located in the
same directory as our :file:`config.yml` file. Lower-level access can be achieved
by importing the manager class (``from earwigbot.wiki import SitesDB``).
"""

def __init__(self, bot):
"""Set up the manager with an attribute for the base Bot object."""
def __init__(self, bot: Bot) -> None:
"""
Set up the manager with an attribute for the base Bot object.
"""
self.config = bot.config
self._logger = bot.logger.getChild("wiki")

self._sites = {} # Internal site cache
self._sites: dict[str, Site] = {} # Internal site cache
self._sitesdb = path.join(bot.config.root_dir, "sites.db")
self._cookie_file = path.join(bot.config.root_dir, ".cookies")
self._cookiejar = None
self._cookiejar: CookieJar | None = None

excl_db = path.join(bot.config.root_dir, "exclusions.db")
excl_logger = self._logger.getChild("exclusionsdb")
self._exclusions_db = ExclusionsDB(self, excl_db, excl_logger)

def __repr__(self):
"""Return the canonical string representation of the SitesDB."""
def __repr__(self) -> str:
"""
Return the canonical string representation of the SitesDB.
"""
res = "SitesDB(config={0!r}, sitesdb={1!r}, cookie_file={2!r})"
return res.format(self.config, self._sitesdb, self._cookie_file)

def __str__(self):
"""Return a nice string representation of the SitesDB."""
def __str__(self) -> str:
"""
Return a nice string representation of the SitesDB.
"""
return f"<SitesDB at {self._sitesdb}>"

def _get_cookiejar(self):
"""Return a LWPCookieJar object loaded from our .cookies file.
def _get_cookiejar(self) -> CookieJar:
"""
Return a LWPCookieJar object loaded from our .cookies file.

The same .cookies file is returned every time, located in the project
root, same directory as config.yml and bot.py. If it doesn't exist, we
will create the file and set it to be readable and writeable only by
us. If it exists but the information inside is bogus, we'll ignore it.
The same .cookies file is returned every time, located in the project root,
same directory as config.yml and bot.py. If it doesn't exist, we will create
the file and set it to be readable and writeable only by us. If it exists but
the information inside is bogus, we'll ignore it.

This is normally called by _make_site_object() (in turn called by
get_site()), and the cookiejar is passed to our Site's constructor,
used when it makes API queries. This way, we can easily preserve
cookies between sites (e.g., for CentralAuth), making logins easier.
This is normally called by _make_site_object() (in turn called by get_site()),
and the cookiejar is passed to our Site's constructor, used when it makes API
queries. This way, we can easily preserve cookies between sites (e.g., for
CentralAuth), making logins easier.
"""
if self._cookiejar:
return self._cookiejar
@@ -111,8 +135,10 @@ class SitesDB:

return self._cookiejar

def _create_sitesdb(self):
"""Initialize the sitesdb file with its three necessary tables."""
def _create_sitesdb(self) -> None:
"""
Initialize the sitesdb file with its three necessary tables.
"""
script = """
CREATE TABLE sites (site_name, site_project, site_lang, site_base_url,
site_article_path, site_script_path);
@@ -122,11 +148,12 @@ class SitesDB:
with sqlite.connect(self._sitesdb) as conn:
conn.executescript(script)

def _get_site_object(self, name):
"""Return the site from our cache, or create it if it doesn't exist.
def _get_site_object(self, name: str) -> Site:
"""
Return the site from our cache, or create it if it doesn't exist.

This is essentially just a wrapper around _make_site_object that
returns the same object each time a specific site is asked for.
This is essentially just a wrapper around _make_site_object that returns the
same object each time a specific site is asked for.
"""
try:
return self._sites[name]
@@ -135,14 +162,12 @@ class SitesDB:
self._sites[name] = site
return site

def _load_site_from_sitesdb(self, name):
"""Return all information stored in the sitesdb relating to given site.
def _load_site_from_sitesdb(self, name: str) -> _SiteInfoFromDB:
"""
Return all information stored in the sitesdb relating to given site.

The information will be returned as a tuple, containing the site's
name, project, language, base URL, article path, script path, SQL
connection data, and namespaces, in that order. If the site is not
found in the database, SiteNotFoundError will be raised. An empty
database will be created before the exception is raised if none exists.
If the site is not found in the database, SiteNotFoundError will be raised. An
empty database will be created before the exception is raised if none exists.
"""
query1 = "SELECT * FROM sites WHERE site_name = ?"
query2 = "SELECT sql_data_key, sql_data_value FROM sql_data WHERE sql_site = ?"
@@ -161,7 +186,7 @@ class SitesDB:

name, project, lang, base_url, article_path, script_path = site_data
sql = dict(sql_data)
namespaces = {}
namespaces: dict[int, list[str]] = {}
for ns_id, ns_name, ns_is_primary_name in ns_data:
try:
if ns_is_primary_name: # "Primary" name goes first in list
@@ -171,7 +196,7 @@ class SitesDB:
except KeyError:
namespaces[ns_id] = [ns_name]

return (
return _SiteInfoFromDB(
name,
project,
lang,
@@ -182,16 +207,16 @@ class SitesDB:
namespaces,
)

def _make_site_object(self, name):
"""Return a Site object associated with the site *name* in our sitesdb.
def _make_site_object(self, name: str) -> Site:
"""
Return a Site object associated with the site *name* in our sitesdb.

This calls _load_site_from_sitesdb(), so SiteNotFoundError will be
raised if the site is not in our sitesdb.
This calls _load_site_from_sitesdb(), so SiteNotFoundError will be raised if
the site is not in our sitesdb.
"""
cookiejar = self._get_cookiejar()
(name, project, lang, base_url, article_path, script_path, sql, namespaces) = (
self._load_site_from_sitesdb(name)
)
info = self._load_site_from_sitesdb(name)
name = info.name

config = self.config
login = (config.wiki.get("username"), config.wiki.get("password"))
@@ -213,6 +238,7 @@ class SitesDB:
search_config["nltk_dir"] = nltk_dir
search_config["exclusions_db"] = self._exclusions_db

sql = info.sql
if not sql:
sql = config.wiki.get("sql", OrderedDict()).copy()
for key, value in sql.items():
@@ -221,13 +247,13 @@ class SitesDB:

return Site(
name=name,
project=project,
lang=lang,
base_url=base_url,
article_path=article_path,
script_path=script_path,
project=info.project,
lang=info.lang,
base_url=info.base_url,
article_path=info.article_path,
script_path=info.script_path,
sql=sql,
namespaces=namespaces,
namespaces=info.namespaces,
login=login,
oauth=oauth,
cookiejar=cookiejar,
@@ -240,18 +266,18 @@ class SitesDB:
search_config=search_config,
)

def _get_site_name_from_sitesdb(self, project, lang):
"""Return the name of the first site with the given project and lang.
def _get_site_name_from_sitesdb(self, project: str, lang: str) -> str | None:
"""
Return the name of the first site with the given project and lang.

If we can't find the site with the given information, we'll also try
searching for a site whose base_url contains "{lang}.{project}". There
are a few sites, like the French Wikipedia, that set their project to
something other than the expected "wikipedia" ("wikipédia" in this
case), but we should correctly find them when doing get_site(lang="fr",
project="wikipedia").
If we can't find the site with the given information, we'll also try searching
for a site whose base_url contains "{lang}.{project}". There are a few sites,
like the French Wikipedia, that set their project to something other than the
expected "wikipedia" ("wikipédia" in this case), but we should correctly find
them when doing get_site(lang="fr", project="wikipedia").

If the site is not found, return None. An empty sitesdb will be created
if none exists.
If the site is not found, return None. An empty sitesdb will be created if
none exists.
"""
query1 = "SELECT site_name FROM sites WHERE site_project = ? and site_lang = ?"
query2 = "SELECT site_name FROM sites WHERE site_base_url LIKE ?"
@@ -267,26 +293,27 @@ class SitesDB:
except sqlite.OperationalError:
self._create_sitesdb()

def _add_site_to_sitesdb(self, site):
"""Extract relevant info from a Site object and add it to the sitesdb.
def _add_site_to_sitesdb(self, site: Site) -> None:
"""
Extract relevant info from a Site object and add it to the sitesdb.

Works like a reverse _load_site_from_sitesdb(); the site's project,
language, base URL, article path, script path, SQL connection data, and
namespaces are extracted from the site and inserted into the sites
database. If the sitesdb doesn't exist, we'll create it first.
Works like a reverse _load_site_from_sitesdb(); the site's project, language,
base URL, article path, script path, SQL connection data, and namespaces are
extracted from the site and inserted into the sites database. If the sitesdb
doesn't exist, we'll create it first.
"""
name = site.name
sites_data = (
name,
site.project,
site.lang,
site._base_url,
site._article_path,
site._script_path,
site.base_url,
site.article_path,
site.script_path,
)
sql_data = [(name, key, val) for key, val in site._sql_data.items()]
ns_data = []
for ns_id, ns_names in site._namespaces.items():
ns_data: list[tuple[str, int, str, bool]] = []
for ns_id, ns_names in site.namespaces.items():
ns_data.append((name, ns_id, ns_names.pop(0), True))
for ns_name in ns_names:
ns_data.append((name, ns_id, ns_name, False))
@@ -306,8 +333,10 @@ class SitesDB:
conn.executemany("INSERT INTO sql_data VALUES (?, ?, ?)", sql_data)
conn.executemany("INSERT INTO namespaces VALUES (?, ?, ?, ?)", ns_data)

def _remove_site_from_sitesdb(self, name):
"""Remove a site by name from the sitesdb and the internal cache."""
def _remove_site_from_sitesdb(self, name: str) -> bool:
"""
Remove a site by name from the sitesdb and the internal cache.
"""
try:
del self._sites[name]
except KeyError:
@@ -323,30 +352,34 @@ class SitesDB:
self._logger.info(f"Removed site '{name}'")
return True

def get_site(self, name=None, project=None, lang=None):
"""Return a Site instance based on information from the sitesdb.
def get_site(
self,
name: str | None = None,
project: str | None = None,
lang: str | None = None,
) -> Site:
"""
Return a Site instance based on information from the sitesdb.

With no arguments, return the default site as specified by our config
file. This is ``config.wiki["defaultSite"]``.
With no arguments, return the default site as specified by our config file.
This is ``config.wiki["defaultSite"]``.

With *name* specified, return the site with that name. This is
equivalent to the site's ``wikiid`` in the API, like *enwiki*.
With *name* specified, return the site with that name. This is equivalent to
the site's ``wikiid`` in the API, like *enwiki*.

With *project* and *lang* specified, return the site whose project and
language match these values. If there are multiple sites with the same
values (unlikely), this is not a reliable way of loading a site. Call
the function with an explicit *name* in that case.
With *project* and *lang* specified, return the site whose project and language
match these values. If there are multiple sites with the same values
(unlikely), this is not a reliable way of loading a site. Call the function
with an explicit *name* in that case.

We will attempt to login to the site automatically using
``config.wiki["username"]`` and ``config.wiki["password"]`` if both are
defined.

Specifying a project without a lang or a lang without a project will
raise :py:exc:`TypeError`. If all three args are specified, *name* will
be first tried, then *project* and *lang* if *name* doesn't work. If a
site cannot be found in the sitesdb,
:py:exc:`~earwigbot.exceptions.SiteNotFoundError` will be raised. An
empty sitesdb will be created if none is found.
``config.wiki["username"]`` and ``config.wiki["password"]`` if both are defined.

Specifying a project without a lang or a lang without a project will raise
:py:exc:`TypeError`. If all three args are specified, *name* will be first
tried, then *project* and *lang* if *name* doesn't work. If a site cannot be
found in the sitesdb, :py:exc:`~earwigbot.exceptions.SiteNotFoundError` will be
raised. An empty sitesdb will be created if none is found.
"""
# Someone specified a project without a lang, or vice versa:
if (project and not lang) or (not project and lang):
@@ -374,6 +407,7 @@ class SitesDB:
raise

# If we end up here, then project and lang are the only args given:
assert project is not None and lang is not None, (project, lang)
name = self._get_site_name_from_sitesdb(project, lang)
if name:
return self._get_site_object(name)
@@ -381,30 +415,34 @@ class SitesDB:
raise SiteNotFoundError(e)

def add_site(
self, project=None, lang=None, base_url=None, script_path="/w", sql=None
):
"""Add a site to the sitesdb so it can be retrieved with get_site().
self,
project: str | None = None,
lang: str | None = None,
base_url: str | None = None,
script_path: str = "/w",
sql: SqlConnInfo | None = None,
) -> Site:
"""
Add a site to the sitesdb so it can be retrieved with get_site().

If only a project and a lang are given, we'll guess the *base_url* as
``"//{lang}.{project}.org"`` (which is protocol-relative, becoming
``"https"`` if *useHTTPS* is ``True`` in config otherwise ``"http"``).
If this is wrong, provide the correct *base_url* as an argument (in
which case project and lang are ignored). Most wikis use ``"/w"`` as
the script path (meaning the API is located at
``"{base_url}{script_path}/api.php"`` ->
``"//{lang}.{project}.org/w/api.php"``), so this is the default. If
your wiki is different, provide the script_path as an argument. SQL
connection settings are guessed automatically using config's template
value. If this is wrong or not specified, provide a dict of kwargs as
*sql* and Site will pass it to :py:func:`pymysql.connect(**sql)
<pymysql.connect>`, allowing you to make queries with
:py:meth:`site.sql_query <earwigbot.wiki.site.Site.sql_query>`.

Returns ``True`` if the site was added successfully or ``False`` if the
site is already in our sitesdb (this can be done purposefully to update
old site info). Raises :py:exc:`~earwigbot.exception.SiteNotFoundError`
if not enough information has been provided to identify the site (e.g.
a *project* but not a *lang*).
``"//{lang}.{project}.org"`` (which is protocol-relative, becoming ``"https"``
if *useHTTPS* is ``True`` in config otherwise ``"http"``). If this is wrong,
provide the correct *base_url* as an argument (in which case project and lang
are ignored). Most wikis use ``"/w"`` as the script path (meaning the API is
located at ``"{base_url}{script_path}/api.php"`` ->
``"//{lang}.{project}.org/w/api.php"``), so this is the default. If your wiki
is different, provide the script_path as an argument. SQL connection settings
are guessed automatically using config's template value. If this is wrong or
not specified, provide a dict of kwargs as *sql* and Site will pass it to
:py:func:`pymysql.connect(**sql) <pymysql.connect>`, allowing you to make
queries with :py:meth:`site.sql_query <earwigbot.wiki.site.Site.sql_query>`.

Returns ``True`` if the site was added successfully or ``False`` if the site is
already in our sitesdb (this can be done purposefully to update old site info).
Raises :py:exc:`~earwigbot.exception.SiteNotFoundError` if not enough
information has been provided to identify the site (e.g. a *project* but not
a *lang*).
"""
if not base_url:
if not project or not lang:
@@ -445,7 +483,12 @@ class SitesDB:
self._add_site_to_sitesdb(site)
return self._get_site_object(site.name)

def remove_site(self, name=None, project=None, lang=None):
def remove_site(
self,
name: str | None = None,
project: str | None = None,
lang: str | None = None,
) -> bool:
"""Remove a site from the sitesdb.

Returns ``True`` if the site was removed successfully or ``False`` if


+ 129
- 103
earwigbot/wiki/user.py 파일 보기

@@ -1,4 +1,4 @@
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -18,14 +18,21 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from logging import NullHandler, getLogger
from socket import AF_INET, AF_INET6, inet_pton
from time import gmtime, strptime
from __future__ import annotations

import socket
import time
import typing
from logging import Logger, NullHandler, getLogger
from typing import Any, Literal

from earwigbot.exceptions import UserNotFoundError
from earwigbot.wiki import constants
from earwigbot.wiki.page import Page

if typing.TYPE_CHECKING:
from earwigbot.wiki.site import Site

__all__ = ["User"]


@@ -33,10 +40,9 @@ class User:
"""
**EarwigBot: Wiki Toolset: User**

Represents a user on a given :py:class:`~earwigbot.wiki.site.Site`. Has
methods for getting a bunch of information about the user, such as
editcount and user rights, methods for returning the user's userpage and
talkpage, etc.
Represents a user on a given :py:class:`~earwigbot.wiki.site.Site`. Has methods for
getting a bunch of information about the user, such as editcount and user rights,
methods for returning the user's userpage and talkpage, etc.

*Attributes:*

@@ -56,24 +62,23 @@ class User:
*Public methods:*

- :py:meth:`reload`: forcibly reloads the user's attributes
- :py:meth:`get_userpage`: returns a Page object representing the user's
userpage
- :py:meth:`get_talkpage`: returns a Page object representing the user's
talkpage
- :py:meth:`get_userpage`: returns a Page object representing the user's userpage
- :py:meth:`get_talkpage`: returns a Page object representing the user's talkpage
"""

def __init__(self, site, name, logger=None):
"""Constructor for new User instances.
def __init__(self, site: Site, name: str, logger: Logger | None = None) -> None:
"""
Constructor for new User instances.

Takes two arguments, a Site object (necessary for doing API queries),
and the name of the user, preferably without "User:" in front, although
this prefix will be automatically removed by the API if given.
Takes two arguments, a Site object (necessary for doing API queries), and the
name of the user, preferably without "User:" in front, although this prefix
will be automatically removed by the API if given.

You can also use site.get_user() instead, which returns a User object,
and is preferred.
You can also use site.get_user() instead, which returns a User object, and
is preferred.

We won't do any API queries yet for basic information about the user -
save that for when the information is requested.
We won't do any API queries yet for basic information about the user - save
that for when the information is requested.
"""
self._site = site
self._name = name
@@ -85,22 +90,27 @@ class User:
self._logger = getLogger("earwigbot.wiki")
self._logger.addHandler(NullHandler())

def __repr__(self):
"""Return the canonical string representation of the User."""
def __repr__(self) -> str:
"""
Return the canonical string representation of the User.
"""
return f"User(name={self._name!r}, site={self._site!r})"

def __str__(self):
"""Return a nice string representation of the User."""
def __str__(self) -> str:
"""
Return a nice string representation of the User.
"""
return f'<User "{self.name}" of {str(self.site)}>'

def _get_attribute(self, attr):
"""Internally used to get an attribute by name.
def _get_attribute(self, attr: str) -> Any:
"""
Internally used to get an attribute by name.

We'll call _load_attributes() to get this (and all other attributes)
from the API if it is not already defined.
We'll call _load_attributes() to get this (and all other attributes) from the
API if it is not already defined.

Raises UserNotFoundError if a nonexistent user prevents us from
returning a certain attribute.
Raises UserNotFoundError if a nonexistent user prevents us from returning a
certain attribute.
"""
if not hasattr(self, attr):
self._load_attributes()
@@ -109,11 +119,12 @@ class User:
raise UserNotFoundError(e)
return getattr(self, attr)

def _load_attributes(self):
"""Internally used to load all attributes from the API.
def _load_attributes(self) -> None:
"""
Internally used to load all attributes from the API.

Normally, this is called by _get_attribute() when a requested attribute
is not defined. This defines it.
Normally, this is called by _get_attribute() when a requested attribute is not
defined. This defines it.
"""
props = "blockinfo|groups|rights|editcount|registration|emailable|gender"
result = self.site.api_query(
@@ -150,11 +161,11 @@ class User:

reg = res["registration"]
try:
self._registration = strptime(reg, "%Y-%m-%dT%H:%M:%SZ")
self._registration = time.strptime(reg, "%Y-%m-%dT%H:%M:%SZ")
except TypeError:
# Sometimes the API doesn't give a date; the user's probably really
# old. There's nothing else we can do!
self._registration = gmtime(0)
self._registration = time.gmtime(0)

try:
res["emailable"]
@@ -166,24 +177,28 @@ class User:
self._gender = res["gender"]

@property
def site(self):
"""The user's corresponding Site object."""
def site(self) -> Site:
"""
The user's corresponding Site object.
"""
return self._site

@property
def name(self):
"""The user's username.
def name(self) -> str:
"""
The user's username.

This will never make an API query on its own, but if one has already
been made by the time this is retrieved, the username may have been
"normalized" from the original input to the constructor, converted into
a Unicode object, with underscores removed, etc.
This will never make an API query on its own, but if one has already been made
by the time this is retrieved, the username may have been "normalized" from the
original input to the constructor, converted into a Unicode object, with
underscores removed, etc.
"""
return self._name

@property
def exists(self):
"""``True`` if the user exists, or ``False`` if they do not.
def exists(self) -> bool:
"""
``True`` if the user exists, or ``False`` if they do not.

Makes an API query only if we haven't made one already.
"""
@@ -192,124 +207,135 @@ class User:
return self._exists

@property
def userid(self):
"""An integer ID used by MediaWiki to represent the user.
def userid(self) -> int:
"""
An integer ID used by MediaWiki to represent the user.

Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
does not exist. Makes an API query only if we haven't made one already.
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not
exist. Makes an API query only if we haven't made one already.
"""
return self._get_attribute("_userid")

@property
def blockinfo(self):
"""Information about any current blocks on the user.
def blockinfo(self) -> dict[str, Any] | Literal[False]:
"""
Information about any current blocks on the user.

If the user is not blocked, returns ``False``. If they are, returns a
dict with three keys: ``"by"`` is the blocker's username, ``"reason"``
is the reason why they were blocked, and ``"expiry"`` is when the block
expires.
If the user is not blocked, returns ``False``. If they are, returns a dict with
three keys: ``"by"`` is the blocker's username, ``"reason"`` is the reason why
they were blocked, and ``"expiry"`` is when the block expires.

Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
does not exist. Makes an API query only if we haven't made one already.
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not
exist. Makes an API query only if we haven't made one already.
"""
return self._get_attribute("_blockinfo")

@property
def groups(self):
"""A list of groups this user is in, including ``"*"``.
def groups(self) -> list[str]:
"""
A list of groups this user is in, including ``"*"``.

Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
does not exist. Makes an API query only if we haven't made one already.
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not
exist. Makes an API query only if we haven't made one already.
"""
return self._get_attribute("_groups")

@property
def rights(self):
"""A list of this user's rights.
def rights(self) -> list[str]:
"""
A list of this user's rights.

Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
does not exist. Makes an API query only if we haven't made one already.
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not
exist. Makes an API query only if we haven't made one already.
"""
return self._get_attribute("_rights")

@property
def editcount(self):
"""Returns the number of edits made by the user.
def editcount(self) -> int:
"""
Returns the number of edits made by the user.

Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
does not exist. Makes an API query only if we haven't made one already.
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not
exist. Makes an API query only if we haven't made one already.
"""
return self._get_attribute("_editcount")

@property
def registration(self):
"""The time the user registered as a :py:class:`time.struct_time`.
def registration(self) -> time.struct_time:
"""
The time the user registered as a :py:class:`time.struct_time`.

Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
does not exist. Makes an API query only if we haven't made one already.
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not
exist. Makes an API query only if we haven't made one already.
"""
return self._get_attribute("_registration")

@property
def emailable(self):
"""``True`` if the user can be emailed, or ``False`` if they cannot.
def emailable(self) -> bool:
"""
``True`` if the user can be emailed, or ``False`` if they cannot.

Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
does not exist. Makes an API query only if we haven't made one already.
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not
exist. Makes an API query only if we haven't made one already.
"""
return self._get_attribute("_emailable")

@property
def gender(self):
"""The user's gender.
def gender(self) -> str:
"""
The user's gender.

Can return either ``"male"``, ``"female"``, or ``"unknown"``, if they
did not specify it.
Can return either ``"male"``, ``"female"``, or ``"unknown"``, if they did not
specify it.

Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
does not exist. Makes an API query only if we haven't made one already.
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not
exist. Makes an API query only if we haven't made one already.
"""
return self._get_attribute("_gender")

@property
def is_ip(self):
"""``True`` if the user is an IP address, or ``False`` otherwise.
def is_ip(self) -> bool:
"""
``True`` if the user is an IP address, or ``False`` otherwise.

This tests for IPv4 and IPv6 using :py:func:`socket.inet_pton` on the
username. No API queries are made.
This tests for IPv4 and IPv6 using :py:func:`socket.inet_pton` on the username.
No API queries are made.
"""
try:
inet_pton(AF_INET, self.name)
socket.inet_pton(socket.AF_INET, self.name)
except OSError:
try:
inet_pton(AF_INET6, self.name)
socket.inet_pton(socket.AF_INET6, self.name)
except OSError:
return False
return True

def reload(self):
"""Forcibly reload the user's attributes.
def reload(self) -> None:
"""
Forcibly reload the user's attributes.

Emphasis on *reload*: this is only necessary if there is reason to
believe they have changed.
Emphasis on *reload*: this is only necessary if there is reason to believe they
have changed.
"""
self._load_attributes()

def get_userpage(self):
"""Return a Page object representing the user's userpage.
def get_userpage(self) -> Page:
"""
Return a Page object representing the user's userpage.

No checks are made to see if it exists or not. Proper site namespace
conventions are followed.
No checks are made to see if it exists or not. Proper site namespace conventions
are followed.
"""
prefix = self.site.namespace_id_to_name(constants.NS_USER)
pagename = ":".join((prefix, self._name))
return Page(self.site, pagename)

def get_talkpage(self):
"""Return a Page object representing the user's talkpage.
def get_talkpage(self) -> Page:
"""
Return a Page object representing the user's talkpage.

No checks are made to see if it exists or not. Proper site namespace
conventions are followed.
No checks are made to see if it exists or not. Proper site namespace conventions
are followed.
"""
prefix = self.site.namespace_id_to_name(constants.NS_USER_TALK)
pagename = ":".join((prefix, self._name))


불러오는 중...
취소
저장