Browse Source

Code cleanup and typing

tags/v0.4
Ben Kurtovic 5 months ago
parent
commit
6a7e6dcad9
10 changed files with 1327 additions and 920 deletions
  1. +1
    -1
      CHANGELOG
  2. +30
    -15
      earwigbot/__init__.py
  3. +3
    -0
      earwigbot/exceptions.py
  4. +27
    -4
      earwigbot/tasks/wikiproject_tagger.py
  5. +90
    -66
      earwigbot/wiki/category.py
  6. +49
    -5
      earwigbot/wiki/constants.py
  7. +290
    -225
      earwigbot/wiki/page.py
  8. +542
    -378
      earwigbot/wiki/site.py
  9. +166
    -123
      earwigbot/wiki/sitesdb.py
  10. +129
    -103
      earwigbot/wiki/user.py

+ 1
- 1
CHANGELOG View File

@@ -1,6 +1,6 @@
v0.4 (unreleased):

- Migrated to Python 3 (3.11+).
- Migrated to Python 3 (3.11+). Substantial code cleanup.
- Migrated from oursql to pymysql.
- Copyvios: Configurable proxy support for specific domains.
- Copyvios: Parser-directed URL redirection.


+ 30
- 15
earwigbot/__init__.py View File

@@ -1,4 +1,4 @@
# Copyright (C) 2009-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -20,15 +20,16 @@

"""
`EarwigBot <https://github.com/earwig/earwigbot>`_ is a Python robot that edits
Wikipedia and interacts with people over IRC.
Wikipedia and interacts over IRC.

See :file:`README.rst` for an overview, or the :file:`docs/` directory for
details. This documentation is also available `online
<https://packages.python.org/earwigbot>`_.
See :file:`README.rst` for an overview, or the :file:`docs/` directory for details.
This documentation is also available `online <https://packages.python.org/earwigbot>`_.
"""

import typing

__author__ = "Ben Kurtovic"
__copyright__ = "Copyright (C) 2009-2019 Ben Kurtovic"
__copyright__ = "Copyright (C) 2009-2024 Ben Kurtovic"
__license__ = "MIT License"
__version__ = "0.4.dev0"
__email__ = "ben.kurtovic@gmail.com"
@@ -57,12 +58,26 @@ from earwigbot import lazy

importer = lazy.LazyImporter()

bot = importer.new("earwigbot.bot")
commands = importer.new("earwigbot.commands")
config = importer.new("earwigbot.config")
exceptions = importer.new("earwigbot.exceptions")
irc = importer.new("earwigbot.irc")
managers = importer.new("earwigbot.managers")
tasks = importer.new("earwigbot.tasks")
util = importer.new("earwigbot.util")
wiki = importer.new("earwigbot.wiki")
if typing.TYPE_CHECKING:
from earwigbot import (
bot,
commands,
config,
exceptions,
irc,
managers,
tasks,
util,
wiki,
)

else:
bot = importer.new("earwigbot.bot")
commands = importer.new("earwigbot.commands")
config = importer.new("earwigbot.config")
exceptions = importer.new("earwigbot.exceptions")
irc = importer.new("earwigbot.irc")
managers = importer.new("earwigbot.managers")
tasks = importer.new("earwigbot.tasks")
util = importer.new("earwigbot.util")
wiki = importer.new("earwigbot.wiki")

+ 3
- 0
earwigbot/exceptions.py View File

@@ -107,6 +107,9 @@ class APIError(ServiceError):
Raised by :py:meth:`Site.api_query <earwigbot.wiki.site.Site.api_query>`.
"""

code: str
info: str


class SQLError(ServiceError):
"""Some error involving SQL querying occurred.


+ 27
- 4
earwigbot/tasks/wikiproject_tagger.py View File

@@ -43,13 +43,14 @@ JobKwargs = TypedDict(
"nocreate": NotRequired[bool],
"recursive": NotRequired[bool | int],
"tag-categories": NotRequired[bool],
"not-in-category": NotRequired[str],
"site": NotRequired[str],
"dry-run": NotRequired[bool],
},
)


@dataclass
@dataclass(frozen=True)
class Job:
"""
Represents a single wikiproject-tagging task.
@@ -68,11 +69,20 @@ class Job:
only_with: set[str] | None
nocreate: bool
tag_categories: bool
not_in_category: str | None
dry_run: bool

counter: int = 0
# A bare list default is rejected by @dataclass ("mutable default ... use
# default_factory") and would otherwise be shared by every Job; build a fresh
# one-element list per instance instead.
_counter: list[int] = field(default_factory=lambda: [0])  # Wrap to allow frozen updates
processed_cats: set[str] = field(default_factory=set)
processed_pages: set[str] = field(default_factory=set)
skip_pages: set[str] = field(default_factory=set)

@property
def counter(self) -> int:
return self._counter[0]

def add_to_counter(self, value: int) -> None:
self._counter[0] += value


class ShutoffEnabled(Exception):
@@ -90,7 +100,7 @@ class WikiProjectTagger(Task):
Usage: :command:`earwigbot -t wikiproject_tagger PATH --banner BANNER
[--category CAT | --file FILE] [--summary SUM] [--update] [--append PARAMS]
[--autoassess [CLASSES]] [--only-with BANNER] [--nocreate] [--recursive [NUM]]
[--site SITE] [--dry-run]`
[--not-in-category CAT] [--site SITE] [--dry-run]`

.. glossary::

@@ -126,6 +136,8 @@ class WikiProjectTagger(Task):
``NUM`` isn't provided, go infinitely (this can be dangerous)
``--tag-categories``
also tag category pages
``--not-in-category CAT``
skip talk pages that are already members of this category
``--site SITE``
the ID of the site to tag pages on, defaulting to the default site
``--dry-run``
@@ -189,6 +201,7 @@ class WikiProjectTagger(Task):
nocreate = kwargs.get("nocreate", False)
recursive = kwargs.get("recursive", 0)
tag_categories = kwargs.get("tag-categories", False)
not_in_category = kwargs.get("not-in-category")
dry_run = kwargs.get("dry-run", False)
banner, names = self.get_names(site, banner)
if not names:
@@ -210,6 +223,7 @@ class WikiProjectTagger(Task):
only_with=only_with,
nocreate=nocreate,
tag_categories=tag_categories,
not_in_category=not_in_category,
dry_run=dry_run,
)

@@ -224,6 +238,11 @@ class WikiProjectTagger(Task):
"""
Run a tagging *job* on a given *site*.
"""
if job.not_in_category:
skip_category = site.get_category(job.not_in_category)
for page in skip_category.get_members():
job.skip_pages.add(page.title)

if "category" in kwargs:
title = kwargs["category"]
title = self.guess_namespace(site, title, constants.NS_CATEGORY)
@@ -322,6 +341,10 @@ class WikiProjectTagger(Task):
if not page.is_talkpage:
page = page.toggle_talk()

if page.title in job.skip_pages:
self.logger.debug(f"Skipping page, in category to skip: [[{page.title}]]")
return

if page.title in job.processed_pages:
self.logger.debug(f"Skipping page, already processed: [[{page.title}]]")
return
@@ -330,7 +353,7 @@ class WikiProjectTagger(Task):
if job.counter % 10 == 0: # Do a shutoff check every ten pages
if self.shutoff_enabled(page.site):
raise ShutoffEnabled()
job.counter += 1
job.add_to_counter(1)

try:
code = page.parse()


+ 90
- 66
earwigbot/wiki/category.py View File

@@ -1,4 +1,4 @@
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -18,6 +18,9 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from collections.abc import Iterator

from earwigbot.wiki.constants import Service
from earwigbot.wiki.page import Page

__all__ = ["Category"]
@@ -27,14 +30,14 @@ class Category(Page):
"""
**EarwigBot: Wiki Toolset: Category**

Represents a category on a given :py:class:`~earwigbot.wiki.site.Site`, a
subclass of :py:class:`~earwigbot.wiki.page.Page`. Provides additional
methods, but :py:class:`~earwigbot.wiki.page.Page`'s own methods should
work fine on :py:class:`Category` objects. :py:meth:`site.get_page()
<earwigbot.wiki.site.Site.get_page>` will return a :py:class:`Category`
instead of a :py:class:`~earwigbot.wiki.page.Page` if the given title is in
the category namespace; :py:meth:`~earwigbot.wiki.site.Site.get_category`
is shorthand, accepting category names without the namespace prefix.
Represents a category on a given :py:class:`~earwigbot.wiki.site.Site`, a subclass
of :py:class:`~earwigbot.wiki.page.Page`. Provides additional methods, but
:py:class:`~earwigbot.wiki.page.Page`'s own methods should work fine on
:py:class:`Category` objects. :py:meth:`site.get_page()
<earwigbot.wiki.site.Site.get_page>` will return a :py:class:`Category` instead of
a :py:class:`~earwigbot.wiki.page.Page` if the given title is in the category
namespace; :py:meth:`~earwigbot.wiki.site.Site.get_category` is shorthand,
accepting category names without the namespace prefix.

*Attributes:*

@@ -48,22 +51,30 @@ class Category(Page):
- :py:meth:`get_members`: iterates over Pages in the category
"""

def __repr__(self):
"""Return the canonical string representation of the Category."""
def __repr__(self) -> str:
"""
Return the canonical string representation of the Category.
"""
res = "Category(title={0!r}, follow_redirects={1!r}, site={2!r})"
return res.format(self._title, self._follow_redirects, self._site)

def __str__(self):
"""Return a nice string representation of the Category."""
def __str__(self) -> str:
"""
Return a nice string representation of the Category.
"""
return f'<Category "{self.title}" of {str(self.site)}>'

def __iter__(self):
"""Iterate over all members of the category."""
def __iter__(self) -> Iterator[Page]:
"""
Iterate over all members of the category.
"""
return self.get_members()

def _get_members_via_api(self, limit, follow):
"""Iterate over Pages in the category using the API."""
params = {
def _get_members_via_api(self, limit: int | None, follow: bool) -> Iterator[Page]:
"""
Iterate over Pages in the category using the API.
"""
params: dict[str, str | int] = {
"action": "query",
"list": "categorymembers",
"cmtitle": self.title,
@@ -84,8 +95,10 @@ class Category(Page):
else:
break

def _get_members_via_sql(self, limit, follow):
"""Iterate over Pages in the category using SQL."""
def _get_members_via_sql(self, limit: int | None, follow: bool) -> Iterator[Page]:
"""
Iterate over Pages in the category using SQL.
"""
query = """SELECT page_title, page_namespace, page_id FROM page
JOIN categorylinks ON page_id = cl_from
WHERE cl_to = ?"""
@@ -107,16 +120,20 @@ class Category(Page):
title = base
yield self.site.get_page(title, follow_redirects=follow, pageid=row[2])

def _get_size_via_api(self, member_type):
"""Return the size of the category using the API."""
def _get_size_via_api(self, member_type: str) -> int:
"""
Return the size of the category using the API.
"""
result = self.site.api_query(
action="query", prop="categoryinfo", titles=self.title
)
info = list(result["query"]["pages"].values())[0]["categoryinfo"]
return info[member_type]

def _get_size_via_sql(self, member_type):
"""Return the size of the category using SQL."""
def _get_size_via_sql(self, member_type: str) -> int:
"""
Return the size of the category using SQL.
"""
query = "SELECT COUNT(*) FROM categorylinks WHERE cl_to = ?"
title = self.title.replace(" ", "_").split(":", 1)[1]
if member_type == "size":
@@ -126,49 +143,54 @@ class Category(Page):
result = self.site.sql_query(query, (title, member_type[:-1]))
return list(result)[0][0]

def _get_size(self, member_type):
"""Return the size of the category."""
def _get_size(self, member_type: str) -> int:
"""
Return the size of the category.
"""
services = {
self.site.SERVICE_API: self._get_size_via_api,
self.site.SERVICE_SQL: self._get_size_via_sql,
Service.API: self._get_size_via_api,
Service.SQL: self._get_size_via_sql,
}
return self.site.delegate(services, (member_type,))
return self.site.delegate(services, member_type)

@property
def size(self):
"""The total number of members in the category.
def size(self) -> int:
"""
The total number of members in the category.

Includes pages, files, and subcats. Equal to :py:attr:`pages` +
:py:attr:`files` + :py:attr:`subcats`. This will use either the API or
SQL depending on which are enabled and the amount of lag on each. This
is handled by :py:meth:`site.delegate()
<earwigbot.wiki.site.Site.delegate>`.
:py:attr:`files` + :py:attr:`subcats`. This will use either the API or SQL
depending on which are enabled and the amount of lag on each. This is handled
by :py:meth:`site.delegate() <earwigbot.wiki.site.Site.delegate>`.
"""
return self._get_size("size")

@property
def pages(self):
"""The number of pages in the category.
def pages(self) -> int:
"""
The number of pages in the category.

This will use either the API or SQL depending on which are enabled and
the amount of lag on each. This is handled by :py:meth:`site.delegate()
This will use either the API or SQL depending on which are enabled and the
amount of lag on each. This is handled by :py:meth:`site.delegate()
<earwigbot.wiki.site.Site.delegate>`.
"""
return self._get_size("pages")

@property
def files(self):
"""The number of files in the category.
def files(self) -> int:
"""
The number of files in the category.

This will use either the API or SQL depending on which are enabled and
the amount of lag on each. This is handled by :py:meth:`site.delegate()
This will use either the API or SQL depending on which are enabled and the
amount of lag on each. This is handled by :py:meth:`site.delegate()
<earwigbot.wiki.site.Site.delegate>`.
"""
return self._get_size("files")

@property
def subcats(self):
"""The number of subcategories in the category.
def subcats(self) -> int:
"""
The number of subcategories in the category.

This will use either the API or SQL depending on which are enabled and
the amount of lag on each. This is handled by :py:meth:`site.delegate()
@@ -176,36 +198,38 @@ class Category(Page):
"""
return self._get_size("subcats")

def get_members(self, limit=None, follow_redirects=None):
"""Iterate over Pages in the category.
def get_members(
self, limit: int | None = None, follow_redirects: bool | None = None
) -> Iterator[Page]:
"""
Iterate over Pages in the category.

If *limit* is given, we will provide this many pages, or less if the
category is smaller. By default, *limit* is ``None``, meaning we will
keep iterating over members until the category is exhausted.
*follow_redirects* is passed directly to :py:meth:`site.get_page()
<earwigbot.wiki.site.Site.get_page>`; it defaults to ``None``, which
will use the value passed to our :py:meth:`__init__`.
If *limit* is given, we will provide this many pages, or fewer if the category
is smaller. By default, *limit* is ``None``, meaning we will keep iterating
over members until the category is exhausted. *follow_redirects* is passed
directly to :py:meth:`site.get_page() <earwigbot.wiki.site.Site.get_page>`;
it defaults to ``None``, which will use the value passed to our
:py:meth:`__init__`.

This will use either the API or SQL depending on which are enabled and
the amount of lag on each. This is handled by :py:meth:`site.delegate()
This will use either the API or SQL depending on which are enabled and the
amount of lag on each. This is handled by :py:meth:`site.delegate()
<earwigbot.wiki.site.Site.delegate>`.

.. note::
Be careful when iterating over very large categories with no limit.
If using the API, at best, you will make one query per 5000 pages,
which can add up significantly for categories with hundreds of
thousands of members. As for SQL, note that *all page titles are
stored internally* as soon as the query is made, so the site-wide
SQL lock can be freed and unrelated queries can be made without
requiring a separate connection to be opened. This is generally not
an issue unless your category's size approaches several hundred
Be careful when iterating over very large categories with no limit. If using
the API, at best, you will make one query per 5000 pages, which can add up
significantly for categories with hundreds of thousands of members. As for
SQL, note that *all page titles are stored internally* as soon as the query
is made, so the site-wide SQL lock can be freed and unrelated queries can be
made without requiring a separate connection to be opened. This is generally
not an issue unless your category's size approaches several hundred
thousand, in which case the sheer number of titles in memory becomes
problematic.
"""
services = {
self.site.SERVICE_API: self._get_members_via_api,
self.site.SERVICE_SQL: self._get_members_via_sql,
Service.API: self._get_members_via_api,
Service.SQL: self._get_members_via_sql,
}
if follow_redirects is None:
follow_redirects = self._follow_redirects
return self.site.delegate(services, (limit, follow_redirects))
return self.site.delegate(services, limit, follow_redirects)

+ 49
- 5
earwigbot/wiki/constants.py View File

@@ -31,14 +31,50 @@ Import directly with ``from earwigbot.wiki import constants`` or
:py:mod:`earwigbot.wiki` directly (e.g. ``earwigbot.wiki.USER_AGENT``).
"""

__all__ = [
"NS_CATEGORY_TALK",
"NS_CATEGORY",
"NS_DRAFT_TALK",
"NS_DRAFT",
"NS_FILE_TALK",
"NS_FILE",
"NS_HELP_TALK",
"NS_HELP",
"NS_MAIN",
"NS_MEDIA",
"NS_MEDIAWIKI_TALK",
"NS_MEDIAWIKI",
"NS_MODULE_TALK",
"NS_MODULE",
"NS_PORTAL_TALK",
"NS_PORTAL",
"NS_PROJECT_TALK",
"NS_PROJECT",
"NS_SPECIAL",
"NS_TALK",
"NS_TEMPLATE_TALK",
"NS_TEMPLATE",
"NS_USER_TALK",
"NS_USER",
"USER_AGENT",
]

import platform
from enum import Enum

import earwigbot

# Default User Agent when making API queries:
from platform import python_version as _p
USER_AGENT = (
f"EarwigBot/{earwigbot.__version__} "
f"(Python/{platform.python_version()}; https://github.com/earwig/earwigbot)"
)

from earwigbot import __version__ as _v

USER_AGENT = "EarwigBot/{0} (Python/{1}; https://github.com/earwig/earwigbot)"
USER_AGENT = USER_AGENT.format(_v, _p())
del _v, _p
class Service(Enum):
API = 1
SQL = 2


# Default namespace IDs:
NS_MAIN = 0
@@ -57,5 +93,13 @@ NS_HELP = 12
NS_HELP_TALK = 13
NS_CATEGORY = 14
NS_CATEGORY_TALK = 15

NS_PORTAL = 100
NS_PORTAL_TALK = 101
NS_DRAFT = 118
NS_DRAFT_TALK = 119
NS_MODULE = 828
NS_MODULE_TALK = 829

NS_SPECIAL = -1
NS_MEDIA = -2

+ 290
- 225
earwigbot/wiki/page.py View File

@@ -1,4 +1,4 @@
# Copyright (C) 2009-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -18,17 +18,27 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import annotations

import hashlib
import re
from hashlib import md5
from logging import NullHandler, getLogger
from time import gmtime, strftime
from urllib.parse import quote
import time
import typing
import urllib.parse
from collections.abc import Iterable
from logging import Logger, NullHandler, getLogger
from typing import Any

import mwparserfromhell

from earwigbot import exceptions
from earwigbot.exceptions import APIError
from earwigbot.wiki.copyvios import CopyvioMixIn

if typing.TYPE_CHECKING:
from earwigbot.wiki.site import Site
from earwigbot.wiki.user import User

__all__ = ["Page"]


@@ -36,10 +46,10 @@ class Page(CopyvioMixIn):
"""
**EarwigBot: Wiki Toolset: Page**

Represents a page on a given :py:class:`~earwigbot.wiki.site.Site`. Has
methods for getting information about the page, getting page content, and
so on. :py:class:`~earwigbot.wiki.category.Category` is a subclass of
:py:class:`Page` with additional methods.
Represents a page on a given :py:class:`~earwigbot.wiki.site.Site`. Has methods for
getting information about the page, getting page content, and so on.
:py:class:`~earwigbot.wiki.category.Category` is a subclass of :py:class:`Page`
with additional methods.

*Attributes:*

@@ -59,20 +69,19 @@ class Page(CopyvioMixIn):
- :py:meth:`reload`: forcibly reloads the page's attributes
- :py:meth:`toggle_talk`: returns a content page's talk page, or vice versa
- :py:meth:`get`: returns the page's content
- :py:meth:`get_redirect_target`: returns the page's destination if it is a
redirect
- :py:meth:`get_creator`: returns a User object representing the first
person to edit the page
- :py:meth:`get_redirect_target`: returns the page's destination if it is a redirect
- :py:meth:`get_creator`: returns a User object representing the first person to
edit the page
- :py:meth:`parse`: parses the page content for templates, links, etc
- :py:meth:`edit`: replaces the page's content or creates a new page
- :py:meth:`add_section`: adds a new section at the bottom of the page
- :py:meth:`check_exclusion`: checks whether or not we are allowed to edit
the page, per ``{{bots}}``/``{{nobots}}``
- :py:meth:`check_exclusion`: checks whether or not we are allowed to edit the
page, per ``{{bots}}``/``{{nobots}}``

- :py:meth:`~earwigbot.wiki.copyvios.CopyrightMixIn.copyvio_check`:
checks the page for copyright violations
- :py:meth:`~earwigbot.wiki.copyvios.CopyrightMixIn.copyvio_compare`:
checks the page like :py:meth:`copyvio_check`, but against a specific URL
- :py:meth:`~earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check`: checks the page
for copyright violations
- :py:meth:`~earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_compare`: checks the
page like :py:meth:`copyvio_check`, but against a specific URL
"""

PAGE_UNKNOWN = 0
@@ -80,18 +89,26 @@ class Page(CopyvioMixIn):
PAGE_MISSING = 2
PAGE_EXISTS = 3

def __init__(self, site, title, follow_redirects=False, pageid=None, logger=None):
"""Constructor for new Page instances.
def __init__(
self,
site: Site,
title: str,
follow_redirects: bool = False,
pageid: int | None = None,
logger: Logger | None = None,
) -> None:
"""
Constructor for new Page instances.

Takes four arguments: a Site object, the Page's title (or pagename),
whether or not to follow redirects (optional, defaults to False), and
a page ID to supplement the title (optional, defaults to None - i.e.,
we will have to query the API to get it).
Takes five arguments: a Site object, the Page's title (or pagename), whether or
not to follow redirects (optional, defaults to False), a page ID to supplement
the title (optional, defaults to None - i.e., we will have to query the API to
get it), and a logger (optional, defaults to None).

As with User, site.get_page() is preferred.

__init__() will not do any API queries, but it will use basic namespace
logic to determine our namespace ID and if we are a talkpage.
__init__() will not do any API queries, but it will use basic namespace logic
to determine our namespace ID and if we are a talkpage.
"""
super().__init__(site)
self._site = site
@@ -108,16 +125,16 @@ class Page(CopyvioMixIn):

# Attributes to be loaded through the API:
self._exists = self.PAGE_UNKNOWN
self._is_redirect = None
self._lastrevid = None
self._protection = None
self._fullurl = None
self._content = None
self._creator = None
self._is_redirect: bool | None = None
self._lastrevid: int | None = None
self._protection: dict | None = None
self._fullurl: str | None = None
self._content: str | None = None
self._creator: str | None = None

# Attributes used for editing/deleting/protecting/etc:
self._basetimestamp = None
self._starttimestamp = None
self._basetimestamp: str | None = None
self._starttimestamp: str | None = None

# Try to determine the page's namespace using our site's namespace
# converter:
@@ -137,54 +154,60 @@ class Page(CopyvioMixIn):
else:
self._is_talkpage = self._namespace % 2 == 1

def __repr__(self):
"""Return the canonical string representation of the Page."""
def __repr__(self) -> str:
"""
Return the canonical string representation of the Page.
"""
res = "Page(title={0!r}, follow_redirects={1!r}, site={2!r})"
return res.format(self._title, self._follow_redirects, self._site)

def __str__(self):
"""Return a nice string representation of the Page."""
def __str__(self) -> str:
"""
Return a nice string representation of the Page.
"""
return f'<Page "{self.title}" of {str(self.site)}>'

def _assert_validity(self):
"""Used to ensure that our page's title is valid.
def _assert_validity(self) -> None:
"""
Used to ensure that our page's title is valid.

If this method is called when our page is not valid (and after
_load_attributes() has been called), InvalidPageError will be raised.

Note that validity != existence. If a page's title is invalid (e.g, it
contains "[") it will always be invalid, and cannot be edited.
Note that validity != existence. If a page's title is invalid (e.g., it contains
"[") it will always be invalid, and cannot be edited.
"""
if self._exists == self.PAGE_INVALID:
e = f"Page '{self._title}' is invalid."
raise exceptions.InvalidPageError(e)

def _assert_existence(self):
"""Used to ensure that our page exists.
def _assert_existence(self) -> None:
"""
Used to ensure that our page exists.

If this method is called when our page doesn't exist (and after
_load_attributes() has been called), PageNotFoundError will be raised.
It will also call _assert_validity() beforehand.
_load_attributes() has been called), PageNotFoundError will be raised. It will
also call _assert_validity() beforehand.
"""
self._assert_validity()
if self._exists == self.PAGE_MISSING:
e = f"Page '{self._title}' does not exist."
raise exceptions.PageNotFoundError(e)

def _load(self):
"""Call _load_attributes() and follows redirects if we're supposed to.
def _load(self) -> None:
"""
Call _load_attributes() and follow redirects if we're supposed to.

This method will only follow redirects if follow_redirects=True was
passed to __init__() (perhaps indirectly passed by site.get_page()).
It avoids the API's &redirects param in favor of manual following,
so we can act more realistically (we don't follow double redirects, and
circular redirects don't break us).
This method will only follow redirects if follow_redirects=True was passed to
__init__() (perhaps indirectly passed by site.get_page()). It avoids the API's
&redirects param in favor of manual following, so we can act more realistically
(we don't follow double redirects, and circular redirects don't break us).

This will raise RedirectError if we have a problem following, but that
is a bug and should NOT happen.
This will raise RedirectError if we have a problem following, but that is a bug
and should NOT happen.

If we're following a redirect, this will make a grand total of three
API queries. It's a lot, but each one is quite small.
If we're following a redirect, this will make a grand total of three API
queries. It's a lot, but each one is quite small.
"""
self._load_attributes()

@@ -194,14 +217,14 @@ class Page(CopyvioMixIn):
self._content = None # reset the content we just loaded
self._load_attributes()

def _load_attributes(self, result=None):
"""Load various data from the API in a single query.
def _load_attributes(self, result: dict | None = None) -> None:
"""
Load various data from the API in a single query.

Loads self._title, ._exists, ._is_redirect, ._pageid, ._fullurl,
._protection, ._namespace, ._is_talkpage, ._creator, ._lastrevid, and
._starttimestamp using the API. It will do a query of its own unless
*result* is provided, in which case we'll pretend *result* is what the
query returned.
Loads self._title, ._exists, ._is_redirect, ._pageid, ._fullurl, ._protection,
._namespace, ._is_talkpage, ._creator, ._lastrevid, and ._starttimestamp using
the API. It will do a query of its own unless *result* is provided, in which
case we'll pretend *result* is what the query returned.

Assuming the API is sound, this should not raise any exceptions.
"""
@@ -217,6 +240,7 @@ class Page(CopyvioMixIn):
titles=self._title,
)

assert result is not None
if "interwiki" in result["query"]:
self._title = result["query"]["interwiki"][0]["title"]
self._exists = self.PAGE_INVALID
@@ -242,7 +266,7 @@ class Page(CopyvioMixIn):

self._fullurl = res["fullurl"]
self._protection = res["protection"]
self._starttimestamp = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime())
self._starttimestamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

# We've determined the namespace and talkpage status in __init__()
# based on the title, but now we can be sure:
@@ -256,15 +280,15 @@ class Page(CopyvioMixIn):
except KeyError:
pass

def _load_content(self, result=None):
"""Load current page content from the API.
def _load_content(self, result: dict | None = None) -> None:
"""
Load current page content from the API.

If *result* is provided, we'll pretend that is the result of an API
query and try to get content from that. Otherwise, we'll do an API
query on our own.
If *result* is provided, we'll pretend that is the result of an API query and
try to get content from that. Otherwise, we'll do an API query on our own.

Don't call this directly, ever; use reload() followed by get() if you
want to force content reloading.
Don't call this directly, ever; use reload() followed by get() if you want to
force content reloading.
"""
if not result:
query = self.site.api_query
@@ -277,6 +301,7 @@ class Page(CopyvioMixIn):
titles=self._title,
)

assert result is not None
res = list(result["query"]["pages"].values())[0]
try:
revision = res["revisions"][0]
@@ -291,32 +316,32 @@ class Page(CopyvioMixIn):

def _edit(
self,
params=None,
text=None,
summary=None,
minor=None,
bot=None,
force=None,
section=None,
captcha_id=None,
captcha_word=None,
**kwargs,
):
"""Edit the page!

If *params* is given, we'll use it as our API query parameters.
Otherwise, we'll build params using the given kwargs via
_build_edit_params().

We'll then try to do the API query, and catch any errors the API raises
in _handle_edit_errors(). We'll then throw these back as subclasses of
EditError.
params: dict[str, Any] | None = None,
text: str | None = None,
summary: str | None = None,
minor: bool | None = None,
bot: bool | None = None,
force: bool | None = None,
section: int | str | None = None,
captcha_id: str | None = None,
captcha_word: str | None = None,
**kwargs: Any,
) -> None:
"""
Edit the page!

If *params* is given, we'll use it as our API query parameters. Otherwise,
we'll build params using the given kwargs via _build_edit_params().

We'll then try to do the API query, and catch any errors the API raises in
_handle_edit_errors(). We'll then throw these back as subclasses of EditError.
"""
# Weed out invalid pages before we get too far:
self._assert_validity()

# Build our API query string:
if not params:
assert text is not None, "Edit text must be provided when params are unset"
params = self._build_edit_params(
text,
summary,
@@ -351,26 +376,26 @@ class Page(CopyvioMixIn):

def _build_edit_params(
self,
text,
summary,
minor,
bot,
force,
section,
captcha_id,
captcha_word,
kwargs,
):
"""Given some keyword arguments, build an API edit query string."""
unitxt = text.encode("utf8") if isinstance(text, str) else text
hashed = md5(unitxt).hexdigest() # Checksum to ensure text is correct
text: str,
summary: str | None,
minor: bool | None,
bot: bool | None,
force: bool | None,
section: int | str | None,
captcha_id: str | None,
captcha_word: str | None,
kwargs: dict[str, Any],
) -> dict[str, Any]:
"""
Given some keyword arguments, build an API edit query string.
"""
params = {
"action": "edit",
"title": self._title,
"text": text,
"token": self.site.get_token(),
"summary": summary,
"md5": hashed,
"md5": hashlib.md5(text.encode("utf-8")).hexdigest(),
}

if section:
@@ -403,12 +428,15 @@ class Page(CopyvioMixIn):
params[key] = val
return params

def _handle_edit_errors(self, error, params, retry=True):
"""If our edit fails due to some error, try to handle it.
def _handle_edit_errors(
self, error: APIError, params: dict[str, Any], retry: bool = True
) -> dict:
"""
If our edit fails due to some error, try to handle it.

We'll either raise an appropriate exception (for example, if the page
is protected), or we'll try to fix it (for example, if the token is
invalid, we'll try to get a new one).
We'll either raise an appropriate exception (for example, if the page is
protected), or we'll try to fix it (for example, if the token is invalid, we'll
try to get a new one).
"""
perms = [
"noedit",
@@ -447,27 +475,31 @@ class Page(CopyvioMixIn):
raise exceptions.EditError(": ".join((error.code, error.info)))

@property
def site(self):
"""The page's corresponding Site object."""
def site(self) -> Site:
"""
The page's corresponding Site object.
"""
return self._site

@property
def title(self):
"""The page's title, or "pagename".
def title(self) -> str:
"""
The page's title, or "pagename".

This won't do any API queries on its own. Any other attributes or
methods that do API queries will reload the title, however, like
:py:attr:`exists` and :py:meth:`get`, potentially "normalizing" it or
following redirects if :py:attr:`self._follow_redirects` is ``True``.
This won't do any API queries on its own. Any other attributes or methods that
do API queries will reload the title, however, like :py:attr:`exists` and
:py:meth:`get`, potentially "normalizing" it or following redirects if
:py:attr:`self._follow_redirects` is ``True``.
"""
return self._title

@property
def exists(self):
"""Whether or not the page exists.
def exists(self) -> int:
"""
Whether or not the page exists.

This will be a number; its value does not matter, but it will equal
one of :py:attr:`self.PAGE_INVALID <PAGE_INVALID>`,
This will be a number; its value does not matter, but it will equal one of
:py:attr:`self.PAGE_INVALID <PAGE_INVALID>`,
:py:attr:`self.PAGE_MISSING <PAGE_MISSING>`, or
:py:attr:`self.PAGE_EXISTS <PAGE_EXISTS>`.

@@ -478,55 +510,60 @@ class Page(CopyvioMixIn):
return self._exists

@property
def pageid(self):
"""An integer ID representing the page.
def pageid(self) -> int:
"""
An integer ID representing the page.

Makes an API query only if we haven't already made one and the *pageid*
parameter to :py:meth:`__init__` was left as ``None``, which should be
true for all cases except when pages are returned by an SQL generator
(like :py:meth:`category.get_members()
parameter to :py:meth:`__init__` was left as ``None``, which should be true for
all cases except when pages are returned by an SQL generator (like
:py:meth:`category.get_members()
<earwigbot.wiki.category.Category.get_members>`).

Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
invalid or the page does not exist, respectively.
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is invalid
or the page does not exist, respectively.
"""
if self._pageid:
return self._pageid
if self._exists == self.PAGE_UNKNOWN:
self._load()
self._assert_existence() # Missing pages do not have IDs
assert self._pageid is not None, "Page exists but does not have an ID"
return self._pageid

@property
def url(self):
"""The page's URL.
def url(self) -> str:
"""
The page's URL.

Like :py:meth:`title`, this won't do any API queries on its own. If the
API was never queried for this page, we will attempt to determine the
URL ourselves based on the title.
Like :py:meth:`title`, this won't do any API queries on its own. If the API was
never queried for this page, we will attempt to determine the URL ourselves
based on the title.
"""
if self._fullurl:
return self._fullurl
else:
encoded = self._title.encode("utf8").replace(" ", "_")
slug = quote(encoded, safe="/:").decode("utf8")
path = self.site._article_path.replace("$1", slug)
encoded = self._title.replace(" ", "_")
slug = urllib.parse.quote(encoded, safe="/:")
path = self.site.article_path.replace("$1", slug)
return "".join((self.site.url, path))

@property
def namespace(self):
"""The page's namespace ID (an integer).
def namespace(self) -> int:
"""
The page's namespace ID (an integer).

Like :py:meth:`title`, this won't do any API queries on its own. If the
API was never queried for this page, we will attempt to determine the
namespace ourselves based on the title.
Like :py:meth:`title`, this won't do any API queries on its own. If the API was
never queried for this page, we will attempt to determine the namespace
ourselves based on the title.
"""
return self._namespace

@property
def lastrevid(self):
"""The ID of the page's most recent revision.
def lastrevid(self) -> int | None:
"""
The ID of the page's most recent revision.

Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
@@ -538,14 +575,15 @@ class Page(CopyvioMixIn):
return self._lastrevid

@property
def protection(self):
"""The page's current protection status.
def protection(self) -> dict | None:
"""
The page's current protection status.

Makes an API query only if we haven't already made one.

Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` if the page
name is invalid. Won't raise an error if the page is missing because
those can still be create-protected.
Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` if the page name is
invalid. Won't raise an error if the page is missing because those can still be
create-protected.
"""
if self._exists == self.PAGE_UNKNOWN:
self._load()
@@ -553,17 +591,18 @@ class Page(CopyvioMixIn):
return self._protection

@property
def is_talkpage(self):
"""``True`` if the page is a talkpage, otherwise ``False``.
def is_talkpage(self) -> bool:
"""
``True`` if the page is a talkpage, otherwise ``False``.

Like :py:meth:`title`, this won't do any API queries on its own. If the
API was never queried for this page, we will attempt to determine
whether it is a talkpage ourselves based on its namespace.
Like :py:meth:`title`, this won't do any API queries on its own. If the API was
never queried for this page, we will attempt to determine whether it is a
talkpage ourselves based on its namespace.
"""
return self._is_talkpage

@property
def is_redirect(self):
def is_redirect(self) -> bool:
"""``True`` if the page is a redirect, otherwise ``False``.

Makes an API query only if we haven't already made one.
@@ -572,34 +611,36 @@ class Page(CopyvioMixIn):
"""
if self._exists == self.PAGE_UNKNOWN:
self._load()
assert self._is_redirect is not None
return self._is_redirect

def reload(self):
"""Forcibly reload the page's attributes.
def reload(self) -> None:
"""
Forcibly reload the page's attributes.

Emphasis on *reload*: this is only necessary if there is reason to
believe they have changed.
Emphasis on *reload*: this is only necessary if there is reason to believe they
have changed.
"""
self._load()
if self._content is not None:
# Only reload content if it has already been loaded:
self._load_content()

def toggle_talk(self, follow_redirects=None):
"""Return a content page's talk page, or vice versa.
def toggle_talk(self, follow_redirects: bool | None = None) -> Page:
"""
Return a content page's talk page, or vice versa.

The title of the new page is determined by namespace logic, not API
queries. We won't make any API queries on our own.
The title of the new page is determined by namespace logic, not API queries.
We won't make any API queries on our own.

If *follow_redirects* is anything other than ``None`` (the default), it
will be passed to the new :py:class:`~earwigbot.wiki.page.Page`
object's :py:meth:`__init__`. Otherwise, we'll use the value passed to
our own :py:meth:`__init__`.
If *follow_redirects* is anything other than ``None`` (the default), it will be
passed to the new :py:class:`~earwigbot.wiki.page.Page` object's
:py:meth:`__init__`. Otherwise, we'll use the value passed to our own
:py:meth:`__init__`.

Will raise :py:exc:`~earwigbot.exceptions.InvalidPageError` if we try
to get the talk page of a special page (in the ``Special:`` or
``Media:`` namespaces), but we won't raise an exception if our page is
otherwise missing or invalid.
Will raise :py:exc:`~earwigbot.exceptions.InvalidPageError` if we try to get
the talk page of a special page (in the ``Special:`` or ``Media:`` namespaces),
but we won't raise an exception if our page is otherwise missing or invalid.
"""
if self._namespace < 0:
ns = self.site.namespace_id_to_name(self._namespace)
@@ -629,11 +670,12 @@ class Page(CopyvioMixIn):
follow_redirects = self._follow_redirects
return Page(self.site, new_title, follow_redirects)

def get(self):
"""Return page content, which is cached if you try to call get again.
def get(self) -> str:
"""
Return page content, which is cached if you try to call get again.

Raises InvalidPageError or PageNotFoundError if the page name is
invalid or the page does not exist, respectively.
Raises InvalidPageError or PageNotFoundError if the page name is invalid or the
page does not exist, respectively.
"""
if self._exists == self.PAGE_UNKNOWN:
# Kill two birds with one stone by doing an API query for both our
@@ -659,6 +701,7 @@ class Page(CopyvioMixIn):
self._exists = self.PAGE_UNKNOWN # Force another API query
self.get()

assert self._content is not None
return self._content

# Make sure we're dealing with a real page here. This may be outdated
@@ -669,16 +712,17 @@ class Page(CopyvioMixIn):
if self._content is None:
self._load_content()

assert self._content is not None
return self._content

def get_redirect_target(self):
"""If the page is a redirect, return its destination.
def get_redirect_target(self) -> str:
"""
If the page is a redirect, return its destination.

Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
invalid or the page does not exist, respectively. Raises
:py:exc:`~earwigbot.exceptions.RedirectError` if the page is not a
redirect.
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is invalid
or the page does not exist, respectively. Raises
:py:exc:`~earwigbot.exceptions.RedirectError` if the page is not a redirect.
"""
re_redirect = r"^\s*\#\s*redirect\s*\[\[(.*?)\]\]"
content = self.get()
@@ -688,19 +732,20 @@ class Page(CopyvioMixIn):
e = "The page does not appear to have a redirect target."
raise exceptions.RedirectError(e)

def get_creator(self):
"""Return the User object for the first person to edit the page.
def get_creator(self) -> User:
"""
Return the User object for the first person to edit the page.

Makes an API query only if we haven't already made one. Normally, we
can get the creator along with everything else (except content) in
:py:meth:`_load_attributes`. However, due to a limitation in the API
(can't get the editor of one revision and the content of another at
both ends of the history), if our other attributes were only loaded
through :py:meth:`get`, we'll have to do another API query.
Makes an API query only if we haven't already made one. Normally, we can get
the creator along with everything else (except content) in
:py:meth:`_load_attributes`. However, due to a limitation in the API (can't get
the editor of one revision and the content of another at both ends of the
history), if our other attributes were only loaded through :py:meth:`get`,
we'll have to do another API query.

Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
invalid or the page does not exist, respectively.
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is invalid
or the page does not exist, respectively.
"""
if self._exists == self.PAGE_UNKNOWN:
self._load()
@@ -710,41 +755,59 @@ class Page(CopyvioMixIn):
self._assert_existence()
return self.site.get_user(self._creator)

def parse(self):
"""Parse the page content for templates, links, etc.
def parse(self) -> mwparserfromhell.wikicode.Wikicode:
"""
Parse the page content for templates, links, etc.

Actual parsing is handled by :py:mod:`mwparserfromhell`. Raises
:py:exc:`~earwigbot.exceptions.InvalidPageError` or
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
invalid or the page does not exist, respectively.
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is invalid
or the page does not exist, respectively.
"""
return mwparserfromhell.parse(self.get())

def edit(self, text, summary, minor=False, bot=True, force=False, **kwargs):
"""Replace the page's content or creates a new page.
def edit(
self,
text: str,
summary: str | None,
minor: bool = False,
bot: bool = True,
force: bool = False,
**kwargs: Any,
) -> None:
"""
Replace the page's content or create a new page.

*text* is the new page content, with *summary* as the edit summary.
If *minor* is ``True``, the edit will be marked as minor. If *bot* is
``True``, the edit will be marked as a bot edit, but only if we
actually have a bot flag.
*text* is the new page content, with *summary* as the edit summary. If *minor*
is ``True``, the edit will be marked as minor. If *bot* is ``True``, the edit
will be marked as a bot edit, but only if we actually have a bot flag.

Use *force* to push the new content even if there's an edit conflict or
the page was deleted/recreated between getting our edit token and
editing our page. Be careful with this!
Use *force* to push the new content even if there's an edit conflict or the
page was deleted/recreated between getting our edit token and editing our page.
Be careful with this!
"""
self._edit(
text=text, summary=summary, minor=minor, bot=bot, force=force, **kwargs
)

def add_section(self, text, title, minor=False, bot=True, force=False, **kwargs):
"""Add a new section to the bottom of the page.
def add_section(
self,
text: str,
title: str,
minor: bool = False,
bot: bool = True,
force: bool = False,
**kwargs: Any,
) -> None:
"""
Add a new section to the bottom of the page.

The arguments for this are the same as those for :py:meth:`edit`, but
instead of providing a summary, you provide a section title. Likewise,
raised exceptions are the same as :py:meth:`edit`'s.
The arguments for this are the same as those for :py:meth:`edit`, but instead
of providing a summary, you provide a section title. Likewise, raised
exceptions are the same as :py:meth:`edit`'s.

This should create the page if it does not already exist, with just the
new section as content.
This should create the page if it does not already exist, with just the new
section as content.
"""
self._edit(
text=text,
@@ -756,25 +819,27 @@ class Page(CopyvioMixIn):
**kwargs,
)

def check_exclusion(self, username=None, optouts=None):
"""Check whether or not we are allowed to edit the page.
def check_exclusion(
self, username: str | None = None, optouts: Iterable[str] | None = None
) -> bool:
"""
Check whether or not we are allowed to edit the page.

Return ``True`` if we *are* allowed to edit this page, and ``False`` if
we aren't.

*username* is used to determine whether we are part of a specific list
of allowed or disallowed bots (e.g. ``{{bots|allow=EarwigBot}}`` or
``{{bots|deny=FooBot,EarwigBot}}``). It's ``None`` by default, which
will swipe our username from :py:meth:`site.get_user()
*username* is used to determine whether we are part of a specific list of
allowed or disallowed bots (e.g. ``{{bots|allow=EarwigBot}}`` or
``{{bots|deny=FooBot,EarwigBot}}``). It's ``None`` by default, which will swipe
our username from :py:meth:`site.get_user()
<earwigbot.wiki.site.Site.get_user>`.\
:py:attr:`~earwigbot.wiki.user.User.name`.

*optouts* is a list of messages to consider this check as part of for
the purpose of opt-out; it defaults to ``None``, which ignores the
parameter completely. For example, if *optouts* is ``["nolicense"]``,
we'll return ``False`` on ``{{bots|optout=nolicense}}`` or
``{{bots|optout=all}}``, but `True` on
``{{bots|optout=orfud,norationale,replaceable}}``.
*optouts* is a list of messages to consider this check as part of for the
purpose of opt-out; it defaults to ``None``, which ignores the parameter
completely. For example, if *optouts* is ``["nolicense"]``, we'll return
``False`` on ``{{bots|optout=nolicense}}`` or ``{{bots|optout=all}}``, but
``True`` on ``{{bots|optout=orfud,norationale,replaceable}}``.
"""

def parse_param(template, param):


+ 542
- 378
earwigbot/wiki/site.py
File diff suppressed because it is too large
View File


+ 166
- 123
earwigbot/wiki/sitesdb.py View File

@@ -18,78 +18,102 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import annotations

import errno
import sqlite3 as sqlite
import stat
import typing
from collections import OrderedDict
from http.cookiejar import LoadError, LWPCookieJar
from dataclasses import dataclass
from http.cookiejar import CookieJar, LoadError, LWPCookieJar
from os import chmod, path
from platform import python_version

from earwigbot import __version__
from earwigbot.exceptions import SiteNotFoundError
from earwigbot.wiki.copyvios.exclusions import ExclusionsDB
from earwigbot.wiki.site import Site
from earwigbot.wiki.site import Site, SqlConnInfo

if typing.TYPE_CHECKING:
from earwigbot.bot import Bot

__all__ = ["SitesDB"]


@dataclass(frozen=True)
class _SiteInfoFromDB:
    """
    Immutable record of what the sitesdb stores about a single site.

    Built by ``SitesDB._load_site_from_sitesdb()`` and read by
    ``SitesDB._make_site_object()`` to construct a ``Site``. The first six fields
    come from the site's row in the ``sites`` table.
    """

    name: str  # site_name column
    project: str  # site_project column
    lang: str  # site_lang column
    base_url: str  # site_base_url column
    article_path: str  # site_article_path column
    script_path: str  # site_script_path column
    # Key/value pairs from the sql_data table; when this is empty,
    # _make_site_object() falls back to the "sql" section of the bot's config.
    sql: SqlConnInfo
    # Namespace ID -> list of names, with the "primary" name first in each list.
    namespaces: dict[int, list[str]]


class SitesDB:
"""
**EarwigBot: Wiki Toolset: Sites Database Manager**

This class controls the :file:`sites.db` file, which stores information
about all wiki sites known to the bot. Three public methods act as bridges
between the bot's config files and :py:class:`~earwigbot.wiki.site.Site`
objects:
This class controls the :file:`sites.db` file, which stores information about all
wiki sites known to the bot. Three public methods act as bridges between the bot's
config files and :py:class:`~earwigbot.wiki.site.Site` objects:

- :py:meth:`get_site`: returns a Site object corresponding to a site
- :py:meth:`add_site`: stores a site in the database
- :py:meth:`remove_site`: removes a site from the database

There's usually no need to use this class directly. All public methods
here are available as :py:meth:`bot.wiki.get_site`,
:py:meth:`bot.wiki.add_site`, and :py:meth:`bot.wiki.remove_site`, which
use a :file:`sites.db` file located in the same directory as our
:file:`config.yml` file. Lower-level access can be achieved by importing
the manager class (``from earwigbot.wiki import SitesDB``).
There's usually no need to use this class directly. All public methods here are
available as :py:meth:`bot.wiki.get_site`, :py:meth:`bot.wiki.add_site`, and
:py:meth:`bot.wiki.remove_site`, which use a :file:`sites.db` file located in the
same directory as our :file:`config.yml` file. Lower-level access can be achieved
by importing the manager class (``from earwigbot.wiki import SitesDB``).
"""

def __init__(self, bot):
"""Set up the manager with an attribute for the base Bot object."""
def __init__(self, bot: Bot) -> None:
"""
Set up the manager with an attribute for the base Bot object.
"""
self.config = bot.config
self._logger = bot.logger.getChild("wiki")

self._sites = {} # Internal site cache
self._sites: dict[str, Site] = {} # Internal site cache
self._sitesdb = path.join(bot.config.root_dir, "sites.db")
self._cookie_file = path.join(bot.config.root_dir, ".cookies")
self._cookiejar = None
self._cookiejar: CookieJar | None = None

excl_db = path.join(bot.config.root_dir, "exclusions.db")
excl_logger = self._logger.getChild("exclusionsdb")
self._exclusions_db = ExclusionsDB(self, excl_db, excl_logger)

def __repr__(self):
"""Return the canonical string representation of the SitesDB."""
def __repr__(self) -> str:
"""
Return the canonical string representation of the SitesDB.
"""
res = "SitesDB(config={0!r}, sitesdb={1!r}, cookie_file={2!r})"
return res.format(self.config, self._sitesdb, self._cookie_file)

def __str__(self):
"""Return a nice string representation of the SitesDB."""
def __str__(self) -> str:
"""
Return a nice string representation of the SitesDB.
"""
return f"<SitesDB at {self._sitesdb}>"

def _get_cookiejar(self):
"""Return a LWPCookieJar object loaded from our .cookies file.
def _get_cookiejar(self) -> CookieJar:
"""
Return a LWPCookieJar object loaded from our .cookies file.

The same .cookies file is returned every time, located in the project
root, same directory as config.yml and bot.py. If it doesn't exist, we
will create the file and set it to be readable and writeable only by
us. If it exists but the information inside is bogus, we'll ignore it.
The same .cookies file is returned every time, located in the project root,
same directory as config.yml and bot.py. If it doesn't exist, we will create
the file and set it to be readable and writeable only by us. If it exists but
the information inside is bogus, we'll ignore it.

This is normally called by _make_site_object() (in turn called by
get_site()), and the cookiejar is passed to our Site's constructor,
used when it makes API queries. This way, we can easily preserve
cookies between sites (e.g., for CentralAuth), making logins easier.
This is normally called by _make_site_object() (in turn called by get_site()),
and the cookiejar is passed to our Site's constructor, used when it makes API
queries. This way, we can easily preserve cookies between sites (e.g., for
CentralAuth), making logins easier.
"""
if self._cookiejar:
return self._cookiejar
@@ -111,8 +135,10 @@ class SitesDB:

return self._cookiejar

def _create_sitesdb(self):
"""Initialize the sitesdb file with its three necessary tables."""
def _create_sitesdb(self) -> None:
"""
Initialize the sitesdb file with its three necessary tables.
"""
script = """
CREATE TABLE sites (site_name, site_project, site_lang, site_base_url,
site_article_path, site_script_path);
@@ -122,11 +148,12 @@ class SitesDB:
with sqlite.connect(self._sitesdb) as conn:
conn.executescript(script)

def _get_site_object(self, name):
"""Return the site from our cache, or create it if it doesn't exist.
def _get_site_object(self, name: str) -> Site:
"""
Return the site from our cache, or create it if it doesn't exist.

This is essentially just a wrapper around _make_site_object that
returns the same object each time a specific site is asked for.
This is essentially just a wrapper around _make_site_object that returns the
same object each time a specific site is asked for.
"""
try:
return self._sites[name]
@@ -135,14 +162,12 @@ class SitesDB:
self._sites[name] = site
return site

def _load_site_from_sitesdb(self, name):
"""Return all information stored in the sitesdb relating to given site.
def _load_site_from_sitesdb(self, name: str) -> _SiteInfoFromDB:
"""
Return all information stored in the sitesdb relating to given site.

The information will be returned as a tuple, containing the site's
name, project, language, base URL, article path, script path, SQL
connection data, and namespaces, in that order. If the site is not
found in the database, SiteNotFoundError will be raised. An empty
database will be created before the exception is raised if none exists.
If the site is not found in the database, SiteNotFoundError will be raised. An
empty database will be created before the exception is raised if none exists.
"""
query1 = "SELECT * FROM sites WHERE site_name = ?"
query2 = "SELECT sql_data_key, sql_data_value FROM sql_data WHERE sql_site = ?"
@@ -161,7 +186,7 @@ class SitesDB:

name, project, lang, base_url, article_path, script_path = site_data
sql = dict(sql_data)
namespaces = {}
namespaces: dict[int, list[str]] = {}
for ns_id, ns_name, ns_is_primary_name in ns_data:
try:
if ns_is_primary_name: # "Primary" name goes first in list
@@ -171,7 +196,7 @@ class SitesDB:
except KeyError:
namespaces[ns_id] = [ns_name]

return (
return _SiteInfoFromDB(
name,
project,
lang,
@@ -182,16 +207,16 @@ class SitesDB:
namespaces,
)

def _make_site_object(self, name):
"""Return a Site object associated with the site *name* in our sitesdb.
def _make_site_object(self, name: str) -> Site:
"""
Return a Site object associated with the site *name* in our sitesdb.

This calls _load_site_from_sitesdb(), so SiteNotFoundError will be
raised if the site is not in our sitesdb.
This calls _load_site_from_sitesdb(), so SiteNotFoundError will be raised if
the site is not in our sitesdb.
"""
cookiejar = self._get_cookiejar()
(name, project, lang, base_url, article_path, script_path, sql, namespaces) = (
self._load_site_from_sitesdb(name)
)
info = self._load_site_from_sitesdb(name)
name = info.name

config = self.config
login = (config.wiki.get("username"), config.wiki.get("password"))
@@ -213,6 +238,7 @@ class SitesDB:
search_config["nltk_dir"] = nltk_dir
search_config["exclusions_db"] = self._exclusions_db

sql = info.sql
if not sql:
sql = config.wiki.get("sql", OrderedDict()).copy()
for key, value in sql.items():
@@ -221,13 +247,13 @@ class SitesDB:

return Site(
name=name,
project=project,
lang=lang,
base_url=base_url,
article_path=article_path,
script_path=script_path,
project=info.project,
lang=info.lang,
base_url=info.base_url,
article_path=info.article_path,
script_path=info.script_path,
sql=sql,
namespaces=namespaces,
namespaces=info.namespaces,
login=login,
oauth=oauth,
cookiejar=cookiejar,
@@ -240,18 +266,18 @@ class SitesDB:
search_config=search_config,
)

def _get_site_name_from_sitesdb(self, project, lang):
"""Return the name of the first site with the given project and lang.
def _get_site_name_from_sitesdb(self, project: str, lang: str) -> str | None:
"""
Return the name of the first site with the given project and lang.

If we can't find the site with the given information, we'll also try
searching for a site whose base_url contains "{lang}.{project}". There
are a few sites, like the French Wikipedia, that set their project to
something other than the expected "wikipedia" ("wikipédia" in this
case), but we should correctly find them when doing get_site(lang="fr",
project="wikipedia").
If we can't find the site with the given information, we'll also try searching
for a site whose base_url contains "{lang}.{project}". There are a few sites,
like the French Wikipedia, that set their project to something other than the
expected "wikipedia" ("wikipédia" in this case), but we should correctly find
them when doing get_site(lang="fr", project="wikipedia").

If the site is not found, return None. An empty sitesdb will be created
if none exists.
If the site is not found, return None. An empty sitesdb will be created if
none exists.
"""
query1 = "SELECT site_name FROM sites WHERE site_project = ? and site_lang = ?"
query2 = "SELECT site_name FROM sites WHERE site_base_url LIKE ?"
@@ -267,26 +293,27 @@ class SitesDB:
except sqlite.OperationalError:
self._create_sitesdb()

def _add_site_to_sitesdb(self, site):
"""Extract relevant info from a Site object and add it to the sitesdb.
def _add_site_to_sitesdb(self, site: Site) -> None:
"""
Extract relevant info from a Site object and add it to the sitesdb.

Works like a reverse _load_site_from_sitesdb(); the site's project,
language, base URL, article path, script path, SQL connection data, and
namespaces are extracted from the site and inserted into the sites
database. If the sitesdb doesn't exist, we'll create it first.
Works like a reverse _load_site_from_sitesdb(); the site's project, language,
base URL, article path, script path, SQL connection data, and namespaces are
extracted from the site and inserted into the sites database. If the sitesdb
doesn't exist, we'll create it first.
"""
name = site.name
sites_data = (
name,
site.project,
site.lang,
site._base_url,
site._article_path,
site._script_path,
site.base_url,
site.article_path,
site.script_path,
)
sql_data = [(name, key, val) for key, val in site._sql_data.items()]
ns_data = []
for ns_id, ns_names in site._namespaces.items():
ns_data: list[tuple[str, int, str, bool]] = []
for ns_id, ns_names in site.namespaces.items():
ns_data.append((name, ns_id, ns_names.pop(0), True))
for ns_name in ns_names:
ns_data.append((name, ns_id, ns_name, False))
@@ -306,8 +333,10 @@ class SitesDB:
conn.executemany("INSERT INTO sql_data VALUES (?, ?, ?)", sql_data)
conn.executemany("INSERT INTO namespaces VALUES (?, ?, ?, ?)", ns_data)

def _remove_site_from_sitesdb(self, name):
"""Remove a site by name from the sitesdb and the internal cache."""
def _remove_site_from_sitesdb(self, name: str) -> bool:
"""
Remove a site by name from the sitesdb and the internal cache.
"""
try:
del self._sites[name]
except KeyError:
@@ -323,30 +352,34 @@ class SitesDB:
self._logger.info(f"Removed site '{name}'")
return True

def get_site(self, name=None, project=None, lang=None):
"""Return a Site instance based on information from the sitesdb.
def get_site(
self,
name: str | None = None,
project: str | None = None,
lang: str | None = None,
) -> Site:
"""
Return a Site instance based on information from the sitesdb.

With no arguments, return the default site as specified by our config
file. This is ``config.wiki["defaultSite"]``.
With no arguments, return the default site as specified by our config file.
This is ``config.wiki["defaultSite"]``.

With *name* specified, return the site with that name. This is
equivalent to the site's ``wikiid`` in the API, like *enwiki*.
With *name* specified, return the site with that name. This is equivalent to
the site's ``wikiid`` in the API, like *enwiki*.

With *project* and *lang* specified, return the site whose project and
language match these values. If there are multiple sites with the same
values (unlikely), this is not a reliable way of loading a site. Call
the function with an explicit *name* in that case.
With *project* and *lang* specified, return the site whose project and language
match these values. If there are multiple sites with the same values
(unlikely), this is not a reliable way of loading a site. Call the function
with an explicit *name* in that case.

We will attempt to log in to the site automatically using
``config.wiki["username"]`` and ``config.wiki["password"]`` if both are
defined.

Specifying a project without a lang or a lang without a project will
raise :py:exc:`TypeError`. If all three args are specified, *name* will
be first tried, then *project* and *lang* if *name* doesn't work. If a
site cannot be found in the sitesdb,
:py:exc:`~earwigbot.exceptions.SiteNotFoundError` will be raised. An
empty sitesdb will be created if none is found.
``config.wiki["username"]`` and ``config.wiki["password"]`` if both are defined.

Specifying a project without a lang or a lang without a project will raise
:py:exc:`TypeError`. If all three args are specified, *name* will be first
tried, then *project* and *lang* if *name* doesn't work. If a site cannot be
found in the sitesdb, :py:exc:`~earwigbot.exceptions.SiteNotFoundError` will be
raised. An empty sitesdb will be created if none is found.
"""
# Someone specified a project without a lang, or vice versa:
if (project and not lang) or (not project and lang):
@@ -374,6 +407,7 @@ class SitesDB:
raise

# If we end up here, then project and lang are the only args given:
assert project is not None and lang is not None, (project, lang)
name = self._get_site_name_from_sitesdb(project, lang)
if name:
return self._get_site_object(name)
@@ -381,30 +415,34 @@ class SitesDB:
raise SiteNotFoundError(e)

def add_site(
self, project=None, lang=None, base_url=None, script_path="/w", sql=None
):
"""Add a site to the sitesdb so it can be retrieved with get_site().
self,
project: str | None = None,
lang: str | None = None,
base_url: str | None = None,
script_path: str = "/w",
sql: SqlConnInfo | None = None,
) -> Site:
"""
Add a site to the sitesdb so it can be retrieved with get_site().

If only a project and a lang are given, we'll guess the *base_url* as
``"//{lang}.{project}.org"`` (which is protocol-relative, becoming
``"https"`` if *useHTTPS* is ``True`` in config otherwise ``"http"``).
If this is wrong, provide the correct *base_url* as an argument (in
which case project and lang are ignored). Most wikis use ``"/w"`` as
the script path (meaning the API is located at
``"{base_url}{script_path}/api.php"`` ->
``"//{lang}.{project}.org/w/api.php"``), so this is the default. If
your wiki is different, provide the script_path as an argument. SQL
connection settings are guessed automatically using config's template
value. If this is wrong or not specified, provide a dict of kwargs as
*sql* and Site will pass it to :py:func:`pymysql.connect(**sql)
<pymysql.connect>`, allowing you to make queries with
:py:meth:`site.sql_query <earwigbot.wiki.site.Site.sql_query>`.

Returns ``True`` if the site was added successfully or ``False`` if the
site is already in our sitesdb (this can be done purposefully to update
old site info). Raises :py:exc:`~earwigbot.exception.SiteNotFoundError`
if not enough information has been provided to identify the site (e.g.
a *project* but not a *lang*).
``"//{lang}.{project}.org"`` (which is protocol-relative, becoming ``"https"``
if *useHTTPS* is ``True`` in config otherwise ``"http"``). If this is wrong,
provide the correct *base_url* as an argument (in which case project and lang
are ignored). Most wikis use ``"/w"`` as the script path (meaning the API is
located at ``"{base_url}{script_path}/api.php"`` ->
``"//{lang}.{project}.org/w/api.php"``), so this is the default. If your wiki
is different, provide the script_path as an argument. SQL connection settings
are guessed automatically using config's template value. If this is wrong or
not specified, provide a dict of kwargs as *sql* and Site will pass it to
:py:func:`pymysql.connect(**sql) <pymysql.connect>`, allowing you to make
queries with :py:meth:`site.sql_query <earwigbot.wiki.site.Site.sql_query>`.

Returns the :py:class:`~earwigbot.wiki.site.Site` object for the added site.
Adding a site that is already in our sitesdb can be done purposefully to update
old site info. Raises :py:exc:`~earwigbot.exceptions.SiteNotFoundError` if not
enough information has been provided to identify the site (e.g. a *project* but
not a *lang*).
"""
if not base_url:
if not project or not lang:
@@ -445,7 +483,12 @@ class SitesDB:
self._add_site_to_sitesdb(site)
return self._get_site_object(site.name)

def remove_site(self, name=None, project=None, lang=None):
def remove_site(
self,
name: str | None = None,
project: str | None = None,
lang: str | None = None,
) -> bool:
"""Remove a site from the sitesdb.

Returns ``True`` if the site was removed successfully or ``False`` if


+ 129
- 103
earwigbot/wiki/user.py View File

@@ -1,4 +1,4 @@
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -18,14 +18,21 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from logging import NullHandler, getLogger
from socket import AF_INET, AF_INET6, inet_pton
from time import gmtime, strptime
from __future__ import annotations

import socket
import time
import typing
from logging import Logger, NullHandler, getLogger
from typing import Any, Literal

from earwigbot.exceptions import UserNotFoundError
from earwigbot.wiki import constants
from earwigbot.wiki.page import Page

if typing.TYPE_CHECKING:
from earwigbot.wiki.site import Site

__all__ = ["User"]


@@ -33,10 +40,9 @@ class User:
"""
**EarwigBot: Wiki Toolset: User**

Represents a user on a given :py:class:`~earwigbot.wiki.site.Site`. Has
methods for getting a bunch of information about the user, such as
editcount and user rights, methods for returning the user's userpage and
talkpage, etc.
Represents a user on a given :py:class:`~earwigbot.wiki.site.Site`. Has methods for
getting a bunch of information about the user, such as editcount and user rights,
methods for returning the user's userpage and talkpage, etc.

*Attributes:*

@@ -56,24 +62,23 @@ class User:
*Public methods:*

- :py:meth:`reload`: forcibly reloads the user's attributes
- :py:meth:`get_userpage`: returns a Page object representing the user's
userpage
- :py:meth:`get_talkpage`: returns a Page object representing the user's
talkpage
- :py:meth:`get_userpage`: returns a Page object representing the user's userpage
- :py:meth:`get_talkpage`: returns a Page object representing the user's talkpage
"""

def __init__(self, site, name, logger=None):
"""Constructor for new User instances.
def __init__(self, site: Site, name: str, logger: Logger | None = None) -> None:
"""
Constructor for new User instances.

Takes two arguments, a Site object (necessary for doing API queries),
and the name of the user, preferably without "User:" in front, although
this prefix will be automatically removed by the API if given.
Takes two arguments, a Site object (necessary for doing API queries), and the
name of the user, preferably without "User:" in front, although this prefix
will be automatically removed by the API if given.

You can also use site.get_user() instead, which returns a User object,
and is preferred.
You can also use site.get_user() instead, which returns a User object, and
is preferred.

We won't do any API queries yet for basic information about the user -
save that for when the information is requested.
We won't do any API queries yet for basic information about the user - save
that for when the information is requested.
"""
self._site = site
self._name = name
@@ -85,22 +90,27 @@ class User:
self._logger = getLogger("earwigbot.wiki")
self._logger.addHandler(NullHandler())

def __repr__(self):
"""Return the canonical string representation of the User."""
def __repr__(self) -> str:
"""
Return the canonical string representation of the User.
"""
return f"User(name={self._name!r}, site={self._site!r})"

def __str__(self):
"""Return a nice string representation of the User."""
def __str__(self) -> str:
"""
Return a nice string representation of the User.
"""
return f'<User "{self.name}" of {str(self.site)}>'

def _get_attribute(self, attr):
"""Internally used to get an attribute by name.
def _get_attribute(self, attr: str) -> Any:
"""
Internally used to get an attribute by name.

We'll call _load_attributes() to get this (and all other attributes)
from the API if it is not already defined.
We'll call _load_attributes() to get this (and all other attributes) from the
API if it is not already defined.

Raises UserNotFoundError if a nonexistent user prevents us from
returning a certain attribute.
Raises UserNotFoundError if a nonexistent user prevents us from returning a
certain attribute.
"""
if not hasattr(self, attr):
self._load_attributes()
@@ -109,11 +119,12 @@ class User:
raise UserNotFoundError(e)
return getattr(self, attr)

def _load_attributes(self):
"""Internally used to load all attributes from the API.
def _load_attributes(self) -> None:
"""
Internally used to load all attributes from the API.

Normally, this is called by _get_attribute() when a requested attribute
is not defined. This defines it.
Normally, this is called by _get_attribute() when a requested attribute is not
defined. This defines it.
"""
props = "blockinfo|groups|rights|editcount|registration|emailable|gender"
result = self.site.api_query(
@@ -150,11 +161,11 @@ class User:

reg = res["registration"]
try:
self._registration = strptime(reg, "%Y-%m-%dT%H:%M:%SZ")
self._registration = time.strptime(reg, "%Y-%m-%dT%H:%M:%SZ")
except TypeError:
# Sometimes the API doesn't give a date; the user's probably really
# old. There's nothing else we can do!
self._registration = gmtime(0)
self._registration = time.gmtime(0)

try:
res["emailable"]
@@ -166,24 +177,28 @@ class User:
self._gender = res["gender"]

@property
def site(self):
"""The user's corresponding Site object."""
def site(self) -> Site:
"""
The user's corresponding Site object.
"""
return self._site

@property
def name(self):
"""The user's username.
def name(self) -> str:
"""
The user's username.

This will never make an API query on its own, but if one has already
been made by the time this is retrieved, the username may have been
"normalized" from the original input to the constructor, converted into
a Unicode object, with underscores removed, etc.
This will never make an API query on its own, but if one has already been made
by the time this is retrieved, the username may have been "normalized" from the
original input to the constructor, converted into a Unicode object, with
underscores removed, etc.
"""
return self._name

@property
def exists(self):
"""``True`` if the user exists, or ``False`` if they do not.
def exists(self) -> bool:
"""
``True`` if the user exists, or ``False`` if they do not.

Makes an API query only if we haven't made one already.
"""
@@ -192,124 +207,135 @@ class User:
return self._exists

@property
def userid(self):
"""An integer ID used by MediaWiki to represent the user.
def userid(self) -> int:
"""
An integer ID used by MediaWiki to represent the user.

Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
does not exist. Makes an API query only if we haven't made one already.
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not
exist. Makes an API query only if we haven't made one already.
"""
return self._get_attribute("_userid")

@property
def blockinfo(self):
"""Information about any current blocks on the user.
def blockinfo(self) -> dict[str, Any] | Literal[False]:
"""
Information about any current blocks on the user.

If the user is not blocked, returns ``False``. If they are, returns a
dict with three keys: ``"by"`` is the blocker's username, ``"reason"``
is the reason why they were blocked, and ``"expiry"`` is when the block
expires.
If the user is not blocked, returns ``False``. If they are, returns a dict with
three keys: ``"by"`` is the blocker's username, ``"reason"`` is the reason why
they were blocked, and ``"expiry"`` is when the block expires.

Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
does not exist. Makes an API query only if we haven't made one already.
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not
exist. Makes an API query only if we haven't made one already.
"""
return self._get_attribute("_blockinfo")

@property
def groups(self):
"""A list of groups this user is in, including ``"*"``.
def groups(self) -> list[str]:
"""
A list of groups this user is in, including ``"*"``.

Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
does not exist. Makes an API query only if we haven't made one already.
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not
exist. Makes an API query only if we haven't made one already.
"""
return self._get_attribute("_groups")

@property
def rights(self):
"""A list of this user's rights.
def rights(self) -> list[str]:
"""
A list of this user's rights.

Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
does not exist. Makes an API query only if we haven't made one already.
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not
exist. Makes an API query only if we haven't made one already.
"""
return self._get_attribute("_rights")

@property
def editcount(self):
"""Returns the number of edits made by the user.
def editcount(self) -> int:
"""
Returns the number of edits made by the user.

Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
does not exist. Makes an API query only if we haven't made one already.
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not
exist. Makes an API query only if we haven't made one already.
"""
return self._get_attribute("_editcount")

@property
def registration(self):
"""The time the user registered as a :py:class:`time.struct_time`.
def registration(self) -> time.struct_time:
"""
The time the user registered as a :py:class:`time.struct_time`.

Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
does not exist. Makes an API query only if we haven't made one already.
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not
exist. Makes an API query only if we haven't made one already.
"""
return self._get_attribute("_registration")

@property
def emailable(self):
"""``True`` if the user can be emailed, or ``False`` if they cannot.
def emailable(self) -> bool:
"""
``True`` if the user can be emailed, or ``False`` if they cannot.

Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
does not exist. Makes an API query only if we haven't made one already.
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not
exist. Makes an API query only if we haven't made one already.
"""
return self._get_attribute("_emailable")

@property
def gender(self):
"""The user's gender.
def gender(self) -> str:
"""
The user's gender.

Can return either ``"male"``, ``"female"``, or ``"unknown"``, if they
did not specify it.
Can return either ``"male"``, ``"female"``, or ``"unknown"``, if they did not
specify it.

Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
does not exist. Makes an API query only if we haven't made one already.
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not
exist. Makes an API query only if we haven't made one already.
"""
return self._get_attribute("_gender")

@property
def is_ip(self):
"""``True`` if the user is an IP address, or ``False`` otherwise.
def is_ip(self) -> bool:
"""
``True`` if the user is an IP address, or ``False`` otherwise.

This tests for IPv4 and IPv6 using :py:func:`socket.inet_pton` on the
username. No API queries are made.
This tests for IPv4 and IPv6 using :py:func:`socket.inet_pton` on the username.
No API queries are made.
"""
try:
inet_pton(AF_INET, self.name)
socket.inet_pton(socket.AF_INET, self.name)
except OSError:
try:
inet_pton(AF_INET6, self.name)
socket.inet_pton(socket.AF_INET6, self.name)
except OSError:
return False
return True

def reload(self):
"""Forcibly reload the user's attributes.
def reload(self) -> None:
"""
Forcibly reload the user's attributes.

Emphasis on *reload*: this is only necessary if there is reason to
believe they have changed.
Emphasis on *reload*: this is only necessary if there is reason to believe they
have changed.
"""
self._load_attributes()

def get_userpage(self):
"""Return a Page object representing the user's userpage.
def get_userpage(self) -> Page:
"""
Return a Page object representing the user's userpage.

No checks are made to see if it exists or not. Proper site namespace
conventions are followed.
No checks are made to see if it exists or not. Proper site namespace conventions
are followed.
"""
prefix = self.site.namespace_id_to_name(constants.NS_USER)
pagename = ":".join((prefix, self._name))
return Page(self.site, pagename)

def get_talkpage(self):
"""Return a Page object representing the user's talkpage.
def get_talkpage(self) -> Page:
"""
Return a Page object representing the user's talkpage.

No checks are made to see if it exists or not. Proper site namespace
conventions are followed.
No checks are made to see if it exists or not. Proper site namespace conventions
are followed.
"""
prefix = self.site.namespace_id_to_name(constants.NS_USER_TALK)
pagename = ":".join((prefix, self._name))


Loading…
Cancel
Save