@@ -1,6 +1,6 @@ | |||
v0.4 (unreleased): | |||
- Migrated to Python 3 (3.11+). | |||
- Migrated to Python 3 (3.11+). Substantial code cleanup. | |||
- Migrated from oursql to pymysql. | |||
- Copyvios: Configurable proxy support for specific domains. | |||
- Copyvios: Parser-directed URL redirection. | |||
@@ -1,4 +1,4 @@ | |||
# Copyright (C) 2009-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -20,15 +20,16 @@ | |||
""" | |||
`EarwigBot <https://github.com/earwig/earwigbot>`_ is a Python robot that edits | |||
Wikipedia and interacts with people over IRC. | |||
Wikipedia and interacts over IRC. | |||
See :file:`README.rst` for an overview, or the :file:`docs/` directory for | |||
details. This documentation is also available `online | |||
<https://packages.python.org/earwigbot>`_. | |||
See :file:`README.rst` for an overview, or the :file:`docs/` directory for details. | |||
This documentation is also available `online <https://packages.python.org/earwigbot>`_. | |||
""" | |||
import typing | |||
__author__ = "Ben Kurtovic" | |||
__copyright__ = "Copyright (C) 2009-2019 Ben Kurtovic" | |||
__copyright__ = "Copyright (C) 2009-2024 Ben Kurtovic" | |||
__license__ = "MIT License" | |||
__version__ = "0.4.dev0" | |||
__email__ = "ben.kurtovic@gmail.com" | |||
@@ -57,12 +58,26 @@ from earwigbot import lazy | |||
importer = lazy.LazyImporter() | |||
bot = importer.new("earwigbot.bot") | |||
commands = importer.new("earwigbot.commands") | |||
config = importer.new("earwigbot.config") | |||
exceptions = importer.new("earwigbot.exceptions") | |||
irc = importer.new("earwigbot.irc") | |||
managers = importer.new("earwigbot.managers") | |||
tasks = importer.new("earwigbot.tasks") | |||
util = importer.new("earwigbot.util") | |||
wiki = importer.new("earwigbot.wiki") | |||
if typing.TYPE_CHECKING: | |||
from earwigbot import ( | |||
bot, | |||
commands, | |||
config, | |||
exceptions, | |||
irc, | |||
managers, | |||
tasks, | |||
util, | |||
wiki, | |||
) | |||
else: | |||
bot = importer.new("earwigbot.bot") | |||
commands = importer.new("earwigbot.commands") | |||
config = importer.new("earwigbot.config") | |||
exceptions = importer.new("earwigbot.exceptions") | |||
irc = importer.new("earwigbot.irc") | |||
managers = importer.new("earwigbot.managers") | |||
tasks = importer.new("earwigbot.tasks") | |||
util = importer.new("earwigbot.util") | |||
wiki = importer.new("earwigbot.wiki") |
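For readers unfamiliar with the idiom above: the ``typing.TYPE_CHECKING`` branch gives static type checkers real modules to resolve, while the runtime branch keeps submodule imports lazy. A minimal, self-contained sketch of the same pattern, using a small stand-in class instead of ``lazy.LazyImporter``:

import importlib
import typing


class _LazyModule:
    # Stand-in for lazy.LazyImporter: defer the import until first attribute access.
    def __init__(self, name: str) -> None:
        self._name = name
        self._module = None

    def __getattr__(self, attr: str):
        if self._module is None:
            self._module = importlib.import_module(self._name)
        return getattr(self._module, attr)


if typing.TYPE_CHECKING:
    import json  # type checkers follow the real import
else:
    json = _LazyModule("json")  # at runtime, the import is deferred until first use

print(json.dumps({"lazy": True}))  # the actual import happens here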
@@ -107,6 +107,9 @@ class APIError(ServiceError): | |||
Raised by :py:meth:`Site.api_query <earwigbot.wiki.site.Site.api_query>`. | |||
""" | |||
code: str | |||
info: str | |||
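# Illustrative only (not part of this module): with the attributes annotated above,
# callers can report failed queries cleanly. `site` is assumed to be an existing
# earwigbot.wiki.site.Site object.
def _report_api_failure(site) -> None:
    try:
        site.api_query(action="query", meta="siteinfo")
    except APIError as exc:
        print(f"API error {exc.code}: {exc.info}")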
class SQLError(ServiceError): | |||
"""Some error involving SQL querying occurred. | |||
@@ -43,13 +43,14 @@ JobKwargs = TypedDict( | |||
"nocreate": NotRequired[bool], | |||
"recursive": NotRequired[bool | int], | |||
"tag-categories": NotRequired[bool], | |||
"not-in-category": NotRequired[str], | |||
"site": NotRequired[str], | |||
"dry-run": NotRequired[bool], | |||
}, | |||
) | |||
@dataclass | |||
@dataclass(frozen=True) | |||
class Job: | |||
""" | |||
Represents a single wikiproject-tagging task. | |||
@@ -68,11 +69,20 @@ class Job: | |||
only_with: set[str] | None | |||
nocreate: bool | |||
tag_categories: bool | |||
not_in_category: str | None | |||
dry_run: bool | |||
counter: int = 0 | |||
_counter: list[int] = field(default_factory=lambda: [0])  # Wrapped in a list to allow updates on a frozen dataclass
processed_cats: set[str] = field(default_factory=set) | |||
processed_pages: set[str] = field(default_factory=set) | |||
skip_pages: set[str] = field(default_factory=set) | |||
@property | |||
def counter(self) -> int: | |||
return self._counter[0] | |||
def add_to_counter(self, value: int) -> None: | |||
self._counter[0] += value | |||
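# Illustrative sketch, separate from the task above: the list-wrapping trick in
# isolation. A frozen dataclass forbids rebinding its fields (that raises
# FrozenInstanceError), but mutating a field's *contents* is allowed, so the count
# lives inside a one-element list. Uses the same dataclass/field imports as this module.
@dataclass(frozen=True)
class _FrozenTally:
    _count: list[int] = field(default_factory=lambda: [0])

    @property
    def count(self) -> int:
        return self._count[0]

    def bump(self, value: int = 1) -> None:
        self._count[0] += value  # fine: the field itself is never reassigned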
class ShutoffEnabled(Exception): | |||
@@ -90,7 +100,7 @@ class WikiProjectTagger(Task): | |||
Usage: :command:`earwigbot -t wikiproject_tagger PATH --banner BANNER | |||
[--category CAT | --file FILE] [--summary SUM] [--update] [--append PARAMS] | |||
[--autoassess [CLASSES]] [--only-with BANNER] [--nocreate] [--recursive [NUM]] | |||
[--site SITE] [--dry-run]` | |||
[--not-in-category CAT] [--site SITE] [--dry-run]` | |||
.. glossary:: | |||
@@ -126,6 +136,8 @@ class WikiProjectTagger(Task): | |||
``NUM`` isn't provided, go infinitely (this can be dangerous) | |||
``--tag-categories`` | |||
also tag category pages | |||
``--not-in-category CAT`` | |||
skip talk pages that are already members of this category | |||
``--site SITE`` | |||
the ID of the site to tag pages on, defaulting to the default site | |||
``--dry-run`` | |||
@@ -189,6 +201,7 @@ class WikiProjectTagger(Task): | |||
nocreate = kwargs.get("nocreate", False) | |||
recursive = kwargs.get("recursive", 0) | |||
tag_categories = kwargs.get("tag-categories", False) | |||
not_in_category = kwargs.get("not-in-category") | |||
dry_run = kwargs.get("dry-run", False) | |||
banner, names = self.get_names(site, banner) | |||
if not names: | |||
@@ -210,6 +223,7 @@ class WikiProjectTagger(Task): | |||
only_with=only_with, | |||
nocreate=nocreate, | |||
tag_categories=tag_categories, | |||
not_in_category=not_in_category, | |||
dry_run=dry_run, | |||
) | |||
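# Illustrative only (hypothetical names): kwargs for a run that uses the new option.
# Talk pages already in "not-in-category" are collected into Job.skip_pages and never
# tagged; combine with "dry-run" to preview edits. The equivalent CLI flags appear in
# the Usage string above (--not-in-category CAT --dry-run).
_example_kwargs = {
    "banner": "WikiProject Example",
    "category": "Example articles",
    "not-in-category": "Example articles already tagged",
    "dry-run": True,
}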
@@ -224,6 +238,11 @@ class WikiProjectTagger(Task): | |||
""" | |||
Run a tagging *job* on a given *site*. | |||
""" | |||
if job.not_in_category: | |||
skip_category = site.get_category(job.not_in_category) | |||
for page in skip_category.get_members(): | |||
job.skip_pages.add(page.title) | |||
if "category" in kwargs: | |||
title = kwargs["category"] | |||
title = self.guess_namespace(site, title, constants.NS_CATEGORY) | |||
@@ -322,6 +341,10 @@ class WikiProjectTagger(Task): | |||
if not page.is_talkpage: | |||
page = page.toggle_talk() | |||
if page.title in job.skip_pages: | |||
self.logger.debug(f"Skipping page, in category to skip: [[{page.title}]]") | |||
return | |||
if page.title in job.processed_pages: | |||
self.logger.debug(f"Skipping page, already processed: [[{page.title}]]") | |||
return | |||
@@ -330,7 +353,7 @@ class WikiProjectTagger(Task): | |||
if job.counter % 10 == 0: # Do a shutoff check every ten pages | |||
if self.shutoff_enabled(page.site): | |||
raise ShutoffEnabled() | |||
job.counter += 1 | |||
job.add_to_counter(1) | |||
try: | |||
code = page.parse() | |||
@@ -1,4 +1,4 @@ | |||
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -18,6 +18,9 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from collections.abc import Iterator | |||
from earwigbot.wiki.constants import Service | |||
from earwigbot.wiki.page import Page | |||
__all__ = ["Category"] | |||
@@ -27,14 +30,14 @@ class Category(Page): | |||
""" | |||
**EarwigBot: Wiki Toolset: Category** | |||
Represents a category on a given :py:class:`~earwigbot.wiki.site.Site`, a | |||
subclass of :py:class:`~earwigbot.wiki.page.Page`. Provides additional | |||
methods, but :py:class:`~earwigbot.wiki.page.Page`'s own methods should | |||
work fine on :py:class:`Category` objects. :py:meth:`site.get_page() | |||
<earwigbot.wiki.site.Site.get_page>` will return a :py:class:`Category` | |||
instead of a :py:class:`~earwigbot.wiki.page.Page` if the given title is in | |||
the category namespace; :py:meth:`~earwigbot.wiki.site.Site.get_category` | |||
is shorthand, accepting category names without the namespace prefix. | |||
Represents a category on a given :py:class:`~earwigbot.wiki.site.Site`, a subclass | |||
of :py:class:`~earwigbot.wiki.page.Page`. Provides additional methods, but | |||
:py:class:`~earwigbot.wiki.page.Page`'s own methods should work fine on | |||
:py:class:`Category` objects. :py:meth:`site.get_page() | |||
<earwigbot.wiki.site.Site.get_page>` will return a :py:class:`Category` instead of | |||
a :py:class:`~earwigbot.wiki.page.Page` if the given title is in the category | |||
namespace; :py:meth:`~earwigbot.wiki.site.Site.get_category` is shorthand, | |||
accepting category names without the namespace prefix. | |||
*Attributes:* | |||
@@ -48,22 +51,30 @@ class Category(Page): | |||
- :py:meth:`get_members`: iterates over Pages in the category | |||
""" | |||
def __repr__(self): | |||
"""Return the canonical string representation of the Category.""" | |||
def __repr__(self) -> str: | |||
""" | |||
Return the canonical string representation of the Category. | |||
""" | |||
res = "Category(title={0!r}, follow_redirects={1!r}, site={2!r})" | |||
return res.format(self._title, self._follow_redirects, self._site) | |||
def __str__(self): | |||
"""Return a nice string representation of the Category.""" | |||
def __str__(self) -> str: | |||
""" | |||
Return a nice string representation of the Category. | |||
""" | |||
return f'<Category "{self.title}" of {str(self.site)}>' | |||
def __iter__(self): | |||
"""Iterate over all members of the category.""" | |||
def __iter__(self) -> Iterator[Page]: | |||
""" | |||
Iterate over all members of the category. | |||
""" | |||
return self.get_members() | |||
def _get_members_via_api(self, limit, follow): | |||
"""Iterate over Pages in the category using the API.""" | |||
params = { | |||
def _get_members_via_api(self, limit: int | None, follow: bool) -> Iterator[Page]: | |||
""" | |||
Iterate over Pages in the category using the API. | |||
""" | |||
params: dict[str, str | int] = { | |||
"action": "query", | |||
"list": "categorymembers", | |||
"cmtitle": self.title, | |||
@@ -84,8 +95,10 @@ class Category(Page): | |||
else: | |||
break | |||
def _get_members_via_sql(self, limit, follow): | |||
"""Iterate over Pages in the category using SQL.""" | |||
def _get_members_via_sql(self, limit: int | None, follow: bool) -> Iterator[Page]: | |||
""" | |||
Iterate over Pages in the category using SQL. | |||
""" | |||
query = """SELECT page_title, page_namespace, page_id FROM page | |||
JOIN categorylinks ON page_id = cl_from | |||
WHERE cl_to = ?""" | |||
@@ -107,16 +120,20 @@ class Category(Page): | |||
title = base | |||
yield self.site.get_page(title, follow_redirects=follow, pageid=row[2]) | |||
def _get_size_via_api(self, member_type): | |||
"""Return the size of the category using the API.""" | |||
def _get_size_via_api(self, member_type: str) -> int: | |||
""" | |||
Return the size of the category using the API. | |||
""" | |||
result = self.site.api_query( | |||
action="query", prop="categoryinfo", titles=self.title | |||
) | |||
info = list(result["query"]["pages"].values())[0]["categoryinfo"] | |||
return info[member_type] | |||
def _get_size_via_sql(self, member_type): | |||
"""Return the size of the category using SQL.""" | |||
def _get_size_via_sql(self, member_type: str) -> int: | |||
""" | |||
Return the size of the category using SQL. | |||
""" | |||
query = "SELECT COUNT(*) FROM categorylinks WHERE cl_to = ?" | |||
title = self.title.replace(" ", "_").split(":", 1)[1] | |||
if member_type == "size": | |||
@@ -126,49 +143,54 @@ class Category(Page): | |||
result = self.site.sql_query(query, (title, member_type[:-1])) | |||
return list(result)[0][0] | |||
def _get_size(self, member_type): | |||
"""Return the size of the category.""" | |||
def _get_size(self, member_type: str) -> int: | |||
""" | |||
Return the size of the category. | |||
""" | |||
services = { | |||
self.site.SERVICE_API: self._get_size_via_api, | |||
self.site.SERVICE_SQL: self._get_size_via_sql, | |||
Service.API: self._get_size_via_api, | |||
Service.SQL: self._get_size_via_sql, | |||
} | |||
return self.site.delegate(services, (member_type,)) | |||
return self.site.delegate(services, member_type) | |||
@property | |||
def size(self): | |||
"""The total number of members in the category. | |||
def size(self) -> int: | |||
""" | |||
The total number of members in the category. | |||
Includes pages, files, and subcats. Equal to :py:attr:`pages` + | |||
:py:attr:`files` + :py:attr:`subcats`. This will use either the API or | |||
SQL depending on which are enabled and the amount of lag on each. This | |||
is handled by :py:meth:`site.delegate() | |||
<earwigbot.wiki.site.Site.delegate>`. | |||
:py:attr:`files` + :py:attr:`subcats`. This will use either the API or SQL | |||
depending on which are enabled and the amount of lag on each. This is handled | |||
by :py:meth:`site.delegate() <earwigbot.wiki.site.Site.delegate>`. | |||
""" | |||
return self._get_size("size") | |||
@property | |||
def pages(self): | |||
"""The number of pages in the category. | |||
def pages(self) -> int: | |||
""" | |||
The number of pages in the category. | |||
This will use either the API or SQL depending on which are enabled and | |||
the amount of lag on each. This is handled by :py:meth:`site.delegate() | |||
This will use either the API or SQL depending on which are enabled and the | |||
amount of lag on each. This is handled by :py:meth:`site.delegate() | |||
<earwigbot.wiki.site.Site.delegate>`. | |||
""" | |||
return self._get_size("pages") | |||
@property | |||
def files(self): | |||
"""The number of files in the category. | |||
def files(self) -> int: | |||
""" | |||
The number of files in the category. | |||
This will use either the API or SQL depending on which are enabled and | |||
the amount of lag on each. This is handled by :py:meth:`site.delegate() | |||
This will use either the API or SQL depending on which are enabled and the | |||
amount of lag on each. This is handled by :py:meth:`site.delegate() | |||
<earwigbot.wiki.site.Site.delegate>`. | |||
""" | |||
return self._get_size("files") | |||
@property | |||
def subcats(self): | |||
"""The number of subcategories in the category. | |||
def subcats(self) -> int: | |||
""" | |||
The number of subcategories in the category. | |||
This will use either the API or SQL depending on which are enabled and | |||
the amount of lag on each. This is handled by :py:meth:`site.delegate() | |||
@@ -176,36 +198,38 @@ class Category(Page): | |||
""" | |||
return self._get_size("subcats") | |||
def get_members(self, limit=None, follow_redirects=None): | |||
"""Iterate over Pages in the category. | |||
def get_members( | |||
self, limit: int | None = None, follow_redirects: bool | None = None | |||
) -> Iterator[Page]: | |||
""" | |||
Iterate over Pages in the category. | |||
If *limit* is given, we will provide this many pages, or less if the | |||
category is smaller. By default, *limit* is ``None``, meaning we will | |||
keep iterating over members until the category is exhausted. | |||
*follow_redirects* is passed directly to :py:meth:`site.get_page() | |||
<earwigbot.wiki.site.Site.get_page>`; it defaults to ``None``, which | |||
will use the value passed to our :py:meth:`__init__`. | |||
If *limit* is given, we will provide this many pages, or fewer if the category
is smaller. By default, *limit* is ``None``, meaning we will keep iterating | |||
over members until the category is exhausted. *follow_redirects* is passed | |||
directly to :py:meth:`site.get_page() <earwigbot.wiki.site.Site.get_page>`; | |||
it defaults to ``None``, which will use the value passed to our | |||
:py:meth:`__init__`. | |||
This will use either the API or SQL depending on which are enabled and | |||
the amount of lag on each. This is handled by :py:meth:`site.delegate() | |||
This will use either the API or SQL depending on which are enabled and the | |||
amount of lag on each. This is handled by :py:meth:`site.delegate() | |||
<earwigbot.wiki.site.Site.delegate>`. | |||
.. note:: | |||
Be careful when iterating over very large categories with no limit. | |||
If using the API, at best, you will make one query per 5000 pages, | |||
which can add up significantly for categories with hundreds of | |||
thousands of members. As for SQL, note that *all page titles are | |||
stored internally* as soon as the query is made, so the site-wide | |||
SQL lock can be freed and unrelated queries can be made without | |||
requiring a separate connection to be opened. This is generally not | |||
an issue unless your category's size approaches several hundred | |||
Be careful when iterating over very large categories with no limit. If using | |||
the API, at best, you will make one query per 5000 pages, which can add up | |||
significantly for categories with hundreds of thousands of members. As for | |||
SQL, note that *all page titles are stored internally* as soon as the query | |||
is made, so the site-wide SQL lock can be freed and unrelated queries can be | |||
made without requiring a separate connection to be opened. This is generally | |||
not an issue unless your category's size approaches several hundred | |||
thousand, in which case the sheer number of titles in memory becomes | |||
problematic. | |||
""" | |||
services = { | |||
self.site.SERVICE_API: self._get_members_via_api, | |||
self.site.SERVICE_SQL: self._get_members_via_sql, | |||
Service.API: self._get_members_via_api, | |||
Service.SQL: self._get_members_via_sql, | |||
} | |||
if follow_redirects is None: | |||
follow_redirects = self._follow_redirects | |||
return self.site.delegate(services, (limit, follow_redirects)) | |||
return self.site.delegate(services, limit, follow_redirects) |
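As a usage sketch of the interface above (``site`` is assumed to be an existing :py:class:`~earwigbot.wiki.site.Site`; the category name is made up):

cat = site.get_category("Example maintenance backlog")  # no "Category:" prefix needed
print(cat.size, cat.pages, cat.files, cat.subcats)  # delegated to the API or SQL

for page in cat.get_members(limit=50):  # omit limit to iterate over every member
    print(page.title)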
@@ -31,14 +31,50 @@ Import directly with ``from earwigbot.wiki import constants`` or | |||
:py:mod:`earwigbot.wiki` directly (e.g. ``earwigbot.wiki.USER_AGENT``). | |||
""" | |||
__all__ = [ | |||
"NS_CATEGORY_TALK", | |||
"NS_CATEGORY", | |||
"NS_DRAFT_TALK", | |||
"NS_DRAFT", | |||
"NS_FILE_TALK", | |||
"NS_FILE", | |||
"NS_HELP_TALK", | |||
"NS_HELP", | |||
"NS_MAIN", | |||
"NS_MEDIA", | |||
"NS_MEDIAWIKI_TALK", | |||
"NS_MEDIAWIKI", | |||
"NS_MODULE_TALK", | |||
"NS_MODULE", | |||
"NS_PORTAL_TALK", | |||
"NS_PORTAL", | |||
"NS_PROJECT_TALK", | |||
"NS_PROJECT", | |||
"NS_SPECIAL", | |||
"NS_TALK", | |||
"NS_TEMPLATE_TALK", | |||
"NS_TEMPLATE", | |||
"NS_USER_TALK", | |||
"NS_USER", | |||
"USER_AGENT", | |||
] | |||
import platform | |||
from enum import Enum | |||
import earwigbot | |||
# Default User Agent when making API queries: | |||
from platform import python_version as _p | |||
USER_AGENT = ( | |||
f"EarwigBot/{earwigbot.__version__} " | |||
f"(Python/{platform.python_version()}; https://github.com/earwig/earwigbot)" | |||
) | |||
from earwigbot import __version__ as _v | |||
USER_AGENT = "EarwigBot/{0} (Python/{1}; https://github.com/earwig/earwigbot)" | |||
USER_AGENT = USER_AGENT.format(_v, _p()) | |||
del _v, _p | |||
class Service(Enum): | |||
API = 1 | |||
SQL = 2 | |||
# Default namespace IDs: | |||
NS_MAIN = 0 | |||
@@ -57,5 +93,13 @@ NS_HELP = 12 | |||
NS_HELP_TALK = 13 | |||
NS_CATEGORY = 14 | |||
NS_CATEGORY_TALK = 15 | |||
NS_PORTAL = 100 | |||
NS_PORTAL_TALK = 101 | |||
NS_DRAFT = 118 | |||
NS_DRAFT_TALK = 119 | |||
NS_MODULE = 828 | |||
NS_MODULE_TALK = 829 | |||
NS_SPECIAL = -1 | |||
NS_MEDIA = -2 |
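A quick illustration of what this module now exposes (the printed user agent is an assumed rendering, not captured output):

from earwigbot.wiki.constants import NS_CATEGORY_TALK, USER_AGENT, Service

print(USER_AGENT)  # e.g. "EarwigBot/0.4.dev0 (Python/3.11.9; https://github.com/earwig/earwigbot)"
print(Service.API.value)  # 1
print(NS_CATEGORY_TALK)  # 15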
@@ -1,4 +1,4 @@ | |||
# Copyright (C) 2009-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -18,17 +18,27 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import annotations | |||
import hashlib | |||
import re | |||
from hashlib import md5 | |||
from logging import NullHandler, getLogger | |||
from time import gmtime, strftime | |||
from urllib.parse import quote | |||
import time | |||
import typing | |||
import urllib.parse | |||
from collections.abc import Iterable | |||
from logging import Logger, NullHandler, getLogger | |||
from typing import Any | |||
import mwparserfromhell | |||
from earwigbot import exceptions | |||
from earwigbot.exceptions import APIError | |||
from earwigbot.wiki.copyvios import CopyvioMixIn | |||
if typing.TYPE_CHECKING: | |||
from earwigbot.wiki.site import Site | |||
from earwigbot.wiki.user import User | |||
__all__ = ["Page"] | |||
@@ -36,10 +46,10 @@ class Page(CopyvioMixIn): | |||
""" | |||
**EarwigBot: Wiki Toolset: Page** | |||
Represents a page on a given :py:class:`~earwigbot.wiki.site.Site`. Has | |||
methods for getting information about the page, getting page content, and | |||
so on. :py:class:`~earwigbot.wiki.category.Category` is a subclass of | |||
:py:class:`Page` with additional methods. | |||
Represents a page on a given :py:class:`~earwigbot.wiki.site.Site`. Has methods for | |||
getting information about the page, getting page content, and so on. | |||
:py:class:`~earwigbot.wiki.category.Category` is a subclass of :py:class:`Page` | |||
with additional methods. | |||
*Attributes:* | |||
@@ -59,20 +69,19 @@ class Page(CopyvioMixIn): | |||
- :py:meth:`reload`: forcibly reloads the page's attributes | |||
- :py:meth:`toggle_talk`: returns a content page's talk page, or vice versa | |||
- :py:meth:`get`: returns the page's content | |||
- :py:meth:`get_redirect_target`: returns the page's destination if it is a | |||
redirect | |||
- :py:meth:`get_creator`: returns a User object representing the first | |||
person to edit the page | |||
- :py:meth:`get_redirect_target`: returns the page's destination if it is a redirect | |||
- :py:meth:`get_creator`: returns a User object representing the first person to | |||
edit the page | |||
- :py:meth:`parse`: parses the page content for templates, links, etc | |||
- :py:meth:`edit`: replaces the page's content or creates a new page | |||
- :py:meth:`add_section`: adds a new section at the bottom of the page | |||
- :py:meth:`check_exclusion`: checks whether or not we are allowed to edit | |||
the page, per ``{{bots}}``/``{{nobots}}`` | |||
- :py:meth:`check_exclusion`: checks whether or not we are allowed to edit the | |||
page, per ``{{bots}}``/``{{nobots}}`` | |||
- :py:meth:`~earwigbot.wiki.copyvios.CopyrightMixIn.copyvio_check`: | |||
checks the page for copyright violations | |||
- :py:meth:`~earwigbot.wiki.copyvios.CopyrightMixIn.copyvio_compare`: | |||
checks the page like :py:meth:`copyvio_check`, but against a specific URL | |||
- :py:meth:`~earwigbot.wiki.copyvios.CopyrightMixIn.copyvio_check`: checks the page | |||
for copyright violations | |||
- :py:meth:`~earwigbot.wiki.copyvios.CopyrightMixIn.copyvio_compare`: checks the | |||
page like :py:meth:`copyvio_check`, but against a specific URL | |||
""" | |||
PAGE_UNKNOWN = 0 | |||
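A short usage sketch of the interface summarized above (``site`` is assumed to be an existing Site object; the title is made up):

page = site.get_page("Example article", follow_redirects=True)
if page.exists == page.PAGE_EXISTS:
    text = page.get()  # cached after the first call
    print(page.url, len(text))
    code = page.parse()  # mwparserfromhell.wikicode.Wikicode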
@@ -80,18 +89,26 @@ class Page(CopyvioMixIn): | |||
PAGE_MISSING = 2 | |||
PAGE_EXISTS = 3 | |||
def __init__(self, site, title, follow_redirects=False, pageid=None, logger=None): | |||
"""Constructor for new Page instances. | |||
def __init__( | |||
self, | |||
site: Site, | |||
title: str, | |||
follow_redirects: bool = False, | |||
pageid: int | None = None, | |||
logger: Logger | None = None, | |||
) -> None: | |||
""" | |||
Constructor for new Page instances. | |||
Takes four arguments: a Site object, the Page's title (or pagename), | |||
whether or not to follow redirects (optional, defaults to False), and | |||
a page ID to supplement the title (optional, defaults to None - i.e., | |||
we will have to query the API to get it). | |||
Takes a Site object, the Page's title (or pagename), whether or not to follow
redirects (optional, defaults to False), a page ID to supplement the title
(optional, defaults to None - i.e., we will have to query the API to get it),
and an optional logger object.
As with User, site.get_page() is preferred. | |||
__init__() will not do any API queries, but it will use basic namespace | |||
logic to determine our namespace ID and if we are a talkpage. | |||
__init__() will not do any API queries, but it will use basic namespace logic | |||
to determine our namespace ID and if we are a talkpage. | |||
""" | |||
super().__init__(site) | |||
self._site = site | |||
@@ -108,16 +125,16 @@ class Page(CopyvioMixIn): | |||
# Attributes to be loaded through the API: | |||
self._exists = self.PAGE_UNKNOWN | |||
self._is_redirect = None | |||
self._lastrevid = None | |||
self._protection = None | |||
self._fullurl = None | |||
self._content = None | |||
self._creator = None | |||
self._is_redirect: bool | None = None | |||
self._lastrevid: int | None = None | |||
self._protection: dict | None = None | |||
self._fullurl: str | None = None | |||
self._content: str | None = None | |||
self._creator: str | None = None | |||
# Attributes used for editing/deleting/protecting/etc: | |||
self._basetimestamp = None | |||
self._starttimestamp = None | |||
self._basetimestamp: str | None = None | |||
self._starttimestamp: str | None = None | |||
# Try to determine the page's namespace using our site's namespace | |||
# converter: | |||
@@ -137,54 +154,60 @@ class Page(CopyvioMixIn): | |||
else: | |||
self._is_talkpage = self._namespace % 2 == 1 | |||
def __repr__(self): | |||
"""Return the canonical string representation of the Page.""" | |||
def __repr__(self) -> str: | |||
""" | |||
Return the canonical string representation of the Page. | |||
""" | |||
res = "Page(title={0!r}, follow_redirects={1!r}, site={2!r})" | |||
return res.format(self._title, self._follow_redirects, self._site) | |||
def __str__(self): | |||
"""Return a nice string representation of the Page.""" | |||
def __str__(self) -> str: | |||
""" | |||
Return a nice string representation of the Page. | |||
""" | |||
return f'<Page "{self.title}" of {str(self.site)}>' | |||
def _assert_validity(self): | |||
"""Used to ensure that our page's title is valid. | |||
def _assert_validity(self) -> None: | |||
""" | |||
Used to ensure that our page's title is valid. | |||
If this method is called when our page is not valid (and after | |||
_load_attributes() has been called), InvalidPageError will be raised. | |||
Note that validity != existence. If a page's title is invalid (e.g, it | |||
contains "[") it will always be invalid, and cannot be edited. | |||
Note that validity != existence. If a page's title is invalid (e.g., it contains
"[") it will always be invalid, and cannot be edited. | |||
""" | |||
if self._exists == self.PAGE_INVALID: | |||
e = f"Page '{self._title}' is invalid." | |||
raise exceptions.InvalidPageError(e) | |||
def _assert_existence(self): | |||
"""Used to ensure that our page exists. | |||
def _assert_existence(self) -> None: | |||
""" | |||
Used to ensure that our page exists. | |||
If this method is called when our page doesn't exist (and after | |||
_load_attributes() has been called), PageNotFoundError will be raised. | |||
It will also call _assert_validity() beforehand. | |||
_load_attributes() has been called), PageNotFoundError will be raised. It will | |||
also call _assert_validity() beforehand. | |||
""" | |||
self._assert_validity() | |||
if self._exists == self.PAGE_MISSING: | |||
e = f"Page '{self._title}' does not exist." | |||
raise exceptions.PageNotFoundError(e) | |||
def _load(self): | |||
"""Call _load_attributes() and follows redirects if we're supposed to. | |||
def _load(self) -> None: | |||
""" | |||
Call _load_attributes() and follow redirects if we're supposed to. | |||
This method will only follow redirects if follow_redirects=True was | |||
passed to __init__() (perhaps indirectly passed by site.get_page()). | |||
It avoids the API's &redirects param in favor of manual following, | |||
so we can act more realistically (we don't follow double redirects, and | |||
circular redirects don't break us). | |||
This method will only follow redirects if follow_redirects=True was passed to | |||
__init__() (perhaps indirectly passed by site.get_page()). It avoids the API's | |||
&redirects param in favor of manual following, so we can act more realistically | |||
(we don't follow double redirects, and circular redirects don't break us). | |||
This will raise RedirectError if we have a problem following, but that | |||
is a bug and should NOT happen. | |||
This will raise RedirectError if we have a problem following, but that is a bug | |||
and should NOT happen. | |||
If we're following a redirect, this will make a grand total of three | |||
API queries. It's a lot, but each one is quite small. | |||
If we're following a redirect, this will make a grand total of three API | |||
queries. It's a lot, but each one is quite small. | |||
""" | |||
self._load_attributes() | |||
@@ -194,14 +217,14 @@ class Page(CopyvioMixIn): | |||
self._content = None # reset the content we just loaded | |||
self._load_attributes() | |||
def _load_attributes(self, result=None): | |||
"""Load various data from the API in a single query. | |||
def _load_attributes(self, result: dict | None = None) -> None: | |||
""" | |||
Load various data from the API in a single query. | |||
Loads self._title, ._exists, ._is_redirect, ._pageid, ._fullurl, | |||
._protection, ._namespace, ._is_talkpage, ._creator, ._lastrevid, and | |||
._starttimestamp using the API. It will do a query of its own unless | |||
*result* is provided, in which case we'll pretend *result* is what the | |||
query returned. | |||
Loads self._title, ._exists, ._is_redirect, ._pageid, ._fullurl, ._protection, | |||
._namespace, ._is_talkpage, ._creator, ._lastrevid, and ._starttimestamp using | |||
the API. It will do a query of its own unless *result* is provided, in which | |||
case we'll pretend *result* is what the query returned. | |||
Assuming the API is sound, this should not raise any exceptions. | |||
""" | |||
@@ -217,6 +240,7 @@ class Page(CopyvioMixIn): | |||
titles=self._title, | |||
) | |||
assert result is not None | |||
if "interwiki" in result["query"]: | |||
self._title = result["query"]["interwiki"][0]["title"] | |||
self._exists = self.PAGE_INVALID | |||
@@ -242,7 +266,7 @@ class Page(CopyvioMixIn): | |||
self._fullurl = res["fullurl"] | |||
self._protection = res["protection"] | |||
self._starttimestamp = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime()) | |||
self._starttimestamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) | |||
# We've determined the namespace and talkpage status in __init__() | |||
# based on the title, but now we can be sure: | |||
@@ -256,15 +280,15 @@ class Page(CopyvioMixIn): | |||
except KeyError: | |||
pass | |||
def _load_content(self, result=None): | |||
"""Load current page content from the API. | |||
def _load_content(self, result: dict | None = None) -> None: | |||
""" | |||
Load current page content from the API. | |||
If *result* is provided, we'll pretend that is the result of an API | |||
query and try to get content from that. Otherwise, we'll do an API | |||
query on our own. | |||
If *result* is provided, we'll pretend that is the result of an API query and | |||
try to get content from that. Otherwise, we'll do an API query on our own. | |||
Don't call this directly, ever; use reload() followed by get() if you | |||
want to force content reloading. | |||
Don't call this directly, ever; use reload() followed by get() if you want to | |||
force content reloading. | |||
""" | |||
if not result: | |||
query = self.site.api_query | |||
@@ -277,6 +301,7 @@ class Page(CopyvioMixIn): | |||
titles=self._title, | |||
) | |||
assert result is not None | |||
res = list(result["query"]["pages"].values())[0] | |||
try: | |||
revision = res["revisions"][0] | |||
@@ -291,32 +316,32 @@ class Page(CopyvioMixIn): | |||
def _edit( | |||
self, | |||
params=None, | |||
text=None, | |||
summary=None, | |||
minor=None, | |||
bot=None, | |||
force=None, | |||
section=None, | |||
captcha_id=None, | |||
captcha_word=None, | |||
**kwargs, | |||
): | |||
"""Edit the page! | |||
If *params* is given, we'll use it as our API query parameters. | |||
Otherwise, we'll build params using the given kwargs via | |||
_build_edit_params(). | |||
We'll then try to do the API query, and catch any errors the API raises | |||
in _handle_edit_errors(). We'll then throw these back as subclasses of | |||
EditError. | |||
params: dict[str, Any] | None = None, | |||
text: str | None = None, | |||
summary: str | None = None, | |||
minor: bool | None = None, | |||
bot: bool | None = None, | |||
force: bool | None = None, | |||
section: int | str | None = None, | |||
captcha_id: str | None = None, | |||
captcha_word: str | None = None, | |||
**kwargs: Any, | |||
) -> None: | |||
""" | |||
Edit the page! | |||
If *params* is given, we'll use it as our API query parameters. Otherwise, | |||
we'll build params using the given kwargs via _build_edit_params(). | |||
We'll then do the API query, catch any errors the API raises in
_handle_edit_errors(), and re-raise them as subclasses of EditError.
""" | |||
# Weed out invalid pages before we get too far: | |||
self._assert_validity() | |||
# Build our API query string: | |||
if not params: | |||
assert text is not None, "Edit text must be provided when params are unset" | |||
params = self._build_edit_params( | |||
text, | |||
summary, | |||
@@ -351,26 +376,26 @@ class Page(CopyvioMixIn): | |||
def _build_edit_params( | |||
self, | |||
text, | |||
summary, | |||
minor, | |||
bot, | |||
force, | |||
section, | |||
captcha_id, | |||
captcha_word, | |||
kwargs, | |||
): | |||
"""Given some keyword arguments, build an API edit query string.""" | |||
unitxt = text.encode("utf8") if isinstance(text, str) else text | |||
hashed = md5(unitxt).hexdigest() # Checksum to ensure text is correct | |||
text: str, | |||
summary: str | None, | |||
minor: bool | None, | |||
bot: bool | None, | |||
force: bool | None, | |||
section: int | str | None, | |||
captcha_id: str | None, | |||
captcha_word: str | None, | |||
kwargs: dict[str, Any], | |||
) -> dict[str, Any]: | |||
""" | |||
Given some keyword arguments, build an API edit query string. | |||
""" | |||
params = { | |||
"action": "edit", | |||
"title": self._title, | |||
"text": text, | |||
"token": self.site.get_token(), | |||
"summary": summary, | |||
"md5": hashed, | |||
"md5": hashlib.md5(text.encode("utf-8")).hexdigest(), | |||
} | |||
if section: | |||
@@ -403,12 +428,15 @@ class Page(CopyvioMixIn): | |||
params[key] = val | |||
return params | |||
def _handle_edit_errors(self, error, params, retry=True): | |||
"""If our edit fails due to some error, try to handle it. | |||
def _handle_edit_errors( | |||
self, error: APIError, params: dict[str, Any], retry: bool = True | |||
) -> dict: | |||
""" | |||
If our edit fails due to some error, try to handle it. | |||
We'll either raise an appropriate exception (for example, if the page | |||
is protected), or we'll try to fix it (for example, if the token is | |||
invalid, we'll try to get a new one). | |||
We'll either raise an appropriate exception (for example, if the page is | |||
protected), or we'll try to fix it (for example, if the token is invalid, we'll | |||
try to get a new one). | |||
""" | |||
perms = [ | |||
"noedit", | |||
@@ -447,27 +475,31 @@ class Page(CopyvioMixIn): | |||
raise exceptions.EditError(": ".join((error.code, error.info))) | |||
@property | |||
def site(self): | |||
"""The page's corresponding Site object.""" | |||
def site(self) -> Site: | |||
""" | |||
The page's corresponding Site object. | |||
""" | |||
return self._site | |||
@property | |||
def title(self): | |||
"""The page's title, or "pagename". | |||
def title(self) -> str: | |||
""" | |||
The page's title, or "pagename". | |||
This won't do any API queries on its own. Any other attributes or | |||
methods that do API queries will reload the title, however, like | |||
:py:attr:`exists` and :py:meth:`get`, potentially "normalizing" it or | |||
following redirects if :py:attr:`self._follow_redirects` is ``True``. | |||
This won't do any API queries on its own. Any other attributes or methods that | |||
do API queries will reload the title, however, like :py:attr:`exists` and | |||
:py:meth:`get`, potentially "normalizing" it or following redirects if | |||
:py:attr:`self._follow_redirects` is ``True``. | |||
""" | |||
return self._title | |||
@property | |||
def exists(self): | |||
"""Whether or not the page exists. | |||
def exists(self) -> int: | |||
""" | |||
Whether or not the page exists. | |||
This will be a number; its value does not matter, but it will equal | |||
one of :py:attr:`self.PAGE_INVALID <PAGE_INVALID>`, | |||
This will be a number; its value does not matter, but it will equal one of | |||
:py:attr:`self.PAGE_INVALID <PAGE_INVALID>`, | |||
:py:attr:`self.PAGE_MISSING <PAGE_MISSING>`, or | |||
:py:attr:`self.PAGE_EXISTS <PAGE_EXISTS>`. | |||
@@ -478,55 +510,60 @@ class Page(CopyvioMixIn): | |||
return self._exists | |||
@property | |||
def pageid(self): | |||
"""An integer ID representing the page. | |||
def pageid(self) -> int: | |||
""" | |||
An integer ID representing the page. | |||
Makes an API query only if we haven't already made one and the *pageid* | |||
parameter to :py:meth:`__init__` was left as ``None``, which should be | |||
true for all cases except when pages are returned by an SQL generator | |||
(like :py:meth:`category.get_members() | |||
parameter to :py:meth:`__init__` was left as ``None``, which should be true for | |||
all cases except when pages are returned by an SQL generator (like | |||
:py:meth:`category.get_members() | |||
<earwigbot.wiki.category.Category.get_members>`). | |||
Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or | |||
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is | |||
invalid or the page does not exist, respectively. | |||
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is invalid | |||
or the page does not exist, respectively. | |||
""" | |||
if self._pageid: | |||
return self._pageid | |||
if self._exists == self.PAGE_UNKNOWN: | |||
self._load() | |||
self._assert_existence() # Missing pages do not have IDs | |||
assert self._pageid is not None, "Page exists but does not have an ID" | |||
return self._pageid | |||
@property | |||
def url(self): | |||
"""The page's URL. | |||
def url(self) -> str: | |||
""" | |||
The page's URL. | |||
Like :py:meth:`title`, this won't do any API queries on its own. If the | |||
API was never queried for this page, we will attempt to determine the | |||
URL ourselves based on the title. | |||
Like :py:meth:`title`, this won't do any API queries on its own. If the API was | |||
never queried for this page, we will attempt to determine the URL ourselves | |||
based on the title. | |||
""" | |||
if self._fullurl: | |||
return self._fullurl | |||
else: | |||
encoded = self._title.encode("utf8").replace(" ", "_") | |||
slug = quote(encoded, safe="/:").decode("utf8") | |||
path = self.site._article_path.replace("$1", slug) | |||
encoded = self._title.replace(" ", "_") | |||
slug = urllib.parse.quote(encoded, safe="/:") | |||
path = self.site.article_path.replace("$1", slug) | |||
return "".join((self.site.url, path)) | |||
@property | |||
def namespace(self): | |||
"""The page's namespace ID (an integer). | |||
def namespace(self) -> int: | |||
""" | |||
The page's namespace ID (an integer). | |||
Like :py:meth:`title`, this won't do any API queries on its own. If the | |||
API was never queried for this page, we will attempt to determine the | |||
namespace ourselves based on the title. | |||
Like :py:meth:`title`, this won't do any API queries on its own. If the API was | |||
never queried for this page, we will attempt to determine the namespace | |||
ourselves based on the title. | |||
""" | |||
return self._namespace | |||
@property | |||
def lastrevid(self): | |||
"""The ID of the page's most recent revision. | |||
def lastrevid(self) -> int | None: | |||
""" | |||
The ID of the page's most recent revision. | |||
Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or | |||
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is | |||
@@ -538,14 +575,15 @@ class Page(CopyvioMixIn): | |||
return self._lastrevid | |||
@property | |||
def protection(self): | |||
"""The page's current protection status. | |||
def protection(self) -> dict | None: | |||
""" | |||
The page's current protection status. | |||
Makes an API query only if we haven't already made one. | |||
Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` if the page | |||
name is invalid. Won't raise an error if the page is missing because | |||
those can still be create-protected. | |||
Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` if the page name is | |||
invalid. Won't raise an error if the page is missing because those can still be | |||
create-protected. | |||
""" | |||
if self._exists == self.PAGE_UNKNOWN: | |||
self._load() | |||
@@ -553,17 +591,18 @@ class Page(CopyvioMixIn): | |||
return self._protection | |||
@property | |||
def is_talkpage(self): | |||
"""``True`` if the page is a talkpage, otherwise ``False``. | |||
def is_talkpage(self) -> bool: | |||
""" | |||
``True`` if the page is a talkpage, otherwise ``False``. | |||
Like :py:meth:`title`, this won't do any API queries on its own. If the | |||
API was never queried for this page, we will attempt to determine | |||
whether it is a talkpage ourselves based on its namespace. | |||
Like :py:meth:`title`, this won't do any API queries on its own. If the API was | |||
never queried for this page, we will attempt to determine whether it is a | |||
talkpage ourselves based on its namespace. | |||
""" | |||
return self._is_talkpage | |||
@property | |||
def is_redirect(self): | |||
def is_redirect(self) -> bool: | |||
"""``True`` if the page is a redirect, otherwise ``False``. | |||
Makes an API query only if we haven't already made one. | |||
@@ -572,34 +611,36 @@ class Page(CopyvioMixIn): | |||
""" | |||
if self._exists == self.PAGE_UNKNOWN: | |||
self._load() | |||
assert self._is_redirect is not None | |||
return self._is_redirect | |||
def reload(self): | |||
"""Forcibly reload the page's attributes. | |||
def reload(self) -> None: | |||
""" | |||
Forcibly reload the page's attributes. | |||
Emphasis on *reload*: this is only necessary if there is reason to | |||
believe they have changed. | |||
Emphasis on *reload*: this is only necessary if there is reason to believe they | |||
have changed. | |||
""" | |||
self._load() | |||
if self._content is not None: | |||
# Only reload content if it has already been loaded: | |||
self._load_content() | |||
def toggle_talk(self, follow_redirects=None): | |||
"""Return a content page's talk page, or vice versa. | |||
def toggle_talk(self, follow_redirects: bool | None = None) -> Page: | |||
""" | |||
Return a content page's talk page, or vice versa. | |||
The title of the new page is determined by namespace logic, not API | |||
queries. We won't make any API queries on our own. | |||
The title of the new page is determined by namespace logic, not API queries. | |||
We won't make any API queries on our own. | |||
If *follow_redirects* is anything other than ``None`` (the default), it | |||
will be passed to the new :py:class:`~earwigbot.wiki.page.Page` | |||
object's :py:meth:`__init__`. Otherwise, we'll use the value passed to | |||
our own :py:meth:`__init__`. | |||
If *follow_redirects* is anything other than ``None`` (the default), it will be | |||
passed to the new :py:class:`~earwigbot.wiki.page.Page` object's | |||
:py:meth:`__init__`. Otherwise, we'll use the value passed to our own | |||
:py:meth:`__init__`. | |||
Will raise :py:exc:`~earwigbot.exceptions.InvalidPageError` if we try | |||
to get the talk page of a special page (in the ``Special:`` or | |||
``Media:`` namespaces), but we won't raise an exception if our page is | |||
otherwise missing or invalid. | |||
Will raise :py:exc:`~earwigbot.exceptions.InvalidPageError` if we try to get | |||
the talk page of a special page (in the ``Special:`` or ``Media:`` namespaces), | |||
but we won't raise an exception if our page is otherwise missing or invalid. | |||
""" | |||
if self._namespace < 0: | |||
ns = self.site.namespace_id_to_name(self._namespace) | |||
@@ -629,11 +670,12 @@ class Page(CopyvioMixIn): | |||
follow_redirects = self._follow_redirects | |||
return Page(self.site, new_title, follow_redirects) | |||
def get(self): | |||
"""Return page content, which is cached if you try to call get again. | |||
def get(self) -> str: | |||
""" | |||
Return page content, which is cached if you try to call get again. | |||
Raises InvalidPageError or PageNotFoundError if the page name is | |||
invalid or the page does not exist, respectively. | |||
Raises InvalidPageError or PageNotFoundError if the page name is invalid or the | |||
page does not exist, respectively. | |||
""" | |||
if self._exists == self.PAGE_UNKNOWN: | |||
# Kill two birds with one stone by doing an API query for both our | |||
@@ -659,6 +701,7 @@ class Page(CopyvioMixIn): | |||
self._exists = self.PAGE_UNKNOWN # Force another API query | |||
self.get() | |||
assert self._content is not None | |||
return self._content | |||
# Make sure we're dealing with a real page here. This may be outdated | |||
@@ -669,16 +712,17 @@ class Page(CopyvioMixIn): | |||
if self._content is None: | |||
self._load_content() | |||
assert self._content is not None | |||
return self._content | |||
def get_redirect_target(self): | |||
"""If the page is a redirect, return its destination. | |||
def get_redirect_target(self) -> str: | |||
""" | |||
If the page is a redirect, return its destination. | |||
Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or | |||
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is | |||
invalid or the page does not exist, respectively. Raises | |||
:py:exc:`~earwigbot.exceptions.RedirectError` if the page is not a | |||
redirect. | |||
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is invalid | |||
or the page does not exist, respectively. Raises | |||
:py:exc:`~earwigbot.exceptions.RedirectError` if the page is not a redirect. | |||
""" | |||
re_redirect = r"^\s*\#\s*redirect\s*\[\[(.*?)\]\]" | |||
content = self.get() | |||
@@ -688,19 +732,20 @@ class Page(CopyvioMixIn): | |||
e = "The page does not appear to have a redirect target." | |||
raise exceptions.RedirectError(e) | |||
def get_creator(self): | |||
"""Return the User object for the first person to edit the page. | |||
def get_creator(self) -> User: | |||
""" | |||
Return the User object for the first person to edit the page. | |||
Makes an API query only if we haven't already made one. Normally, we | |||
can get the creator along with everything else (except content) in | |||
:py:meth:`_load_attributes`. However, due to a limitation in the API | |||
(can't get the editor of one revision and the content of another at | |||
both ends of the history), if our other attributes were only loaded | |||
through :py:meth:`get`, we'll have to do another API query. | |||
Makes an API query only if we haven't already made one. Normally, we can get | |||
the creator along with everything else (except content) in | |||
:py:meth:`_load_attributes`. However, due to a limitation in the API (can't get | |||
the editor of one revision and the content of another at both ends of the | |||
history), if our other attributes were only loaded through :py:meth:`get`, | |||
we'll have to do another API query. | |||
Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or | |||
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is | |||
invalid or the page does not exist, respectively. | |||
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is invalid | |||
or the page does not exist, respectively. | |||
""" | |||
if self._exists == self.PAGE_UNKNOWN: | |||
self._load() | |||
@@ -710,41 +755,59 @@ class Page(CopyvioMixIn): | |||
self._assert_existence() | |||
return self.site.get_user(self._creator) | |||
def parse(self): | |||
"""Parse the page content for templates, links, etc. | |||
def parse(self) -> mwparserfromhell.wikicode.Wikicode: | |||
""" | |||
Parse the page content for templates, links, etc. | |||
Actual parsing is handled by :py:mod:`mwparserfromhell`. Raises | |||
:py:exc:`~earwigbot.exceptions.InvalidPageError` or | |||
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is | |||
invalid or the page does not exist, respectively. | |||
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is invalid | |||
or the page does not exist, respectively. | |||
""" | |||
return mwparserfromhell.parse(self.get()) | |||
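# Illustrative follow-up (assumes `page` is a loaded Page): typical uses of the
# returned Wikicode object via mwparserfromhell's public API.
#
#     code = page.parse()
#     for template in code.filter_templates():
#         print(template.name)
#     for link in code.filter_wikilinks():
#         print(link.title)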
def edit(self, text, summary, minor=False, bot=True, force=False, **kwargs): | |||
"""Replace the page's content or creates a new page. | |||
def edit( | |||
self, | |||
text: str, | |||
summary: str | None, | |||
minor: bool = False, | |||
bot: bool = True, | |||
force: bool = False, | |||
**kwargs: Any, | |||
) -> None: | |||
""" | |||
Replace the page's content or create a new page.
*text* is the new page content, with *summary* as the edit summary. | |||
If *minor* is ``True``, the edit will be marked as minor. If *bot* is | |||
``True``, the edit will be marked as a bot edit, but only if we | |||
actually have a bot flag. | |||
*text* is the new page content, with *summary* as the edit summary. If *minor* | |||
is ``True``, the edit will be marked as minor. If *bot* is ``True``, the edit | |||
will be marked as a bot edit, but only if we actually have a bot flag. | |||
Use *force* to push the new content even if there's an edit conflict or | |||
the page was deleted/recreated between getting our edit token and | |||
editing our page. Be careful with this! | |||
Use *force* to push the new content even if there's an edit conflict or the | |||
page was deleted/recreated between getting our edit token and editing our page. | |||
Be careful with this! | |||
""" | |||
self._edit( | |||
text=text, summary=summary, minor=minor, bot=bot, force=force, **kwargs | |||
) | |||
def add_section(self, text, title, minor=False, bot=True, force=False, **kwargs): | |||
"""Add a new section to the bottom of the page. | |||
def add_section( | |||
self, | |||
text: str, | |||
title: str, | |||
minor: bool = False, | |||
bot: bool = True, | |||
force: bool = False, | |||
**kwargs: Any, | |||
) -> None: | |||
""" | |||
Add a new section to the bottom of the page. | |||
The arguments for this are the same as those for :py:meth:`edit`, but | |||
instead of providing a summary, you provide a section title. Likewise, | |||
raised exceptions are the same as :py:meth:`edit`'s. | |||
The arguments for this are the same as those for :py:meth:`edit`, but instead | |||
of providing a summary, you provide a section title. Likewise, raised | |||
exceptions are the same as :py:meth:`edit`'s. | |||
This should create the page if it does not already exist, with just the | |||
new section as content. | |||
This should create the page if it does not already exist, with just the new | |||
section as content. | |||
""" | |||
self._edit( | |||
text=text, | |||
@@ -756,25 +819,27 @@ class Page(CopyvioMixIn): | |||
**kwargs, | |||
) | |||
def check_exclusion(self, username=None, optouts=None): | |||
"""Check whether or not we are allowed to edit the page. | |||
def check_exclusion( | |||
self, username: str | None = None, optouts: Iterable[str] | None = None | |||
) -> bool: | |||
""" | |||
Check whether or not we are allowed to edit the page. | |||
Return ``True`` if we *are* allowed to edit this page, and ``False`` if | |||
we aren't. | |||
*username* is used to determine whether we are part of a specific list | |||
of allowed or disallowed bots (e.g. ``{{bots|allow=EarwigBot}}`` or | |||
``{{bots|deny=FooBot,EarwigBot}}``). It's ``None`` by default, which | |||
will swipe our username from :py:meth:`site.get_user() | |||
*username* is used to determine whether we are part of a specific list of | |||
allowed or disallowed bots (e.g. ``{{bots|allow=EarwigBot}}`` or | |||
``{{bots|deny=FooBot,EarwigBot}}``). It's ``None`` by default, which will swipe | |||
our username from :py:meth:`site.get_user() | |||
<earwigbot.wiki.site.Site.get_user>`.\ | |||
:py:attr:`~earwigbot.wiki.user.User.name`. | |||
*optouts* is a list of messages to consider this check as part of for | |||
the purpose of opt-out; it defaults to ``None``, which ignores the | |||
parameter completely. For example, if *optouts* is ``["nolicense"]``, | |||
we'll return ``False`` on ``{{bots|optout=nolicense}}`` or | |||
``{{bots|optout=all}}``, but `True` on | |||
``{{bots|optout=orfud,norationale,replaceable}}``. | |||
*optouts* is a list of messages to consider this check as part of for the | |||
purpose of opt-out; it defaults to ``None``, which ignores the parameter | |||
completely. For example, if *optouts* is ``["nolicense"]``, we'll return | |||
``False`` on ``{{bots|optout=nolicense}}`` or ``{{bots|optout=all}}``, but | |||
``True`` on ``{{bots|optout=orfud,norationale,replaceable}}``.
""" | |||
def parse_param(template, param): | |||
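Pulling the editing pieces above together, a hedged usage sketch (``page`` is assumed to be an existing Page; the text, summary, and username are made up):

if page.check_exclusion(username="ExampleBot", optouts=["nolicense"]):
    page.edit(
        text=page.get() + "\n\n[[Category:Example tracking category]]",
        summary="Adding tracking category (bot edit)",
        minor=True,
        bot=True,
    )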
@@ -18,78 +18,102 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import annotations | |||
import errno | |||
import sqlite3 as sqlite | |||
import stat | |||
import typing | |||
from collections import OrderedDict | |||
from http.cookiejar import LoadError, LWPCookieJar | |||
from dataclasses import dataclass | |||
from http.cookiejar import CookieJar, LoadError, LWPCookieJar | |||
from os import chmod, path | |||
from platform import python_version | |||
from earwigbot import __version__ | |||
from earwigbot.exceptions import SiteNotFoundError | |||
from earwigbot.wiki.copyvios.exclusions import ExclusionsDB | |||
from earwigbot.wiki.site import Site | |||
from earwigbot.wiki.site import Site, SqlConnInfo | |||
if typing.TYPE_CHECKING: | |||
from earwigbot.bot import Bot | |||
__all__ = ["SitesDB"] | |||
@dataclass(frozen=True) | |||
class _SiteInfoFromDB: | |||
name: str | |||
project: str | |||
lang: str | |||
base_url: str | |||
article_path: str | |||
script_path: str | |||
sql: SqlConnInfo | |||
namespaces: dict[int, list[str]] | |||
class SitesDB: | |||
""" | |||
**EarwigBot: Wiki Toolset: Sites Database Manager** | |||
This class controls the :file:`sites.db` file, which stores information | |||
about all wiki sites known to the bot. Three public methods act as bridges | |||
between the bot's config files and :py:class:`~earwigbot.wiki.site.Site` | |||
objects: | |||
This class controls the :file:`sites.db` file, which stores information about all | |||
wiki sites known to the bot. Three public methods act as bridges between the bot's | |||
config files and :py:class:`~earwigbot.wiki.site.Site` objects: | |||
- :py:meth:`get_site`: returns a Site object corresponding to a site | |||
- :py:meth:`add_site`: stores a site in the database | |||
- :py:meth:`remove_site`: removes a site from the database | |||
There's usually no need to use this class directly. All public methods | |||
here are available as :py:meth:`bot.wiki.get_site`, | |||
:py:meth:`bot.wiki.add_site`, and :py:meth:`bot.wiki.remove_site`, which | |||
use a :file:`sites.db` file located in the same directory as our | |||
:file:`config.yml` file. Lower-level access can be achieved by importing | |||
the manager class (``from earwigbot.wiki import SitesDB``). | |||
There's usually no need to use this class directly. All public methods here are | |||
available as :py:meth:`bot.wiki.get_site`, :py:meth:`bot.wiki.add_site`, and | |||
:py:meth:`bot.wiki.remove_site`, which use a :file:`sites.db` file located in the | |||
same directory as our :file:`config.yml` file. Lower-level access can be achieved | |||
by importing the manager class (``from earwigbot.wiki import SitesDB``). | |||
""" | |||
def __init__(self, bot): | |||
"""Set up the manager with an attribute for the base Bot object.""" | |||
def __init__(self, bot: Bot) -> None: | |||
""" | |||
Set up the manager with an attribute for the base Bot object. | |||
""" | |||
self.config = bot.config | |||
self._logger = bot.logger.getChild("wiki") | |||
self._sites = {} # Internal site cache | |||
self._sites: dict[str, Site] = {} # Internal site cache | |||
self._sitesdb = path.join(bot.config.root_dir, "sites.db") | |||
self._cookie_file = path.join(bot.config.root_dir, ".cookies") | |||
self._cookiejar = None | |||
self._cookiejar: CookieJar | None = None | |||
excl_db = path.join(bot.config.root_dir, "exclusions.db") | |||
excl_logger = self._logger.getChild("exclusionsdb") | |||
self._exclusions_db = ExclusionsDB(self, excl_db, excl_logger) | |||
def __repr__(self): | |||
"""Return the canonical string representation of the SitesDB.""" | |||
def __repr__(self) -> str: | |||
""" | |||
Return the canonical string representation of the SitesDB. | |||
""" | |||
res = "SitesDB(config={0!r}, sitesdb={1!r}, cookie_file={2!r})" | |||
return res.format(self.config, self._sitesdb, self._cookie_file) | |||
def __str__(self): | |||
"""Return a nice string representation of the SitesDB.""" | |||
def __str__(self) -> str: | |||
""" | |||
Return a nice string representation of the SitesDB. | |||
""" | |||
return f"<SitesDB at {self._sitesdb}>" | |||
def _get_cookiejar(self): | |||
"""Return a LWPCookieJar object loaded from our .cookies file. | |||
def _get_cookiejar(self) -> CookieJar: | |||
""" | |||
Return a LWPCookieJar object loaded from our .cookies file. | |||
The same .cookies file is returned every time, located in the project | |||
root, same directory as config.yml and bot.py. If it doesn't exist, we | |||
will create the file and set it to be readable and writeable only by | |||
us. If it exists but the information inside is bogus, we'll ignore it. | |||
The same .cookies file is returned every time, located in the project root, | |||
same directory as config.yml and bot.py. If it doesn't exist, we will create | |||
the file and set it to be readable and writeable only by us. If it exists but | |||
the information inside is bogus, we'll ignore it. | |||
This is normally called by _make_site_object() (in turn called by | |||
get_site()), and the cookiejar is passed to our Site's constructor, | |||
used when it makes API queries. This way, we can easily preserve | |||
cookies between sites (e.g., for CentralAuth), making logins easier. | |||
This is normally called by _make_site_object() (in turn called by get_site()), | |||
and the cookiejar is passed to our Site's constructor, used when it makes API | |||
queries. This way, we can easily preserve cookies between sites (e.g., for | |||
CentralAuth), making logins easier. | |||
""" | |||
if self._cookiejar: | |||
return self._cookiejar | |||
@@ -111,8 +135,10 @@ class SitesDB: | |||
return self._cookiejar | |||
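# A sketch of the load-or-create behavior described in the docstring above, using
# the imports already at the top of this module. The elided body may differ in
# details; `cookie_file` stands in for our .cookies path.
def load_or_create_cookies(cookie_file: str) -> LWPCookieJar:
    jar = LWPCookieJar(cookie_file)
    try:
        jar.load()
    except LoadError:
        pass  # Bogus contents: ignore them and start with an empty jar
    except FileNotFoundError:
        jar.save()  # Create the file, then restrict it to owner read/write only
        chmod(cookie_file, stat.S_IRUSR | stat.S_IWUSR)
    return jar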
def _create_sitesdb(self): | |||
"""Initialize the sitesdb file with its three necessary tables.""" | |||
def _create_sitesdb(self) -> None: | |||
""" | |||
Initialize the sitesdb file with its three necessary tables. | |||
""" | |||
script = """ | |||
CREATE TABLE sites (site_name, site_project, site_lang, site_base_url, | |||
site_article_path, site_script_path); | |||
@@ -122,11 +148,12 @@ class SitesDB: | |||
with sqlite.connect(self._sitesdb) as conn: | |||
conn.executescript(script) | |||
def _get_site_object(self, name): | |||
"""Return the site from our cache, or create it if it doesn't exist. | |||
def _get_site_object(self, name: str) -> Site: | |||
""" | |||
Return the site from our cache, or create it if it doesn't exist. | |||
This is essentially just a wrapper around _make_site_object that | |||
returns the same object each time a specific site is asked for. | |||
This is essentially just a wrapper around _make_site_object that returns the | |||
same object each time a specific site is asked for. | |||
""" | |||
try: | |||
return self._sites[name] | |||
@@ -135,14 +162,12 @@ class SitesDB: | |||
self._sites[name] = site | |||
return site | |||
def _load_site_from_sitesdb(self, name): | |||
"""Return all information stored in the sitesdb relating to given site. | |||
def _load_site_from_sitesdb(self, name: str) -> _SiteInfoFromDB: | |||
""" | |||
Return all information stored in the sitesdb relating to given site. | |||
The information will be returned as a tuple, containing the site's | |||
name, project, language, base URL, article path, script path, SQL | |||
connection data, and namespaces, in that order. If the site is not | |||
found in the database, SiteNotFoundError will be raised. An empty | |||
database will be created before the exception is raised if none exists. | |||
If the site is not found in the database, SiteNotFoundError will be raised. An | |||
empty database will be created before the exception is raised if none exists. | |||
""" | |||
query1 = "SELECT * FROM sites WHERE site_name = ?" | |||
query2 = "SELECT sql_data_key, sql_data_value FROM sql_data WHERE sql_site = ?" | |||
@@ -161,7 +186,7 @@ class SitesDB: | |||
name, project, lang, base_url, article_path, script_path = site_data | |||
sql = dict(sql_data) | |||
namespaces = {} | |||
namespaces: dict[int, list[str]] = {} | |||
for ns_id, ns_name, ns_is_primary_name in ns_data: | |||
try: | |||
if ns_is_primary_name: # "Primary" name goes first in list | |||
@@ -171,7 +196,7 @@ class SitesDB: | |||
except KeyError: | |||
namespaces[ns_id] = [ns_name] | |||
return ( | |||
return _SiteInfoFromDB( | |||
name, | |||
project, | |||
lang, | |||
@@ -182,16 +207,16 @@ class SitesDB: | |||
namespaces, | |||
) | |||
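# For illustration, a loaded row might look something like this (all values are
# hypothetical, not read from a real sites.db; note that each namespace's "primary"
# name comes first in its list):
example_info = _SiteInfoFromDB(
    name="enwiki",
    project="wikipedia",
    lang="en",
    base_url="//en.wikipedia.org",
    article_path="/wiki/$1",
    script_path="/w",
    sql={},
    namespaces={0: [""], 1: ["Talk"], 4: ["Wikipedia", "Project", "WP"]},
)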
def _make_site_object(self, name): | |||
"""Return a Site object associated with the site *name* in our sitesdb. | |||
def _make_site_object(self, name: str) -> Site: | |||
""" | |||
Return a Site object associated with the site *name* in our sitesdb. | |||
This calls _load_site_from_sitesdb(), so SiteNotFoundError will be | |||
raised if the site is not in our sitesdb. | |||
This calls _load_site_from_sitesdb(), so SiteNotFoundError will be raised if | |||
the site is not in our sitesdb. | |||
""" | |||
cookiejar = self._get_cookiejar() | |||
(name, project, lang, base_url, article_path, script_path, sql, namespaces) = ( | |||
self._load_site_from_sitesdb(name) | |||
) | |||
info = self._load_site_from_sitesdb(name) | |||
name = info.name | |||
config = self.config | |||
login = (config.wiki.get("username"), config.wiki.get("password")) | |||
@@ -213,6 +238,7 @@ class SitesDB: | |||
search_config["nltk_dir"] = nltk_dir | |||
search_config["exclusions_db"] = self._exclusions_db | |||
sql = info.sql | |||
if not sql: | |||
sql = config.wiki.get("sql", OrderedDict()).copy() | |||
for key, value in sql.items(): | |||
@@ -221,13 +247,13 @@ class SitesDB: | |||
return Site( | |||
name=name, | |||
project=project, | |||
lang=lang, | |||
base_url=base_url, | |||
article_path=article_path, | |||
script_path=script_path, | |||
project=info.project, | |||
lang=info.lang, | |||
base_url=info.base_url, | |||
article_path=info.article_path, | |||
script_path=info.script_path, | |||
sql=sql, | |||
namespaces=namespaces, | |||
namespaces=info.namespaces, | |||
login=login, | |||
oauth=oauth, | |||
cookiejar=cookiejar, | |||
@@ -240,18 +266,18 @@ class SitesDB: | |||
search_config=search_config, | |||
) | |||
def _get_site_name_from_sitesdb(self, project, lang): | |||
"""Return the name of the first site with the given project and lang. | |||
def _get_site_name_from_sitesdb(self, project: str, lang: str) -> str | None: | |||
""" | |||
Return the name of the first site with the given project and lang. | |||
If we can't find the site with the given information, we'll also try | |||
searching for a site whose base_url contains "{lang}.{project}". There | |||
are a few sites, like the French Wikipedia, that set their project to | |||
something other than the expected "wikipedia" ("wikipédia" in this | |||
case), but we should correctly find them when doing get_site(lang="fr", | |||
project="wikipedia"). | |||
If we can't find the site with the given information, we'll also try searching | |||
for a site whose base_url contains "{lang}.{project}". There are a few sites, | |||
like the French Wikipedia, that set their project to something other than the | |||
expected "wikipedia" ("wikipédia" in this case), but we should correctly find | |||
them when doing get_site(lang="fr", project="wikipedia"). | |||
If the site is not found, return None. An empty sitesdb will be created | |||
if none exists. | |||
If the site is not found, return None. An empty sitesdb will be created if | |||
none exists. | |||
""" | |||
query1 = "SELECT site_name FROM sites WHERE site_project = ? and site_lang = ?" | |||
query2 = "SELECT site_name FROM sites WHERE site_base_url LIKE ?" | |||
@@ -267,26 +293,27 @@ class SitesDB: | |||
except sqlite.OperationalError: | |||
self._create_sitesdb() | |||
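# A sketch of the lookup-with-fallback described above. The elided body may differ;
# `conn` stands in for an open connection to sites.db.
def find_site_name(conn: sqlite.Connection, project: str, lang: str) -> str | None:
    query1 = "SELECT site_name FROM sites WHERE site_project = ? and site_lang = ?"
    query2 = "SELECT site_name FROM sites WHERE site_base_url LIKE ?"
    row = conn.execute(query1, (project, lang)).fetchone()
    if not row:  # e.g. frwiki stores its project as "wikipédia", not "wikipedia"
        row = conn.execute(query2, (f"%{lang}.{project}%",)).fetchone()
    return row[0] if row else None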
def _add_site_to_sitesdb(self, site): | |||
"""Extract relevant info from a Site object and add it to the sitesdb. | |||
def _add_site_to_sitesdb(self, site: Site) -> None: | |||
""" | |||
Extract relevant info from a Site object and add it to the sitesdb. | |||
Works like a reverse _load_site_from_sitesdb(); the site's project, | |||
language, base URL, article path, script path, SQL connection data, and | |||
namespaces are extracted from the site and inserted into the sites | |||
database. If the sitesdb doesn't exist, we'll create it first. | |||
Works like a reverse _load_site_from_sitesdb(); the site's project, language, | |||
base URL, article path, script path, SQL connection data, and namespaces are | |||
extracted from the site and inserted into the sites database. If the sitesdb | |||
doesn't exist, we'll create it first. | |||
""" | |||
name = site.name | |||
sites_data = ( | |||
name, | |||
site.project, | |||
site.lang, | |||
site._base_url, | |||
site._article_path, | |||
site._script_path, | |||
site.base_url, | |||
site.article_path, | |||
site.script_path, | |||
) | |||
sql_data = [(name, key, val) for key, val in site._sql_data.items()] | |||
ns_data = [] | |||
for ns_id, ns_names in site._namespaces.items(): | |||
ns_data: list[tuple[str, int, str, bool]] = [] | |||
for ns_id, ns_names in site.namespaces.items(): | |||
ns_data.append((name, ns_id, ns_names[0], True))  # Primary name goes first
for ns_name in ns_names[1:]:
ns_data.append((name, ns_id, ns_name, False))
@@ -306,8 +333,10 @@ class SitesDB: | |||
conn.executemany("INSERT INTO sql_data VALUES (?, ?, ?)", sql_data) | |||
conn.executemany("INSERT INTO namespaces VALUES (?, ?, ?, ?)", ns_data) | |||
def _remove_site_from_sitesdb(self, name): | |||
"""Remove a site by name from the sitesdb and the internal cache.""" | |||
def _remove_site_from_sitesdb(self, name: str) -> bool: | |||
""" | |||
Remove a site by name from the sitesdb and the internal cache. | |||
""" | |||
try: | |||
del self._sites[name] | |||
except KeyError: | |||
@@ -323,30 +352,34 @@ class SitesDB: | |||
self._logger.info(f"Removed site '{name}'") | |||
return True | |||
def get_site(self, name=None, project=None, lang=None): | |||
"""Return a Site instance based on information from the sitesdb. | |||
def get_site( | |||
self, | |||
name: str | None = None, | |||
project: str | None = None, | |||
lang: str | None = None, | |||
) -> Site: | |||
""" | |||
Return a Site instance based on information from the sitesdb. | |||
With no arguments, return the default site as specified by our config | |||
file. This is ``config.wiki["defaultSite"]``. | |||
With no arguments, return the default site as specified by our config file. | |||
This is ``config.wiki["defaultSite"]``. | |||
With *name* specified, return the site with that name. This is | |||
equivalent to the site's ``wikiid`` in the API, like *enwiki*. | |||
With *name* specified, return the site with that name. This is equivalent to | |||
the site's ``wikiid`` in the API, like *enwiki*. | |||
With *project* and *lang* specified, return the site whose project and | |||
language match these values. If there are multiple sites with the same | |||
values (unlikely), this is not a reliable way of loading a site. Call | |||
the function with an explicit *name* in that case. | |||
With *project* and *lang* specified, return the site whose project and language | |||
match these values. If there are multiple sites with the same values | |||
(unlikely), this is not a reliable way of loading a site. Call the function | |||
with an explicit *name* in that case. | |||
We will attempt to login to the site automatically using | |||
``config.wiki["username"]`` and ``config.wiki["password"]`` if both are | |||
defined. | |||
Specifying a project without a lang or a lang without a project will | |||
raise :py:exc:`TypeError`. If all three args are specified, *name* will | |||
be first tried, then *project* and *lang* if *name* doesn't work. If a | |||
site cannot be found in the sitesdb, | |||
:py:exc:`~earwigbot.exceptions.SiteNotFoundError` will be raised. An | |||
empty sitesdb will be created if none is found. | |||
``config.wiki["username"]`` and ``config.wiki["password"]`` if both are defined. | |||
Specifying a project without a lang or a lang without a project will raise | |||
:py:exc:`TypeError`. If all three args are specified, *name* will be first | |||
tried, then *project* and *lang* if *name* doesn't work. If a site cannot be | |||
found in the sitesdb, :py:exc:`~earwigbot.exceptions.SiteNotFoundError` will be | |||
raised. An empty sitesdb will be created if none is found. | |||
""" | |||
# Someone specified a project without a lang, or vice versa: | |||
if (project and not lang) or (not project and lang): | |||
@@ -374,6 +407,7 @@ class SitesDB: | |||
raise | |||
# If we end up here, then project and lang are the only args given: | |||
assert project is not None and lang is not None, (project, lang) | |||
name = self._get_site_name_from_sitesdb(project, lang) | |||
if name: | |||
return self._get_site_object(name) | |||
@@ -381,30 +415,34 @@ class SitesDB: | |||
raise SiteNotFoundError(e) | |||
def add_site( | |||
self, project=None, lang=None, base_url=None, script_path="/w", sql=None | |||
): | |||
"""Add a site to the sitesdb so it can be retrieved with get_site(). | |||
self, | |||
project: str | None = None, | |||
lang: str | None = None, | |||
base_url: str | None = None, | |||
script_path: str = "/w", | |||
sql: SqlConnInfo | None = None, | |||
) -> Site: | |||
""" | |||
Add a site to the sitesdb so it can be retrieved with get_site(). | |||
If only a project and a lang are given, we'll guess the *base_url* as | |||
``"//{lang}.{project}.org"`` (which is protocol-relative, becoming | |||
``"https"`` if *useHTTPS* is ``True`` in config otherwise ``"http"``). | |||
If this is wrong, provide the correct *base_url* as an argument (in | |||
which case project and lang are ignored). Most wikis use ``"/w"`` as | |||
the script path (meaning the API is located at | |||
``"{base_url}{script_path}/api.php"`` -> | |||
``"//{lang}.{project}.org/w/api.php"``), so this is the default. If | |||
your wiki is different, provide the script_path as an argument. SQL | |||
connection settings are guessed automatically using config's template | |||
value. If this is wrong or not specified, provide a dict of kwargs as | |||
*sql* and Site will pass it to :py:func:`pymysql.connect(**sql) | |||
<pymysql.connect>`, allowing you to make queries with | |||
:py:meth:`site.sql_query <earwigbot.wiki.site.Site.sql_query>`. | |||
Returns ``True`` if the site was added successfully or ``False`` if the | |||
site is already in our sitesdb (this can be done purposefully to update | |||
old site info). Raises :py:exc:`~earwigbot.exception.SiteNotFoundError` | |||
if not enough information has been provided to identify the site (e.g. | |||
a *project* but not a *lang*). | |||
``"//{lang}.{project}.org"`` (which is protocol-relative, becoming ``"https"`` | |||
if *useHTTPS* is ``True`` in config, or ``"http"`` otherwise). If this is wrong,
provide the correct *base_url* as an argument (in which case project and lang | |||
are ignored). Most wikis use ``"/w"`` as the script path (meaning the API is | |||
located at ``"{base_url}{script_path}/api.php"`` -> | |||
``"//{lang}.{project}.org/w/api.php"``), so this is the default. If your wiki | |||
is different, provide the script_path as an argument. SQL connection settings | |||
are guessed automatically using config's template value. If this is wrong or | |||
not specified, provide a dict of kwargs as *sql* and Site will pass it to | |||
:py:func:`pymysql.connect(**sql) <pymysql.connect>`, allowing you to make | |||
queries with :py:meth:`site.sql_query <earwigbot.wiki.site.Site.sql_query>`. | |||
Returns the added site as a :py:class:`~earwigbot.wiki.site.Site` object. Adding a
site that is already in our sitesdb can be done purposefully to update old site
info. Raises :py:exc:`~earwigbot.exceptions.SiteNotFoundError` if not enough
information has been provided to identify the site (e.g. a *project* but not
a *lang*).
""" | |||
if not base_url: | |||
if not project or not lang: | |||
@@ -445,7 +483,12 @@ class SitesDB: | |||
self._add_site_to_sitesdb(site) | |||
return self._get_site_object(site.name) | |||
def remove_site(self, name=None, project=None, lang=None): | |||
def remove_site( | |||
self, | |||
name: str | None = None, | |||
project: str | None = None, | |||
lang: str | None = None, | |||
) -> bool: | |||
"""Remove a site from the sitesdb. | |||
Returns ``True`` if the site was removed successfully or ``False`` if | |||
@@ -1,4 +1,4 @@ | |||
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -18,14 +18,21 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from logging import NullHandler, getLogger | |||
from socket import AF_INET, AF_INET6, inet_pton | |||
from time import gmtime, strptime | |||
from __future__ import annotations | |||
import socket | |||
import time | |||
import typing | |||
from logging import Logger, NullHandler, getLogger | |||
from typing import Any, Literal | |||
from earwigbot.exceptions import UserNotFoundError | |||
from earwigbot.wiki import constants | |||
from earwigbot.wiki.page import Page | |||
if typing.TYPE_CHECKING: | |||
from earwigbot.wiki.site import Site | |||
__all__ = ["User"] | |||
@@ -33,10 +40,9 @@ class User: | |||
""" | |||
**EarwigBot: Wiki Toolset: User** | |||
Represents a user on a given :py:class:`~earwigbot.wiki.site.Site`. Has | |||
methods for getting a bunch of information about the user, such as | |||
editcount and user rights, methods for returning the user's userpage and | |||
talkpage, etc. | |||
Represents a user on a given :py:class:`~earwigbot.wiki.site.Site`. Has methods for | |||
getting a bunch of information about the user, such as editcount and user rights, | |||
methods for returning the user's userpage and talkpage, etc. | |||
*Attributes:* | |||
@@ -56,24 +62,23 @@ class User: | |||
*Public methods:* | |||
- :py:meth:`reload`: forcibly reloads the user's attributes | |||
- :py:meth:`get_userpage`: returns a Page object representing the user's | |||
userpage | |||
- :py:meth:`get_talkpage`: returns a Page object representing the user's | |||
talkpage | |||
- :py:meth:`get_userpage`: returns a Page object representing the user's userpage | |||
- :py:meth:`get_talkpage`: returns a Page object representing the user's talkpage | |||
""" | |||
def __init__(self, site, name, logger=None): | |||
"""Constructor for new User instances. | |||
def __init__(self, site: Site, name: str, logger: Logger | None = None) -> None: | |||
""" | |||
Constructor for new User instances. | |||
Takes two arguments, a Site object (necessary for doing API queries), | |||
and the name of the user, preferably without "User:" in front, although | |||
this prefix will be automatically removed by the API if given. | |||
Takes two arguments, a Site object (necessary for doing API queries), and the | |||
name of the user, preferably without "User:" in front, although this prefix | |||
will be automatically removed by the API if given. | |||
You can also use site.get_user() instead, which returns a User object, | |||
and is preferred. | |||
You can also use site.get_user() instead, which returns a User object, and | |||
is preferred. | |||
We won't do any API queries yet for basic information about the user - | |||
save that for when the information is requested. | |||
We won't do any API queries yet for basic information about the user - save | |||
that for when the information is requested. | |||
""" | |||
self._site = site | |||
self._name = name | |||
@@ -85,22 +90,27 @@ class User: | |||
self._logger = getLogger("earwigbot.wiki") | |||
self._logger.addHandler(NullHandler()) | |||
def __repr__(self): | |||
"""Return the canonical string representation of the User.""" | |||
def __repr__(self) -> str: | |||
""" | |||
Return the canonical string representation of the User. | |||
""" | |||
return f"User(name={self._name!r}, site={self._site!r})" | |||
def __str__(self): | |||
"""Return a nice string representation of the User.""" | |||
def __str__(self) -> str: | |||
""" | |||
Return a nice string representation of the User. | |||
""" | |||
return f'<User "{self.name}" of {str(self.site)}>' | |||
def _get_attribute(self, attr): | |||
"""Internally used to get an attribute by name. | |||
def _get_attribute(self, attr: str) -> Any: | |||
""" | |||
Internally used to get an attribute by name. | |||
We'll call _load_attributes() to get this (and all other attributes) | |||
from the API if it is not already defined. | |||
We'll call _load_attributes() to get this (and all other attributes) from the | |||
API if it is not already defined. | |||
Raises UserNotFoundError if a nonexistent user prevents us from
returning a certain attribute.
Raises UserNotFoundError if a nonexistent user prevents us from returning a
certain attribute.
""" | |||
if not hasattr(self, attr): | |||
self._load_attributes() | |||
@@ -109,11 +119,12 @@ class User: | |||
raise UserNotFoundError(e) | |||
return getattr(self, attr) | |||
def _load_attributes(self): | |||
"""Internally used to load all attributes from the API. | |||
def _load_attributes(self) -> None: | |||
""" | |||
Internally used to load all attributes from the API. | |||
Normally, this is called by _get_attribute() when a requested attribute | |||
is not defined. This defines it. | |||
Normally, this is called by _get_attribute() when a requested attribute is not | |||
defined. This defines it. | |||
""" | |||
props = "blockinfo|groups|rights|editcount|registration|emailable|gender" | |||
result = self.site.api_query( | |||
@@ -150,11 +161,11 @@ class User: | |||
reg = res["registration"] | |||
try: | |||
self._registration = strptime(reg, "%Y-%m-%dT%H:%M:%SZ") | |||
self._registration = time.strptime(reg, "%Y-%m-%dT%H:%M:%SZ") | |||
except TypeError: | |||
# Sometimes the API doesn't give a date; the user's probably really | |||
# old. There's nothing else we can do! | |||
self._registration = gmtime(0) | |||
self._registration = time.gmtime(0) | |||
try: | |||
res["emailable"] | |||
@@ -166,24 +177,28 @@ class User: | |||
self._gender = res["gender"] | |||
@property | |||
def site(self): | |||
"""The user's corresponding Site object.""" | |||
def site(self) -> Site: | |||
""" | |||
The user's corresponding Site object. | |||
""" | |||
return self._site | |||
@property | |||
def name(self): | |||
"""The user's username. | |||
def name(self) -> str: | |||
""" | |||
The user's username. | |||
This will never make an API query on its own, but if one has already | |||
been made by the time this is retrieved, the username may have been | |||
"normalized" from the original input to the constructor, converted into | |||
a Unicode object, with underscores removed, etc. | |||
This will never make an API query on its own, but if one has already been made | |||
by the time this is retrieved, the username may have been "normalized" from the | |||
original input to the constructor, with underscores removed, etc.
""" | |||
return self._name | |||
@property | |||
def exists(self): | |||
"""``True`` if the user exists, or ``False`` if they do not. | |||
def exists(self) -> bool: | |||
""" | |||
``True`` if the user exists, or ``False`` if they do not. | |||
Makes an API query only if we haven't made one already. | |||
""" | |||
@@ -192,124 +207,135 @@ class User: | |||
return self._exists | |||
@property | |||
def userid(self): | |||
"""An integer ID used by MediaWiki to represent the user. | |||
def userid(self) -> int: | |||
""" | |||
An integer ID used by MediaWiki to represent the user. | |||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user | |||
does not exist. Makes an API query only if we haven't made one already. | |||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not | |||
exist. Makes an API query only if we haven't made one already. | |||
""" | |||
return self._get_attribute("_userid") | |||
@property | |||
def blockinfo(self): | |||
"""Information about any current blocks on the user. | |||
def blockinfo(self) -> dict[str, Any] | Literal[False]: | |||
""" | |||
Information about any current blocks on the user. | |||
If the user is not blocked, returns ``False``. If they are, returns a | |||
dict with three keys: ``"by"`` is the blocker's username, ``"reason"`` | |||
is the reason why they were blocked, and ``"expiry"`` is when the block | |||
expires. | |||
If the user is not blocked, returns ``False``. If they are, returns a dict with | |||
three keys: ``"by"`` is the blocker's username, ``"reason"`` is the reason why | |||
they were blocked, and ``"expiry"`` is when the block expires. | |||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user | |||
does not exist. Makes an API query only if we haven't made one already. | |||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not | |||
exist. Makes an API query only if we haven't made one already. | |||
""" | |||
return self._get_attribute("_blockinfo") | |||
@property | |||
def groups(self): | |||
"""A list of groups this user is in, including ``"*"``. | |||
def groups(self) -> list[str]: | |||
""" | |||
A list of groups this user is in, including ``"*"``. | |||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user | |||
does not exist. Makes an API query only if we haven't made one already. | |||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not | |||
exist. Makes an API query only if we haven't made one already. | |||
""" | |||
return self._get_attribute("_groups") | |||
@property | |||
def rights(self): | |||
"""A list of this user's rights. | |||
def rights(self) -> list[str]: | |||
""" | |||
A list of this user's rights. | |||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user | |||
does not exist. Makes an API query only if we haven't made one already. | |||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not | |||
exist. Makes an API query only if we haven't made one already. | |||
""" | |||
return self._get_attribute("_rights") | |||
@property | |||
def editcount(self): | |||
"""Returns the number of edits made by the user. | |||
def editcount(self) -> int: | |||
""" | |||
Returns the number of edits made by the user. | |||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user | |||
does not exist. Makes an API query only if we haven't made one already. | |||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not | |||
exist. Makes an API query only if we haven't made one already. | |||
""" | |||
return self._get_attribute("_editcount") | |||
@property | |||
def registration(self): | |||
"""The time the user registered as a :py:class:`time.struct_time`. | |||
def registration(self) -> time.struct_time: | |||
""" | |||
The time the user registered as a :py:class:`time.struct_time`. | |||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user | |||
does not exist. Makes an API query only if we haven't made one already. | |||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not | |||
exist. Makes an API query only if we haven't made one already. | |||
""" | |||
return self._get_attribute("_registration") | |||
@property | |||
def emailable(self): | |||
"""``True`` if the user can be emailed, or ``False`` if they cannot. | |||
def emailable(self) -> bool: | |||
""" | |||
``True`` if the user can be emailed, or ``False`` if they cannot. | |||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user | |||
does not exist. Makes an API query only if we haven't made one already. | |||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not | |||
exist. Makes an API query only if we haven't made one already. | |||
""" | |||
return self._get_attribute("_emailable") | |||
@property | |||
def gender(self): | |||
"""The user's gender. | |||
def gender(self) -> str: | |||
""" | |||
The user's gender. | |||
Can return either ``"male"``, ``"female"``, or ``"unknown"``, if they | |||
did not specify it. | |||
Can return either ``"male"``, ``"female"``, or ``"unknown"``, if they did not | |||
specify it. | |||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user | |||
does not exist. Makes an API query only if we haven't made one already. | |||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not | |||
exist. Makes an API query only if we haven't made one already. | |||
""" | |||
return self._get_attribute("_gender") | |||
@property | |||
def is_ip(self): | |||
"""``True`` if the user is an IP address, or ``False`` otherwise. | |||
def is_ip(self) -> bool: | |||
""" | |||
``True`` if the user is an IP address, or ``False`` otherwise. | |||
This tests for IPv4 and IPv6 using :py:func:`socket.inet_pton` on the | |||
username. No API queries are made. | |||
This tests for IPv4 and IPv6 using :py:func:`socket.inet_pton` on the username. | |||
No API queries are made. | |||
""" | |||
try: | |||
inet_pton(AF_INET, self.name) | |||
socket.inet_pton(socket.AF_INET, self.name) | |||
except OSError: | |||
try: | |||
inet_pton(AF_INET6, self.name) | |||
socket.inet_pton(socket.AF_INET6, self.name) | |||
except OSError: | |||
return False | |||
return True | |||
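# The same check as a standalone sketch, with arbitrary example inputs:
def looks_like_ip(name: str) -> bool:
    for family in (socket.AF_INET, socket.AF_INET6):
        try:
            socket.inet_pton(family, name)
            return True
        except OSError:
            continue
    return False

assert looks_like_ip("208.80.154.224")  # IPv4
assert looks_like_ip("2620:0:861:ed1a::1")  # IPv6
assert not looks_like_ip("Example user")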
def reload(self): | |||
"""Forcibly reload the user's attributes. | |||
def reload(self) -> None: | |||
""" | |||
Forcibly reload the user's attributes. | |||
Emphasis on *reload*: this is only necessary if there is reason to | |||
believe they have changed. | |||
Emphasis on *reload*: this is only necessary if there is reason to believe they | |||
have changed. | |||
""" | |||
self._load_attributes() | |||
def get_userpage(self): | |||
"""Return a Page object representing the user's userpage. | |||
def get_userpage(self) -> Page: | |||
""" | |||
Return a Page object representing the user's userpage. | |||
No checks are made to see if it exists or not. Proper site namespace | |||
conventions are followed. | |||
No checks are made to see if it exists or not. Proper site namespace conventions | |||
are followed. | |||
""" | |||
prefix = self.site.namespace_id_to_name(constants.NS_USER) | |||
pagename = ":".join((prefix, self._name)) | |||
return Page(self.site, pagename) | |||
def get_talkpage(self): | |||
"""Return a Page object representing the user's talkpage. | |||
def get_talkpage(self) -> Page: | |||
""" | |||
Return a Page object representing the user's talkpage. | |||
No checks are made to see if it exists or not. Proper site namespace | |||
conventions are followed. | |||
No checks are made to see if it exists or not. Proper site namespace conventions | |||
are followed. | |||
""" | |||
prefix = self.site.namespace_id_to_name(constants.NS_USER_TALK) | |||
pagename = ":".join((prefix, self._name)) | |||