@@ -1,6 +1,6 @@
 v0.4 (unreleased):
-- Migrated to Python 3 (3.11+).
+- Migrated to Python 3 (3.11+). Substantial code cleanup.
 - Migrated from oursql to pymysql.
 - Copyvios: Configurable proxy support for specific domains.
 - Copyvios: Parser-directed URL redirection.
@@ -1,4 +1,4 @@
-# Copyright (C) 2009-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
+# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -20,15 +20,16 @@
 """
 `EarwigBot <https://github.com/earwig/earwigbot>`_ is a Python robot that edits
-Wikipedia and interacts with people over IRC.
+Wikipedia and interacts over IRC.
-See :file:`README.rst` for an overview, or the :file:`docs/` directory for
-details. This documentation is also available `online
-<https://packages.python.org/earwigbot>`_.
+See :file:`README.rst` for an overview, or the :file:`docs/` directory for details.
+This documentation is also available `online <https://packages.python.org/earwigbot>`_.
 """
+import typing
 __author__ = "Ben Kurtovic"
-__copyright__ = "Copyright (C) 2009-2019 Ben Kurtovic"
+__copyright__ = "Copyright (C) 2009-2024 Ben Kurtovic"
 __license__ = "MIT License"
 __version__ = "0.4.dev0"
 __email__ = "ben.kurtovic@gmail.com"
@@ -57,12 +58,26 @@ from earwigbot import lazy
 importer = lazy.LazyImporter()
-bot = importer.new("earwigbot.bot")
-commands = importer.new("earwigbot.commands")
-config = importer.new("earwigbot.config")
-exceptions = importer.new("earwigbot.exceptions")
-irc = importer.new("earwigbot.irc")
-managers = importer.new("earwigbot.managers")
-tasks = importer.new("earwigbot.tasks")
-util = importer.new("earwigbot.util")
-wiki = importer.new("earwigbot.wiki")
+if typing.TYPE_CHECKING:
+    from earwigbot import (
+        bot,
+        commands,
+        config,
+        exceptions,
+        irc,
+        managers,
+        tasks,
+        util,
+        wiki,
+    )
+else:
+    bot = importer.new("earwigbot.bot")
+    commands = importer.new("earwigbot.commands")
+    config = importer.new("earwigbot.config")
+    exceptions = importer.new("earwigbot.exceptions")
+    irc = importer.new("earwigbot.irc")
+    managers = importer.new("earwigbot.managers")
+    tasks = importer.new("earwigbot.tasks")
+    util = importer.new("earwigbot.util")
+    wiki = importer.new("earwigbot.wiki")
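This keeps the submodule imports lazy at runtime while letting type checkers resolve the real modules. A minimal, self-contained sketch of the same pattern, using a hypothetical stand-in for LazyImporter rather than EarwigBot's actual implementation:

    import importlib
    import typing


    class LazyImporter:
        # Stand-in: return a proxy that imports the module on first attribute access.
        def new(self, name: str):
            class _Proxy:
                def __getattr__(self, attr):
                    return getattr(importlib.import_module(name), attr)

            return _Proxy()


    importer = LazyImporter()

    if typing.TYPE_CHECKING:
        import json  # type checkers see the real module
    else:
        json = importer.new("json")  # at runtime, "json" only loads when first used

    print(json.dumps({"lazy": True}))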
@@ -107,6 +107,9 @@ class APIError(ServiceError):
     Raised by :py:meth:`Site.api_query <earwigbot.wiki.site.Site.api_query>`.
     """
+    code: str
+    info: str
 class SQLError(ServiceError):
     """Some error involving SQL querying occurred.
@@ -43,13 +43,14 @@ JobKwargs = TypedDict(
         "nocreate": NotRequired[bool],
         "recursive": NotRequired[bool | int],
         "tag-categories": NotRequired[bool],
+        "not-in-category": NotRequired[str],
         "site": NotRequired[str],
         "dry-run": NotRequired[bool],
     },
 )
-@dataclass
+@dataclass(frozen=True)
 class Job:
     """
     Represents a single wikiproject-tagging task.
@@ -68,11 +69,20 @@ class Job:
     only_with: set[str] | None
     nocreate: bool
     tag_categories: bool
+    not_in_category: str | None
     dry_run: bool
-    counter: int = 0
+    _counter: list[int] = field(default_factory=lambda: [0])  # Wrap to allow frozen updates
     processed_cats: set[str] = field(default_factory=set)
     processed_pages: set[str] = field(default_factory=set)
+    skip_pages: set[str] = field(default_factory=set)
+    @property
+    def counter(self) -> int:
+        return self._counter[0]
+    def add_to_counter(self, value: int) -> None:
+        self._counter[0] += value
 class ShutoffEnabled(Exception):
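A frozen dataclass forbids rebinding its fields, but the contents of a field that happens to be a list can still be mutated, which is what the _counter wrapper above relies on. A self-contained sketch of the pattern (not the bot's own class):

    from dataclasses import dataclass, field


    @dataclass(frozen=True)
    class Tally:
        # The field itself is never reassigned; only the list's single element changes.
        _count: list[int] = field(default_factory=lambda: [0])

        @property
        def count(self) -> int:
            return self._count[0]

        def add(self, value: int) -> None:
            self._count[0] += value


    tally = Tally()
    tally.add(1)
    tally.add(1)
    assert tally.count == 2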
@@ -90,7 +100,7 @@ class WikiProjectTagger(Task):
     Usage: :command:`earwigbot -t wikiproject_tagger PATH --banner BANNER
     [--category CAT | --file FILE] [--summary SUM] [--update] [--append PARAMS]
     [--autoassess [CLASSES]] [--only-with BANNER] [--nocreate] [--recursive [NUM]]
-    [--site SITE] [--dry-run]`
+    [--not-in-category CAT] [--site SITE] [--dry-run]`
     .. glossary::
@@ -126,6 +136,8 @@ class WikiProjectTagger(Task):
         ``NUM`` isn't provided, go infinitely (this can be dangerous)
     ``--tag-categories``
         also tag category pages
+    ``--not-in-category CAT``
+        skip talk pages that are already members of this category
     ``--site SITE``
         the ID of the site to tag pages on, defaulting to the default site
     ``--dry-run``
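Per the usage string above, a run that tags a category tree while skipping talk pages already in a maintenance category could be invoked like this (the path, banner, and category names here are purely hypothetical):

    earwigbot -t wikiproject_tagger ~/.earwigbot \
        --banner "WikiProject Example" --category "Example articles" --recursive 2 \
        --not-in-category "Example pages excluded from tagging" --dry-run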
@@ -189,6 +201,7 @@ class WikiProjectTagger(Task):
         nocreate = kwargs.get("nocreate", False)
         recursive = kwargs.get("recursive", 0)
         tag_categories = kwargs.get("tag-categories", False)
+        not_in_category = kwargs.get("not-in-category")
         dry_run = kwargs.get("dry-run", False)
         banner, names = self.get_names(site, banner)
         if not names:
@@ -210,6 +223,7 @@ class WikiProjectTagger(Task):
             only_with=only_with,
             nocreate=nocreate,
             tag_categories=tag_categories,
+            not_in_category=not_in_category,
             dry_run=dry_run,
         )
@@ -224,6 +238,11 @@ class WikiProjectTagger(Task):
         """
         Run a tagging *job* on a given *site*.
         """
+        if job.not_in_category:
+            skip_category = site.get_category(job.not_in_category)
+            for page in skip_category.get_members():
+                job.skip_pages.add(page.title)
         if "category" in kwargs:
             title = kwargs["category"]
             title = self.guess_namespace(site, title, constants.NS_CATEGORY)
@@ -322,6 +341,10 @@ class WikiProjectTagger(Task):
         if not page.is_talkpage:
             page = page.toggle_talk()
+        if page.title in job.skip_pages:
+            self.logger.debug(f"Skipping page, in category to skip: [[{page.title}]]")
+            return
         if page.title in job.processed_pages:
             self.logger.debug(f"Skipping page, already processed: [[{page.title}]]")
             return
@@ -330,7 +353,7 @@ class WikiProjectTagger(Task):
         if job.counter % 10 == 0:  # Do a shutoff check every ten pages
             if self.shutoff_enabled(page.site):
                 raise ShutoffEnabled()
-        job.counter += 1
+        job.add_to_counter(1)
         try:
             code = page.parse()
@@ -1,4 +1,4 @@
-# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
+# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -18,6 +18,9 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
+from collections.abc import Iterator
+from earwigbot.wiki.constants import Service
 from earwigbot.wiki.page import Page
 __all__ = ["Category"]
@@ -27,14 +30,14 @@ class Category(Page):
     """
     **EarwigBot: Wiki Toolset: Category**
-    Represents a category on a given :py:class:`~earwigbot.wiki.site.Site`, a
-    subclass of :py:class:`~earwigbot.wiki.page.Page`. Provides additional
-    methods, but :py:class:`~earwigbot.wiki.page.Page`'s own methods should
-    work fine on :py:class:`Category` objects. :py:meth:`site.get_page()
-    <earwigbot.wiki.site.Site.get_page>` will return a :py:class:`Category`
-    instead of a :py:class:`~earwigbot.wiki.page.Page` if the given title is in
-    the category namespace; :py:meth:`~earwigbot.wiki.site.Site.get_category`
-    is shorthand, accepting category names without the namespace prefix.
+    Represents a category on a given :py:class:`~earwigbot.wiki.site.Site`, a subclass
+    of :py:class:`~earwigbot.wiki.page.Page`. Provides additional methods, but
+    :py:class:`~earwigbot.wiki.page.Page`'s own methods should work fine on
+    :py:class:`Category` objects. :py:meth:`site.get_page()
+    <earwigbot.wiki.site.Site.get_page>` will return a :py:class:`Category` instead of
+    a :py:class:`~earwigbot.wiki.page.Page` if the given title is in the category
+    namespace; :py:meth:`~earwigbot.wiki.site.Site.get_category` is shorthand,
+    accepting category names without the namespace prefix.
     *Attributes:*
@@ -48,22 +51,30 @@ class Category(Page):
     - :py:meth:`get_members`: iterates over Pages in the category
     """
-    def __repr__(self):
-        """Return the canonical string representation of the Category."""
+    def __repr__(self) -> str:
+        """
+        Return the canonical string representation of the Category.
+        """
         res = "Category(title={0!r}, follow_redirects={1!r}, site={2!r})"
         return res.format(self._title, self._follow_redirects, self._site)
-    def __str__(self):
-        """Return a nice string representation of the Category."""
+    def __str__(self) -> str:
+        """
+        Return a nice string representation of the Category.
+        """
         return f'<Category "{self.title}" of {str(self.site)}>'
-    def __iter__(self):
-        """Iterate over all members of the category."""
+    def __iter__(self) -> Iterator[Page]:
+        """
+        Iterate over all members of the category.
+        """
         return self.get_members()
-    def _get_members_via_api(self, limit, follow):
-        """Iterate over Pages in the category using the API."""
-        params = {
+    def _get_members_via_api(self, limit: int | None, follow: bool) -> Iterator[Page]:
+        """
+        Iterate over Pages in the category using the API.
+        """
+        params: dict[str, str | int] = {
             "action": "query",
             "list": "categorymembers",
             "cmtitle": self.title,
@@ -84,8 +95,10 @@ class Category(Page):
             else:
                 break
-    def _get_members_via_sql(self, limit, follow):
-        """Iterate over Pages in the category using SQL."""
+    def _get_members_via_sql(self, limit: int | None, follow: bool) -> Iterator[Page]:
+        """
+        Iterate over Pages in the category using SQL.
+        """
         query = """SELECT page_title, page_namespace, page_id FROM page
                    JOIN categorylinks ON page_id = cl_from
                    WHERE cl_to = ?"""
@@ -107,16 +120,20 @@ class Category(Page):
                 title = base
             yield self.site.get_page(title, follow_redirects=follow, pageid=row[2])
-    def _get_size_via_api(self, member_type):
-        """Return the size of the category using the API."""
+    def _get_size_via_api(self, member_type: str) -> int:
+        """
+        Return the size of the category using the API.
+        """
         result = self.site.api_query(
             action="query", prop="categoryinfo", titles=self.title
         )
         info = list(result["query"]["pages"].values())[0]["categoryinfo"]
         return info[member_type]
-    def _get_size_via_sql(self, member_type):
-        """Return the size of the category using SQL."""
+    def _get_size_via_sql(self, member_type: str) -> int:
+        """
+        Return the size of the category using SQL.
+        """
         query = "SELECT COUNT(*) FROM categorylinks WHERE cl_to = ?"
         title = self.title.replace(" ", "_").split(":", 1)[1]
         if member_type == "size":
@@ -126,49 +143,54 @@ class Category(Page):
         result = self.site.sql_query(query, (title, member_type[:-1]))
         return list(result)[0][0]
-    def _get_size(self, member_type):
-        """Return the size of the category."""
+    def _get_size(self, member_type: str) -> int:
+        """
+        Return the size of the category.
+        """
         services = {
-            self.site.SERVICE_API: self._get_size_via_api,
-            self.site.SERVICE_SQL: self._get_size_via_sql,
+            Service.API: self._get_size_via_api,
+            Service.SQL: self._get_size_via_sql,
         }
-        return self.site.delegate(services, (member_type,))
+        return self.site.delegate(services, member_type)
     @property
-    def size(self):
-        """The total number of members in the category.
+    def size(self) -> int:
+        """
+        The total number of members in the category.
         Includes pages, files, and subcats. Equal to :py:attr:`pages` +
-        :py:attr:`files` + :py:attr:`subcats`. This will use either the API or
-        SQL depending on which are enabled and the amount of lag on each. This
-        is handled by :py:meth:`site.delegate()
-        <earwigbot.wiki.site.Site.delegate>`.
+        :py:attr:`files` + :py:attr:`subcats`. This will use either the API or SQL
+        depending on which are enabled and the amount of lag on each. This is handled
+        by :py:meth:`site.delegate() <earwigbot.wiki.site.Site.delegate>`.
         """
         return self._get_size("size")
     @property
-    def pages(self):
-        """The number of pages in the category.
+    def pages(self) -> int:
+        """
+        The number of pages in the category.
-        This will use either the API or SQL depending on which are enabled and
-        the amount of lag on each. This is handled by :py:meth:`site.delegate()
+        This will use either the API or SQL depending on which are enabled and the
+        amount of lag on each. This is handled by :py:meth:`site.delegate()
         <earwigbot.wiki.site.Site.delegate>`.
         """
         return self._get_size("pages")
     @property
-    def files(self):
-        """The number of files in the category.
+    def files(self) -> int:
+        """
+        The number of files in the category.
-        This will use either the API or SQL depending on which are enabled and
-        the amount of lag on each. This is handled by :py:meth:`site.delegate()
+        This will use either the API or SQL depending on which are enabled and the
+        amount of lag on each. This is handled by :py:meth:`site.delegate()
         <earwigbot.wiki.site.Site.delegate>`.
         """
         return self._get_size("files")
     @property
-    def subcats(self):
-        """The number of subcategories in the category.
+    def subcats(self) -> int:
+        """
+        The number of subcategories in the category.
         This will use either the API or SQL depending on which are enabled and
         the amount of lag on each. This is handled by :py:meth:`site.delegate()
@@ -176,36 +198,38 @@ class Category(Page):
         """
         return self._get_size("subcats")
-    def get_members(self, limit=None, follow_redirects=None):
-        """Iterate over Pages in the category.
+    def get_members(
+        self, limit: int | None = None, follow_redirects: bool | None = None
+    ) -> Iterator[Page]:
+        """
+        Iterate over Pages in the category.
-        If *limit* is given, we will provide this many pages, or less if the
-        category is smaller. By default, *limit* is ``None``, meaning we will
-        keep iterating over members until the category is exhausted.
-        *follow_redirects* is passed directly to :py:meth:`site.get_page()
-        <earwigbot.wiki.site.Site.get_page>`; it defaults to ``None``, which
-        will use the value passed to our :py:meth:`__init__`.
+        If *limit* is given, we will provide this many pages, or less if the category
+        is smaller. By default, *limit* is ``None``, meaning we will keep iterating
+        over members until the category is exhausted. *follow_redirects* is passed
+        directly to :py:meth:`site.get_page() <earwigbot.wiki.site.Site.get_page>`;
+        it defaults to ``None``, which will use the value passed to our
+        :py:meth:`__init__`.
-        This will use either the API or SQL depending on which are enabled and
-        the amount of lag on each. This is handled by :py:meth:`site.delegate()
+        This will use either the API or SQL depending on which are enabled and the
+        amount of lag on each. This is handled by :py:meth:`site.delegate()
         <earwigbot.wiki.site.Site.delegate>`.
         .. note::
-            Be careful when iterating over very large categories with no limit.
-            If using the API, at best, you will make one query per 5000 pages,
-            which can add up significantly for categories with hundreds of
-            thousands of members. As for SQL, note that *all page titles are
-            stored internally* as soon as the query is made, so the site-wide
-            SQL lock can be freed and unrelated queries can be made without
-            requiring a separate connection to be opened. This is generally not
-            an issue unless your category's size approaches several hundred
+            Be careful when iterating over very large categories with no limit. If using
+            the API, at best, you will make one query per 5000 pages, which can add up
+            significantly for categories with hundreds of thousands of members. As for
+            SQL, note that *all page titles are stored internally* as soon as the query
+            is made, so the site-wide SQL lock can be freed and unrelated queries can be
+            made without requiring a separate connection to be opened. This is generally
+            not an issue unless your category's size approaches several hundred
             thousand, in which case the sheer number of titles in memory becomes
            problematic.
         """
         services = {
-            self.site.SERVICE_API: self._get_members_via_api,
-            self.site.SERVICE_SQL: self._get_members_via_sql,
+            Service.API: self._get_members_via_api,
+            Service.SQL: self._get_members_via_sql,
         }
         if follow_redirects is None:
             follow_redirects = self._follow_redirects
-        return self.site.delegate(services, (limit, follow_redirects))
+        return self.site.delegate(services, limit, follow_redirects)
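The annotated get_members() and the size properties can be exercised together; a small sketch, assuming an already-configured Site handle and a placeholder category name:

    def summarize_category(site, name: str, limit: int = 25) -> None:
        # site: an earwigbot.wiki.site.Site, e.g. from bot.wiki.get_site()
        category = site.get_category(name)  # given without the "Category:" prefix
        print(f"{name}: {category.pages} pages, {category.files} files, "
              f"{category.subcats} subcats ({category.size} total)")
        for page in category.get_members(limit=limit):
            print(page.title)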
@@ -31,14 +31,50 @@ Import directly with ``from earwigbot.wiki import constants`` or
 :py:mod:`earwigbot.wiki` directly (e.g. ``earwigbot.wiki.USER_AGENT``).
 """
+__all__ = [
+    "NS_CATEGORY_TALK",
+    "NS_CATEGORY",
+    "NS_DRAFT_TALK",
+    "NS_DRAFT",
+    "NS_FILE_TALK",
+    "NS_FILE",
+    "NS_HELP_TALK",
+    "NS_HELP",
+    "NS_MAIN",
+    "NS_MEDIA",
+    "NS_MEDIAWIKI_TALK",
+    "NS_MEDIAWIKI",
+    "NS_MODULE_TALK",
+    "NS_MODULE",
+    "NS_PORTAL_TALK",
+    "NS_PORTAL",
+    "NS_PROJECT_TALK",
+    "NS_PROJECT",
+    "NS_SPECIAL",
+    "NS_TALK",
+    "NS_TEMPLATE_TALK",
+    "NS_TEMPLATE",
+    "NS_USER_TALK",
+    "NS_USER",
+    "USER_AGENT",
+]
+import platform
+from enum import Enum
+import earwigbot
 # Default User Agent when making API queries:
-from platform import python_version as _p
+USER_AGENT = (
+    f"EarwigBot/{earwigbot.__version__} "
+    f"(Python/{platform.python_version()}; https://github.com/earwig/earwigbot)"
+)
-from earwigbot import __version__ as _v
-USER_AGENT = "EarwigBot/{0} (Python/{1}; https://github.com/earwig/earwigbot)"
-USER_AGENT = USER_AGENT.format(_v, _p())
-del _v, _p
+class Service(Enum):
+    API = 1
+    SQL = 2
 # Default namespace IDs:
 NS_MAIN = 0
@@ -57,5 +93,13 @@ NS_HELP = 12
 NS_HELP_TALK = 13
 NS_CATEGORY = 14
 NS_CATEGORY_TALK = 15
+NS_PORTAL = 100
+NS_PORTAL_TALK = 101
+NS_DRAFT = 118
+NS_DRAFT_TALK = 119
+NS_MODULE = 828
+NS_MODULE_TALK = 829
 NS_SPECIAL = -1
 NS_MEDIA = -2
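A quick check of what the reworked module exposes; the printed User Agent is indicative only, since it depends on the installed EarwigBot and Python versions:

    from earwigbot.wiki import constants

    print(constants.USER_AGENT)
    # e.g. EarwigBot/0.4.dev0 (Python/3.11.9; https://github.com/earwig/earwigbot)
    print(constants.Service.API, constants.Service.SQL)
    print(constants.NS_CATEGORY, constants.NS_MODULE, constants.NS_SPECIAL)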
@@ -1,4 +1,4 @@
-# Copyright (C) 2009-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
+# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -18,17 +18,27 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
+from __future__ import annotations
+import hashlib
 import re
-from hashlib import md5
-from logging import NullHandler, getLogger
-from time import gmtime, strftime
-from urllib.parse import quote
+import time
+import typing
+import urllib.parse
+from collections.abc import Iterable
+from logging import Logger, NullHandler, getLogger
+from typing import Any
 import mwparserfromhell
 from earwigbot import exceptions
+from earwigbot.exceptions import APIError
 from earwigbot.wiki.copyvios import CopyvioMixIn
+if typing.TYPE_CHECKING:
+    from earwigbot.wiki.site import Site
+    from earwigbot.wiki.user import User
 __all__ = ["Page"]
@@ -36,10 +46,10 @@ class Page(CopyvioMixIn):
     """
     **EarwigBot: Wiki Toolset: Page**
-    Represents a page on a given :py:class:`~earwigbot.wiki.site.Site`. Has
-    methods for getting information about the page, getting page content, and
-    so on. :py:class:`~earwigbot.wiki.category.Category` is a subclass of
-    :py:class:`Page` with additional methods.
+    Represents a page on a given :py:class:`~earwigbot.wiki.site.Site`. Has methods for
+    getting information about the page, getting page content, and so on.
+    :py:class:`~earwigbot.wiki.category.Category` is a subclass of :py:class:`Page`
+    with additional methods.
     *Attributes:*
@@ -59,20 +69,19 @@ class Page(CopyvioMixIn):
     - :py:meth:`reload`: forcibly reloads the page's attributes
     - :py:meth:`toggle_talk`: returns a content page's talk page, or vice versa
     - :py:meth:`get`: returns the page's content
-    - :py:meth:`get_redirect_target`: returns the page's destination if it is a
-      redirect
-    - :py:meth:`get_creator`: returns a User object representing the first
-      person to edit the page
+    - :py:meth:`get_redirect_target`: returns the page's destination if it is a redirect
+    - :py:meth:`get_creator`: returns a User object representing the first person to
+      edit the page
     - :py:meth:`parse`: parses the page content for templates, links, etc
     - :py:meth:`edit`: replaces the page's content or creates a new page
     - :py:meth:`add_section`: adds a new section at the bottom of the page
-    - :py:meth:`check_exclusion`: checks whether or not we are allowed to edit
-      the page, per ``{{bots}}``/``{{nobots}}``
+    - :py:meth:`check_exclusion`: checks whether or not we are allowed to edit the
+      page, per ``{{bots}}``/``{{nobots}}``
-    - :py:meth:`~earwigbot.wiki.copyvios.CopyrightMixIn.copyvio_check`:
-      checks the page for copyright violations
-    - :py:meth:`~earwigbot.wiki.copyvios.CopyrightMixIn.copyvio_compare`:
-      checks the page like :py:meth:`copyvio_check`, but against a specific URL
+    - :py:meth:`~earwigbot.wiki.copyvios.CopyrightMixIn.copyvio_check`: checks the page
+      for copyright violations
+    - :py:meth:`~earwigbot.wiki.copyvios.CopyrightMixIn.copyvio_compare`: checks the
+      page like :py:meth:`copyvio_check`, but against a specific URL
     """
     PAGE_UNKNOWN = 0
@@ -80,18 +89,26 @@
     PAGE_MISSING = 2
     PAGE_EXISTS = 3
-    def __init__(self, site, title, follow_redirects=False, pageid=None, logger=None):
-        """Constructor for new Page instances.
+    def __init__(
+        self,
+        site: Site,
+        title: str,
+        follow_redirects: bool = False,
+        pageid: int | None = None,
+        logger: Logger | None = None,
+    ) -> None:
+        """
+        Constructor for new Page instances.
-        Takes four arguments: a Site object, the Page's title (or pagename),
-        whether or not to follow redirects (optional, defaults to False), and
-        a page ID to supplement the title (optional, defaults to None - i.e.,
-        we will have to query the API to get it).
+        Takes four arguments: a Site object, the Page's title (or pagename), whether or
+        not to follow redirects (optional, defaults to False), and a page ID to
+        supplement the title (optional, defaults to None - i.e., we will have to query
+        the API to get it).
         As with User, site.get_page() is preferred.
-        __init__() will not do any API queries, but it will use basic namespace
-        logic to determine our namespace ID and if we are a talkpage.
+        __init__() will not do any API queries, but it will use basic namespace logic
+        to determine our namespace ID and if we are a talkpage.
         """
         super().__init__(site)
         self._site = site
@@ -108,16 +125,16 @@
         # Attributes to be loaded through the API:
         self._exists = self.PAGE_UNKNOWN
-        self._is_redirect = None
-        self._lastrevid = None
-        self._protection = None
-        self._fullurl = None
-        self._content = None
-        self._creator = None
+        self._is_redirect: bool | None = None
+        self._lastrevid: int | None = None
+        self._protection: dict | None = None
+        self._fullurl: str | None = None
+        self._content: str | None = None
+        self._creator: str | None = None
         # Attributes used for editing/deleting/protecting/etc:
-        self._basetimestamp = None
-        self._starttimestamp = None
+        self._basetimestamp: str | None = None
+        self._starttimestamp: str | None = None
         # Try to determine the page's namespace using our site's namespace
         # converter:
@@ -137,54 +154,60 @@
         else:
             self._is_talkpage = self._namespace % 2 == 1
-    def __repr__(self):
-        """Return the canonical string representation of the Page."""
+    def __repr__(self) -> str:
+        """
+        Return the canonical string representation of the Page.
+        """
         res = "Page(title={0!r}, follow_redirects={1!r}, site={2!r})"
         return res.format(self._title, self._follow_redirects, self._site)
-    def __str__(self):
-        """Return a nice string representation of the Page."""
+    def __str__(self) -> str:
+        """
+        Return a nice string representation of the Page.
+        """
         return f'<Page "{self.title}" of {str(self.site)}>'
-    def _assert_validity(self):
-        """Used to ensure that our page's title is valid.
+    def _assert_validity(self) -> None:
+        """
+        Used to ensure that our page's title is valid.
         If this method is called when our page is not valid (and after
         _load_attributes() has been called), InvalidPageError will be raised.
-        Note that validity != existence. If a page's title is invalid (e.g, it
-        contains "[") it will always be invalid, and cannot be edited.
+        Note that validity != existence. If a page's title is invalid (e.g, it contains
+        "[") it will always be invalid, and cannot be edited.
         """
         if self._exists == self.PAGE_INVALID:
             e = f"Page '{self._title}' is invalid."
             raise exceptions.InvalidPageError(e)
-    def _assert_existence(self):
-        """Used to ensure that our page exists.
+    def _assert_existence(self) -> None:
+        """
+        Used to ensure that our page exists.
         If this method is called when our page doesn't exist (and after
-        _load_attributes() has been called), PageNotFoundError will be raised.
-        It will also call _assert_validity() beforehand.
+        _load_attributes() has been called), PageNotFoundError will be raised. It will
+        also call _assert_validity() beforehand.
         """
         self._assert_validity()
         if self._exists == self.PAGE_MISSING:
             e = f"Page '{self._title}' does not exist."
             raise exceptions.PageNotFoundError(e)
-    def _load(self):
-        """Call _load_attributes() and follows redirects if we're supposed to.
+    def _load(self) -> None:
+        """
+        Call _load_attributes() and follow redirects if we're supposed to.
-        This method will only follow redirects if follow_redirects=True was
-        passed to __init__() (perhaps indirectly passed by site.get_page()).
-        It avoids the API's &redirects param in favor of manual following,
-        so we can act more realistically (we don't follow double redirects, and
-        circular redirects don't break us).
+        This method will only follow redirects if follow_redirects=True was passed to
+        __init__() (perhaps indirectly passed by site.get_page()). It avoids the API's
+        &redirects param in favor of manual following, so we can act more realistically
+        (we don't follow double redirects, and circular redirects don't break us).
-        This will raise RedirectError if we have a problem following, but that
-        is a bug and should NOT happen.
+        This will raise RedirectError if we have a problem following, but that is a bug
+        and should NOT happen.
-        If we're following a redirect, this will make a grand total of three
-        API queries. It's a lot, but each one is quite small.
+        If we're following a redirect, this will make a grand total of three API
+        queries. It's a lot, but each one is quite small.
         """
         self._load_attributes()
@@ -194,14 +217,14 @@
             self._content = None  # reset the content we just loaded
             self._load_attributes()
-    def _load_attributes(self, result=None):
-        """Load various data from the API in a single query.
+    def _load_attributes(self, result: dict | None = None) -> None:
+        """
+        Load various data from the API in a single query.
-        Loads self._title, ._exists, ._is_redirect, ._pageid, ._fullurl,
-        ._protection, ._namespace, ._is_talkpage, ._creator, ._lastrevid, and
-        ._starttimestamp using the API. It will do a query of its own unless
-        *result* is provided, in which case we'll pretend *result* is what the
-        query returned.
+        Loads self._title, ._exists, ._is_redirect, ._pageid, ._fullurl, ._protection,
+        ._namespace, ._is_talkpage, ._creator, ._lastrevid, and ._starttimestamp using
+        the API. It will do a query of its own unless *result* is provided, in which
+        case we'll pretend *result* is what the query returned.
         Assuming the API is sound, this should not raise any exceptions.
         """
@@ -217,6 +240,7 @@
                 titles=self._title,
             )
+        assert result is not None
         if "interwiki" in result["query"]:
            self._title = result["query"]["interwiki"][0]["title"]
            self._exists = self.PAGE_INVALID
@@ -242,7 +266,7 @@
         self._fullurl = res["fullurl"]
         self._protection = res["protection"]
-        self._starttimestamp = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime())
+        self._starttimestamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
         # We've determined the namespace and talkpage status in __init__()
         # based on the title, but now we can be sure:
@@ -256,15 +280,15 @@
         except KeyError:
             pass
-    def _load_content(self, result=None):
-        """Load current page content from the API.
+    def _load_content(self, result: dict | None = None) -> None:
+        """
+        Load current page content from the API.
-        If *result* is provided, we'll pretend that is the result of an API
-        query and try to get content from that. Otherwise, we'll do an API
-        query on our own.
+        If *result* is provided, we'll pretend that is the result of an API query and
+        try to get content from that. Otherwise, we'll do an API query on our own.
-        Don't call this directly, ever; use reload() followed by get() if you
-        want to force content reloading.
+        Don't call this directly, ever; use reload() followed by get() if you want to
+        force content reloading.
         """
         if not result:
             query = self.site.api_query
@@ -277,6 +301,7 @@
                 titles=self._title,
             )
+        assert result is not None
         res = list(result["query"]["pages"].values())[0]
         try:
             revision = res["revisions"][0]
@@ -291,32 +316,32 @@
     def _edit(
         self,
-        params=None,
-        text=None,
-        summary=None,
-        minor=None,
-        bot=None,
-        force=None,
-        section=None,
-        captcha_id=None,
-        captcha_word=None,
-        **kwargs,
-    ):
-        """Edit the page!
-        If *params* is given, we'll use it as our API query parameters.
-        Otherwise, we'll build params using the given kwargs via
-        _build_edit_params().
-        We'll then try to do the API query, and catch any errors the API raises
-        in _handle_edit_errors(). We'll then throw these back as subclasses of
-        EditError.
+        params: dict[str, Any] | None = None,
+        text: str | None = None,
+        summary: str | None = None,
+        minor: bool | None = None,
+        bot: bool | None = None,
+        force: bool | None = None,
+        section: int | str | None = None,
+        captcha_id: str | None = None,
+        captcha_word: str | None = None,
+        **kwargs: Any,
+    ) -> None:
+        """
+        Edit the page!
+        If *params* is given, we'll use it as our API query parameters. Otherwise,
+        we'll build params using the given kwargs via _build_edit_params().
+        We'll then try to do the API query, and catch any errors the API raises in
+        _handle_edit_errors(). We'll then throw these back as subclasses of EditError.
         """
         # Weed out invalid pages before we get too far:
         self._assert_validity()
         # Build our API query string:
         if not params:
+            assert text is not None, "Edit text must be provided when params are unset"
             params = self._build_edit_params(
                 text,
                 summary,
@@ -351,26 +376,26 @@
     def _build_edit_params(
         self,
-        text,
-        summary,
-        minor,
-        bot,
-        force,
-        section,
-        captcha_id,
-        captcha_word,
-        kwargs,
-    ):
-        """Given some keyword arguments, build an API edit query string."""
-        unitxt = text.encode("utf8") if isinstance(text, str) else text
-        hashed = md5(unitxt).hexdigest()  # Checksum to ensure text is correct
+        text: str,
+        summary: str | None,
+        minor: bool | None,
+        bot: bool | None,
+        force: bool | None,
+        section: int | str | None,
+        captcha_id: str | None,
+        captcha_word: str | None,
+        kwargs: dict[str, Any],
+    ) -> dict[str, Any]:
+        """
+        Given some keyword arguments, build an API edit query string.
+        """
         params = {
             "action": "edit",
             "title": self._title,
             "text": text,
             "token": self.site.get_token(),
             "summary": summary,
-            "md5": hashed,
+            "md5": hashlib.md5(text.encode("utf-8")).hexdigest(),
         }
         if section:
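The md5 parameter is simply the hex digest of the UTF-8 encoded edit text; MediaWiki compares it against the text it received and rejects the edit if they disagree. Reproduced in isolation:

    import hashlib

    text = "Some replacement wikitext"
    params = {
        "action": "edit",
        "text": text,
        # Checksum the API uses to detect a corrupted or truncated request body:
        "md5": hashlib.md5(text.encode("utf-8")).hexdigest(),
    }
    print(params["md5"])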
@@ -403,12 +428,15 @@
             params[key] = val
         return params
-    def _handle_edit_errors(self, error, params, retry=True):
-        """If our edit fails due to some error, try to handle it.
+    def _handle_edit_errors(
+        self, error: APIError, params: dict[str, Any], retry: bool = True
+    ) -> dict:
+        """
+        If our edit fails due to some error, try to handle it.
-        We'll either raise an appropriate exception (for example, if the page
-        is protected), or we'll try to fix it (for example, if the token is
-        invalid, we'll try to get a new one).
+        We'll either raise an appropriate exception (for example, if the page is
+        protected), or we'll try to fix it (for example, if the token is invalid, we'll
+        try to get a new one).
         """
         perms = [
             "noedit",
@@ -447,27 +475,31 @@
         raise exceptions.EditError(": ".join((error.code, error.info)))
     @property
-    def site(self):
-        """The page's corresponding Site object."""
+    def site(self) -> Site:
+        """
+        The page's corresponding Site object.
+        """
         return self._site
     @property
-    def title(self):
-        """The page's title, or "pagename".
+    def title(self) -> str:
+        """
+        The page's title, or "pagename".
-        This won't do any API queries on its own. Any other attributes or
-        methods that do API queries will reload the title, however, like
-        :py:attr:`exists` and :py:meth:`get`, potentially "normalizing" it or
-        following redirects if :py:attr:`self._follow_redirects` is ``True``.
+        This won't do any API queries on its own. Any other attributes or methods that
+        do API queries will reload the title, however, like :py:attr:`exists` and
+        :py:meth:`get`, potentially "normalizing" it or following redirects if
+        :py:attr:`self._follow_redirects` is ``True``.
         """
         return self._title
     @property
-    def exists(self):
-        """Whether or not the page exists.
+    def exists(self) -> int:
+        """
+        Whether or not the page exists.
-        This will be a number; its value does not matter, but it will equal
-        one of :py:attr:`self.PAGE_INVALID <PAGE_INVALID>`,
+        This will be a number; its value does not matter, but it will equal one of
+        :py:attr:`self.PAGE_INVALID <PAGE_INVALID>`,
         :py:attr:`self.PAGE_MISSING <PAGE_MISSING>`, or
         :py:attr:`self.PAGE_EXISTS <PAGE_EXISTS>`.
@@ -478,55 +510,60 @@
         return self._exists
     @property
-    def pageid(self):
-        """An integer ID representing the page.
+    def pageid(self) -> int:
+        """
+        An integer ID representing the page.
         Makes an API query only if we haven't already made one and the *pageid*
-        parameter to :py:meth:`__init__` was left as ``None``, which should be
-        true for all cases except when pages are returned by an SQL generator
-        (like :py:meth:`category.get_members()
+        parameter to :py:meth:`__init__` was left as ``None``, which should be true for
+        all cases except when pages are returned by an SQL generator (like
+        :py:meth:`category.get_members()
         <earwigbot.wiki.category.Category.get_members>`).
         Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
-        :py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
-        invalid or the page does not exist, respectively.
+        :py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is invalid
+        or the page does not exist, respectively.
         """
         if self._pageid:
             return self._pageid
         if self._exists == self.PAGE_UNKNOWN:
             self._load()
         self._assert_existence()  # Missing pages do not have IDs
+        assert self._pageid is not None, "Page exists but does not have an ID"
         return self._pageid
     @property
-    def url(self):
-        """The page's URL.
+    def url(self) -> str:
+        """
+        The page's URL.
-        Like :py:meth:`title`, this won't do any API queries on its own. If the
-        API was never queried for this page, we will attempt to determine the
-        URL ourselves based on the title.
+        Like :py:meth:`title`, this won't do any API queries on its own. If the API was
+        never queried for this page, we will attempt to determine the URL ourselves
+        based on the title.
         """
         if self._fullurl:
             return self._fullurl
         else:
-            encoded = self._title.encode("utf8").replace(" ", "_")
-            slug = quote(encoded, safe="/:").decode("utf8")
-            path = self.site._article_path.replace("$1", slug)
+            encoded = self._title.replace(" ", "_")
+            slug = urllib.parse.quote(encoded, safe="/:")
+            path = self.site.article_path.replace("$1", slug)
             return "".join((self.site.url, path))
     @property
-    def namespace(self):
-        """The page's namespace ID (an integer).
+    def namespace(self) -> int:
+        """
+        The page's namespace ID (an integer).
-        Like :py:meth:`title`, this won't do any API queries on its own. If the
-        API was never queried for this page, we will attempt to determine the
-        namespace ourselves based on the title.
+        Like :py:meth:`title`, this won't do any API queries on its own. If the API was
+        never queried for this page, we will attempt to determine the namespace
+        ourselves based on the title.
         """
         return self._namespace
     @property
-    def lastrevid(self):
-        """The ID of the page's most recent revision.
+    def lastrevid(self) -> int | None:
+        """
+        The ID of the page's most recent revision.
         Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
         :py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
@@ -538,14 +575,15 @@
         return self._lastrevid
     @property
-    def protection(self):
-        """The page's current protection status.
+    def protection(self) -> dict | None:
+        """
+        The page's current protection status.
         Makes an API query only if we haven't already made one.
-        Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` if the page
-        name is invalid. Won't raise an error if the page is missing because
-        those can still be create-protected.
+        Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` if the page name is
+        invalid. Won't raise an error if the page is missing because those can still be
+        create-protected.
         """
         if self._exists == self.PAGE_UNKNOWN:
             self._load()
@@ -553,17 +591,18 @@
         return self._protection
     @property
-    def is_talkpage(self):
-        """``True`` if the page is a talkpage, otherwise ``False``.
+    def is_talkpage(self) -> bool:
+        """
+        ``True`` if the page is a talkpage, otherwise ``False``.
-        Like :py:meth:`title`, this won't do any API queries on its own. If the
-        API was never queried for this page, we will attempt to determine
-        whether it is a talkpage ourselves based on its namespace.
+        Like :py:meth:`title`, this won't do any API queries on its own. If the API was
+        never queried for this page, we will attempt to determine whether it is a
+        talkpage ourselves based on its namespace.
         """
         return self._is_talkpage
     @property
-    def is_redirect(self):
+    def is_redirect(self) -> bool:
         """``True`` if the page is a redirect, otherwise ``False``.
         Makes an API query only if we haven't already made one.
@@ -572,34 +611,36 @@
         """
         if self._exists == self.PAGE_UNKNOWN:
             self._load()
+        assert self._is_redirect is not None
         return self._is_redirect
-    def reload(self):
-        """Forcibly reload the page's attributes.
+    def reload(self) -> None:
+        """
+        Forcibly reload the page's attributes.
-        Emphasis on *reload*: this is only necessary if there is reason to
-        believe they have changed.
+        Emphasis on *reload*: this is only necessary if there is reason to believe they
+        have changed.
         """
         self._load()
         if self._content is not None:
             # Only reload content if it has already been loaded:
             self._load_content()
-    def toggle_talk(self, follow_redirects=None):
-        """Return a content page's talk page, or vice versa.
+    def toggle_talk(self, follow_redirects: bool | None = None) -> Page:
+        """
+        Return a content page's talk page, or vice versa.
-        The title of the new page is determined by namespace logic, not API
-        queries. We won't make any API queries on our own.
+        The title of the new page is determined by namespace logic, not API queries.
+        We won't make any API queries on our own.
-        If *follow_redirects* is anything other than ``None`` (the default), it
-        will be passed to the new :py:class:`~earwigbot.wiki.page.Page`
-        object's :py:meth:`__init__`. Otherwise, we'll use the value passed to
-        our own :py:meth:`__init__`.
+        If *follow_redirects* is anything other than ``None`` (the default), it will be
+        passed to the new :py:class:`~earwigbot.wiki.page.Page` object's
+        :py:meth:`__init__`. Otherwise, we'll use the value passed to our own
+        :py:meth:`__init__`.
-        Will raise :py:exc:`~earwigbot.exceptions.InvalidPageError` if we try
-        to get the talk page of a special page (in the ``Special:`` or
-        ``Media:`` namespaces), but we won't raise an exception if our page is
-        otherwise missing or invalid.
+        Will raise :py:exc:`~earwigbot.exceptions.InvalidPageError` if we try to get
+        the talk page of a special page (in the ``Special:`` or ``Media:`` namespaces),
+        but we won't raise an exception if our page is otherwise missing or invalid.
         """
         if self._namespace < 0:
             ns = self.site.namespace_id_to_name(self._namespace)
@@ -629,11 +670,12 @@ class Page(CopyvioMixIn): | |||||
follow_redirects = self._follow_redirects | follow_redirects = self._follow_redirects | ||||
return Page(self.site, new_title, follow_redirects) | return Page(self.site, new_title, follow_redirects) | ||||
def get(self): | |||||
"""Return page content, which is cached if you try to call get again. | |||||
def get(self) -> str: | |||||
""" | |||||
Return page content, which is cached if you try to call get again. | |||||
Raises InvalidPageError or PageNotFoundError if the page name is | |||||
invalid or the page does not exist, respectively. | |||||
Raises InvalidPageError or PageNotFoundError if the page name is invalid or the | |||||
page does not exist, respectively. | |||||
""" | """ | ||||
if self._exists == self.PAGE_UNKNOWN: | if self._exists == self.PAGE_UNKNOWN: | ||||
# Kill two birds with one stone by doing an API query for both our | # Kill two birds with one stone by doing an API query for both our | ||||
@@ -659,6 +701,7 @@ class Page(CopyvioMixIn): | |||||
self._exists = self.PAGE_UNKNOWN # Force another API query | self._exists = self.PAGE_UNKNOWN # Force another API query | ||||
self.get() | self.get() | ||||
assert self._content is not None | |||||
return self._content | return self._content | ||||
# Make sure we're dealing with a real page here. This may be outdated | # Make sure we're dealing with a real page here. This may be outdated | ||||
@@ -669,16 +712,17 @@ class Page(CopyvioMixIn): | |||||
if self._content is None: | if self._content is None: | ||||
self._load_content() | self._load_content() | ||||
assert self._content is not None | |||||
return self._content | return self._content | ||||
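# Usage sketch for get(): content is fetched once and then cached on the Page.
# `site` is an assumed Site object; the title is illustrative.
page = Page(site, "Example article")
text = page.get()        # makes the API query
text_again = page.get()  # served from the cache, no new query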
def get_redirect_target(self): | |||||
"""If the page is a redirect, return its destination. | |||||
def get_redirect_target(self) -> str: | |||||
""" | |||||
If the page is a redirect, return its destination. | |||||
Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or | Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or | ||||
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is | |||||
invalid or the page does not exist, respectively. Raises | |||||
:py:exc:`~earwigbot.exceptions.RedirectError` if the page is not a | |||||
redirect. | |||||
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is invalid | |||||
or the page does not exist, respectively. Raises | |||||
:py:exc:`~earwigbot.exceptions.RedirectError` if the page is not a redirect. | |||||
""" | """ | ||||
re_redirect = r"^\s*\#\s*redirect\s*\[\[(.*?)\]\]" | re_redirect = r"^\s*\#\s*redirect\s*\[\[(.*?)\]\]" | ||||
content = self.get() | content = self.get() | ||||
@@ -688,19 +732,20 @@ class Page(CopyvioMixIn): | |||||
e = "The page does not appear to have a redirect target." | e = "The page does not appear to have a redirect target." | ||||
raise exceptions.RedirectError(e) | raise exceptions.RedirectError(e) | ||||
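# Usage sketch for get_redirect_target(), handling the non-redirect case.
# `page` is an assumed Page object; `exceptions` is already imported in this module.
try:
    target = page.get_redirect_target()
except exceptions.RedirectError:
    target = None  # the page is not a redirect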
def get_creator(self): | |||||
"""Return the User object for the first person to edit the page. | |||||
def get_creator(self) -> User: | |||||
""" | |||||
Return the User object for the first person to edit the page. | |||||
Makes an API query only if we haven't already made one. Normally, we | |||||
can get the creator along with everything else (except content) in | |||||
:py:meth:`_load_attributes`. However, due to a limitation in the API | |||||
(can't get the editor of one revision and the content of another at | |||||
both ends of the history), if our other attributes were only loaded | |||||
through :py:meth:`get`, we'll have to do another API query. | |||||
Makes an API query only if we haven't already made one. Normally, we can get | |||||
the creator along with everything else (except content) in | |||||
:py:meth:`_load_attributes`. However, due to a limitation in the API (can't get | |||||
the editor of one revision and the content of another at both ends of the | |||||
history), if our other attributes were only loaded through :py:meth:`get`, | |||||
we'll have to do another API query. | |||||
Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or | Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or | ||||
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is | |||||
invalid or the page does not exist, respectively. | |||||
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is invalid | |||||
or the page does not exist, respectively. | |||||
""" | """ | ||||
if self._exists == self.PAGE_UNKNOWN: | if self._exists == self.PAGE_UNKNOWN: | ||||
self._load() | self._load() | ||||
@@ -710,41 +755,59 @@ class Page(CopyvioMixIn): | |||||
self._assert_existence() | self._assert_existence() | ||||
return self.site.get_user(self._creator) | return self.site.get_user(self._creator) | ||||
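# Usage sketch for get_creator(): the first editor comes back as a User object.
# `page` is an assumed, existing Page; see the User class later in this diff.
creator = page.get_creator()
print(creator.name, creator.editcount)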
def parse(self): | |||||
"""Parse the page content for templates, links, etc. | |||||
def parse(self) -> mwparserfromhell.wikicode.Wikicode: | |||||
""" | |||||
Parse the page content for templates, links, etc. | |||||
Actual parsing is handled by :py:mod:`mwparserfromhell`. Raises | Actual parsing is handled by :py:mod:`mwparserfromhell`. Raises | ||||
:py:exc:`~earwigbot.exceptions.InvalidPageError` or | :py:exc:`~earwigbot.exceptions.InvalidPageError` or | ||||
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is | |||||
invalid or the page does not exist, respectively. | |||||
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is invalid | |||||
or the page does not exist, respectively. | |||||
""" | """ | ||||
return mwparserfromhell.parse(self.get()) | return mwparserfromhell.parse(self.get()) | ||||
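# Usage sketch for parse(): the returned Wikicode object is from mwparserfromhell.
# `page` is an assumed, existing Page.
wikicode = page.parse()
for template in wikicode.filter_templates():
    print(template.name)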
def edit(self, text, summary, minor=False, bot=True, force=False, **kwargs): | |||||
"""Replace the page's content or creates a new page. | |||||
def edit( | |||||
self, | |||||
text: str, | |||||
summary: str | None, | |||||
minor: bool = False, | |||||
bot: bool = True, | |||||
force: bool = False, | |||||
**kwargs: Any, | |||||
) -> None: | |||||
""" | |||||
Replace the page's content or create a new page.
*text* is the new page content, with *summary* as the edit summary. | |||||
If *minor* is ``True``, the edit will be marked as minor. If *bot* is | |||||
``True``, the edit will be marked as a bot edit, but only if we | |||||
actually have a bot flag. | |||||
*text* is the new page content, with *summary* as the edit summary. If *minor* | |||||
is ``True``, the edit will be marked as minor. If *bot* is ``True``, the edit | |||||
will be marked as a bot edit, but only if we actually have a bot flag. | |||||
Use *force* to push the new content even if there's an edit conflict or | |||||
the page was deleted/recreated between getting our edit token and | |||||
editing our page. Be careful with this! | |||||
Use *force* to push the new content even if there's an edit conflict or the | |||||
page was deleted/recreated between getting our edit token and editing our page. | |||||
Be careful with this! | |||||
""" | """ | ||||
self._edit( | self._edit( | ||||
text=text, summary=summary, minor=minor, bot=bot, force=force, **kwargs | text=text, summary=summary, minor=minor, bot=bot, force=force, **kwargs | ||||
) | ) | ||||
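# Usage sketch for edit(): replace the page text with an edit summary.
# `page` is an assumed Page object we are allowed to edit; text and summary are illustrative.
page.edit("New content", summary="Updating the page", minor=True, bot=True)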
def add_section(self, text, title, minor=False, bot=True, force=False, **kwargs): | |||||
"""Add a new section to the bottom of the page. | |||||
def add_section( | |||||
self, | |||||
text: str, | |||||
title: str, | |||||
minor: bool = False, | |||||
bot: bool = True, | |||||
force: bool = False, | |||||
**kwargs: Any, | |||||
) -> None: | |||||
""" | |||||
Add a new section to the bottom of the page. | |||||
The arguments for this are the same as those for :py:meth:`edit`, but | |||||
instead of providing a summary, you provide a section title. Likewise, | |||||
raised exceptions are the same as :py:meth:`edit`'s. | |||||
The arguments for this are the same as those for :py:meth:`edit`, but instead | |||||
of providing a summary, you provide a section title. Likewise, raised | |||||
exceptions are the same as :py:meth:`edit`'s. | |||||
This should create the page if it does not already exist, with just the | |||||
new section as content. | |||||
This should create the page if it does not already exist, with just the new | |||||
section as content. | |||||
""" | """ | ||||
self._edit( | self._edit( | ||||
text=text, | text=text, | ||||
@@ -756,25 +819,27 @@ class Page(CopyvioMixIn): | |||||
**kwargs, | **kwargs, | ||||
) | ) | ||||
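# Usage sketch for add_section(): append a new section rather than replacing content.
# `page` is an assumed Page object we are allowed to edit.
page.add_section("Some new discussion text.", title="New section heading")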
def check_exclusion(self, username=None, optouts=None): | |||||
"""Check whether or not we are allowed to edit the page. | |||||
def check_exclusion( | |||||
self, username: str | None = None, optouts: Iterable[str] | None = None | |||||
) -> bool: | |||||
""" | |||||
Check whether or not we are allowed to edit the page. | |||||
Return ``True`` if we *are* allowed to edit this page, and ``False`` if | Return ``True`` if we *are* allowed to edit this page, and ``False`` if | ||||
we aren't. | we aren't. | ||||
*username* is used to determine whether we are part of a specific list | |||||
of allowed or disallowed bots (e.g. ``{{bots|allow=EarwigBot}}`` or | |||||
``{{bots|deny=FooBot,EarwigBot}}``). It's ``None`` by default, which | |||||
will swipe our username from :py:meth:`site.get_user() | |||||
*username* is used to determine whether we are part of a specific list of | |||||
allowed or disallowed bots (e.g. ``{{bots|allow=EarwigBot}}`` or | |||||
``{{bots|deny=FooBot,EarwigBot}}``). It's ``None`` by default, which will swipe | |||||
our username from :py:meth:`site.get_user() | |||||
<earwigbot.wiki.site.Site.get_user>`.\ | <earwigbot.wiki.site.Site.get_user>`.\ | ||||
:py:attr:`~earwigbot.wiki.user.User.name`. | :py:attr:`~earwigbot.wiki.user.User.name`. | ||||
*optouts* is a list of messages to consider this check as part of for | |||||
the purpose of opt-out; it defaults to ``None``, which ignores the | |||||
parameter completely. For example, if *optouts* is ``["nolicense"]``, | |||||
we'll return ``False`` on ``{{bots|optout=nolicense}}`` or | |||||
``{{bots|optout=all}}``, but `True` on | |||||
``{{bots|optout=orfud,norationale,replaceable}}``. | |||||
*optouts* is a list of messages that this check should be considered part of, for
the purpose of opt-out; it defaults to ``None``, which ignores the parameter
completely. For example, if *optouts* is ``["nolicense"]``, we'll return
``False`` on ``{{bots|optout=nolicense}}`` or ``{{bots|optout=all}}``, but
``True`` on ``{{bots|optout=orfud,norationale,replaceable}}``.
""" | """ | ||||
def parse_param(template, param): | def parse_param(template, param): | ||||
@@ -18,78 +18,102 @@ | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
# SOFTWARE. | # SOFTWARE. | ||||
from __future__ import annotations | |||||
import errno | import errno | ||||
import sqlite3 as sqlite | import sqlite3 as sqlite | ||||
import stat | import stat | ||||
import typing | |||||
from collections import OrderedDict | from collections import OrderedDict | ||||
from http.cookiejar import LoadError, LWPCookieJar | |||||
from dataclasses import dataclass | |||||
from http.cookiejar import CookieJar, LoadError, LWPCookieJar | |||||
from os import chmod, path | from os import chmod, path | ||||
from platform import python_version | from platform import python_version | ||||
from earwigbot import __version__ | from earwigbot import __version__ | ||||
from earwigbot.exceptions import SiteNotFoundError | from earwigbot.exceptions import SiteNotFoundError | ||||
from earwigbot.wiki.copyvios.exclusions import ExclusionsDB | from earwigbot.wiki.copyvios.exclusions import ExclusionsDB | ||||
from earwigbot.wiki.site import Site | |||||
from earwigbot.wiki.site import Site, SqlConnInfo | |||||
if typing.TYPE_CHECKING: | |||||
from earwigbot.bot import Bot | |||||
__all__ = ["SitesDB"] | __all__ = ["SitesDB"] | ||||
@dataclass(frozen=True) | |||||
class _SiteInfoFromDB: | |||||
name: str | |||||
project: str | |||||
lang: str | |||||
base_url: str | |||||
article_path: str | |||||
script_path: str | |||||
sql: SqlConnInfo | |||||
namespaces: dict[int, list[str]] | |||||
class SitesDB: | class SitesDB: | ||||
""" | """ | ||||
**EarwigBot: Wiki Toolset: Sites Database Manager** | **EarwigBot: Wiki Toolset: Sites Database Manager** | ||||
This class controls the :file:`sites.db` file, which stores information | |||||
about all wiki sites known to the bot. Three public methods act as bridges | |||||
between the bot's config files and :py:class:`~earwigbot.wiki.site.Site` | |||||
objects: | |||||
This class controls the :file:`sites.db` file, which stores information about all | |||||
wiki sites known to the bot. Three public methods act as bridges between the bot's | |||||
config files and :py:class:`~earwigbot.wiki.site.Site` objects: | |||||
- :py:meth:`get_site`: returns a Site object corresponding to a site | - :py:meth:`get_site`: returns a Site object corresponding to a site | ||||
- :py:meth:`add_site`: stores a site in the database | - :py:meth:`add_site`: stores a site in the database | ||||
- :py:meth:`remove_site`: removes a site from the database | - :py:meth:`remove_site`: removes a site from the database | ||||
There's usually no need to use this class directly. All public methods | |||||
here are available as :py:meth:`bot.wiki.get_site`, | |||||
:py:meth:`bot.wiki.add_site`, and :py:meth:`bot.wiki.remove_site`, which | |||||
use a :file:`sites.db` file located in the same directory as our | |||||
:file:`config.yml` file. Lower-level access can be achieved by importing | |||||
the manager class (``from earwigbot.wiki import SitesDB``). | |||||
There's usually no need to use this class directly. All public methods here are | |||||
available as :py:meth:`bot.wiki.get_site`, :py:meth:`bot.wiki.add_site`, and | |||||
:py:meth:`bot.wiki.remove_site`, which use a :file:`sites.db` file located in the | |||||
same directory as our :file:`config.yml` file. Lower-level access can be achieved | |||||
by importing the manager class (``from earwigbot.wiki import SitesDB``). | |||||
""" | """ | ||||
def __init__(self, bot): | |||||
"""Set up the manager with an attribute for the base Bot object.""" | |||||
def __init__(self, bot: Bot) -> None: | |||||
""" | |||||
Set up the manager with an attribute for the base Bot object. | |||||
""" | |||||
self.config = bot.config | self.config = bot.config | ||||
self._logger = bot.logger.getChild("wiki") | self._logger = bot.logger.getChild("wiki") | ||||
self._sites = {} # Internal site cache | |||||
self._sites: dict[str, Site] = {} # Internal site cache | |||||
self._sitesdb = path.join(bot.config.root_dir, "sites.db") | self._sitesdb = path.join(bot.config.root_dir, "sites.db") | ||||
self._cookie_file = path.join(bot.config.root_dir, ".cookies") | self._cookie_file = path.join(bot.config.root_dir, ".cookies") | ||||
self._cookiejar = None | |||||
self._cookiejar: CookieJar | None = None | |||||
excl_db = path.join(bot.config.root_dir, "exclusions.db") | excl_db = path.join(bot.config.root_dir, "exclusions.db") | ||||
excl_logger = self._logger.getChild("exclusionsdb") | excl_logger = self._logger.getChild("exclusionsdb") | ||||
self._exclusions_db = ExclusionsDB(self, excl_db, excl_logger) | self._exclusions_db = ExclusionsDB(self, excl_db, excl_logger) | ||||
def __repr__(self): | |||||
"""Return the canonical string representation of the SitesDB.""" | |||||
def __repr__(self) -> str: | |||||
""" | |||||
Return the canonical string representation of the SitesDB. | |||||
""" | |||||
res = "SitesDB(config={0!r}, sitesdb={1!r}, cookie_file={2!r})" | res = "SitesDB(config={0!r}, sitesdb={1!r}, cookie_file={2!r})" | ||||
return res.format(self.config, self._sitesdb, self._cookie_file) | return res.format(self.config, self._sitesdb, self._cookie_file) | ||||
def __str__(self): | |||||
"""Return a nice string representation of the SitesDB.""" | |||||
def __str__(self) -> str: | |||||
""" | |||||
Return a nice string representation of the SitesDB. | |||||
""" | |||||
return f"<SitesDB at {self._sitesdb}>" | return f"<SitesDB at {self._sitesdb}>" | ||||
def _get_cookiejar(self): | |||||
"""Return a LWPCookieJar object loaded from our .cookies file. | |||||
def _get_cookiejar(self) -> CookieJar: | |||||
""" | |||||
Return a LWPCookieJar object loaded from our .cookies file. | |||||
The same .cookies file is returned every time, located in the project | |||||
root, same directory as config.yml and bot.py. If it doesn't exist, we | |||||
will create the file and set it to be readable and writeable only by | |||||
us. If it exists but the information inside is bogus, we'll ignore it. | |||||
The same .cookies file is returned every time, located in the project root, | |||||
same directory as config.yml and bot.py. If it doesn't exist, we will create | |||||
the file and set it to be readable and writeable only by us. If it exists but | |||||
the information inside is bogus, we'll ignore it. | |||||
This is normally called by _make_site_object() (in turn called by | |||||
get_site()), and the cookiejar is passed to our Site's constructor, | |||||
used when it makes API queries. This way, we can easily preserve | |||||
cookies between sites (e.g., for CentralAuth), making logins easier. | |||||
This is normally called by _make_site_object() (in turn called by get_site()), | |||||
and the cookiejar is passed to our Site's constructor, used when it makes API | |||||
queries. This way, we can easily preserve cookies between sites (e.g., for | |||||
CentralAuth), making logins easier. | |||||
""" | """ | ||||
if self._cookiejar: | if self._cookiejar: | ||||
return self._cookiejar | return self._cookiejar | ||||
@@ -111,8 +135,10 @@ class SitesDB: | |||||
return self._cookiejar | return self._cookiejar | ||||
def _create_sitesdb(self): | |||||
"""Initialize the sitesdb file with its three necessary tables.""" | |||||
def _create_sitesdb(self) -> None: | |||||
""" | |||||
Initialize the sitesdb file with its three necessary tables. | |||||
""" | |||||
script = """ | script = """ | ||||
CREATE TABLE sites (site_name, site_project, site_lang, site_base_url, | CREATE TABLE sites (site_name, site_project, site_lang, site_base_url, | ||||
site_article_path, site_script_path); | site_article_path, site_script_path); | ||||
@@ -122,11 +148,12 @@ class SitesDB: | |||||
with sqlite.connect(self._sitesdb) as conn: | with sqlite.connect(self._sitesdb) as conn: | ||||
conn.executescript(script) | conn.executescript(script) | ||||
def _get_site_object(self, name): | |||||
"""Return the site from our cache, or create it if it doesn't exist. | |||||
def _get_site_object(self, name: str) -> Site: | |||||
""" | |||||
Return the site from our cache, or create it if it doesn't exist. | |||||
This is essentially just a wrapper around _make_site_object that | |||||
returns the same object each time a specific site is asked for. | |||||
This is essentially just a wrapper around _make_site_object that returns the | |||||
same object each time a specific site is asked for. | |||||
""" | """ | ||||
try: | try: | ||||
return self._sites[name] | return self._sites[name] | ||||
@@ -135,14 +162,12 @@ class SitesDB: | |||||
self._sites[name] = site | self._sites[name] = site | ||||
return site | return site | ||||
def _load_site_from_sitesdb(self, name): | |||||
"""Return all information stored in the sitesdb relating to given site. | |||||
def _load_site_from_sitesdb(self, name: str) -> _SiteInfoFromDB: | |||||
""" | |||||
Return all information stored in the sitesdb relating to the given site, as a
_SiteInfoFromDB instance.
The information will be returned as a tuple, containing the site's | |||||
name, project, language, base URL, article path, script path, SQL | |||||
connection data, and namespaces, in that order. If the site is not | |||||
found in the database, SiteNotFoundError will be raised. An empty | |||||
database will be created before the exception is raised if none exists. | |||||
If the site is not found in the database, SiteNotFoundError will be raised. An | |||||
empty database will be created before the exception is raised if none exists. | |||||
""" | """ | ||||
query1 = "SELECT * FROM sites WHERE site_name = ?" | query1 = "SELECT * FROM sites WHERE site_name = ?" | ||||
query2 = "SELECT sql_data_key, sql_data_value FROM sql_data WHERE sql_site = ?" | query2 = "SELECT sql_data_key, sql_data_value FROM sql_data WHERE sql_site = ?" | ||||
@@ -161,7 +186,7 @@ class SitesDB: | |||||
name, project, lang, base_url, article_path, script_path = site_data | name, project, lang, base_url, article_path, script_path = site_data | ||||
sql = dict(sql_data) | sql = dict(sql_data) | ||||
namespaces = {} | |||||
namespaces: dict[int, list[str]] = {} | |||||
for ns_id, ns_name, ns_is_primary_name in ns_data: | for ns_id, ns_name, ns_is_primary_name in ns_data: | ||||
try: | try: | ||||
if ns_is_primary_name: # "Primary" name goes first in list | if ns_is_primary_name: # "Primary" name goes first in list | ||||
@@ -171,7 +196,7 @@ class SitesDB: | |||||
except KeyError: | except KeyError: | ||||
namespaces[ns_id] = [ns_name] | namespaces[ns_id] = [ns_name] | ||||
return ( | |||||
return _SiteInfoFromDB( | |||||
name, | name, | ||||
project, | project, | ||||
lang, | lang, | ||||
@@ -182,16 +207,16 @@ class SitesDB: | |||||
namespaces, | namespaces, | ||||
) | ) | ||||
def _make_site_object(self, name): | |||||
"""Return a Site object associated with the site *name* in our sitesdb. | |||||
def _make_site_object(self, name: str) -> Site: | |||||
""" | |||||
Return a Site object associated with the site *name* in our sitesdb. | |||||
This calls _load_site_from_sitesdb(), so SiteNotFoundError will be | |||||
raised if the site is not in our sitesdb. | |||||
This calls _load_site_from_sitesdb(), so SiteNotFoundError will be raised if | |||||
the site is not in our sitesdb. | |||||
""" | """ | ||||
cookiejar = self._get_cookiejar() | cookiejar = self._get_cookiejar() | ||||
(name, project, lang, base_url, article_path, script_path, sql, namespaces) = ( | |||||
self._load_site_from_sitesdb(name) | |||||
) | |||||
info = self._load_site_from_sitesdb(name) | |||||
name = info.name | |||||
config = self.config | config = self.config | ||||
login = (config.wiki.get("username"), config.wiki.get("password")) | login = (config.wiki.get("username"), config.wiki.get("password")) | ||||
@@ -213,6 +238,7 @@ class SitesDB: | |||||
search_config["nltk_dir"] = nltk_dir | search_config["nltk_dir"] = nltk_dir | ||||
search_config["exclusions_db"] = self._exclusions_db | search_config["exclusions_db"] = self._exclusions_db | ||||
sql = info.sql | |||||
if not sql: | if not sql: | ||||
sql = config.wiki.get("sql", OrderedDict()).copy() | sql = config.wiki.get("sql", OrderedDict()).copy() | ||||
for key, value in sql.items(): | for key, value in sql.items(): | ||||
@@ -221,13 +247,13 @@ class SitesDB: | |||||
return Site( | return Site( | ||||
name=name, | name=name, | ||||
project=project, | |||||
lang=lang, | |||||
base_url=base_url, | |||||
article_path=article_path, | |||||
script_path=script_path, | |||||
project=info.project, | |||||
lang=info.lang, | |||||
base_url=info.base_url, | |||||
article_path=info.article_path, | |||||
script_path=info.script_path, | |||||
sql=sql, | sql=sql, | ||||
namespaces=namespaces, | |||||
namespaces=info.namespaces, | |||||
login=login, | login=login, | ||||
oauth=oauth, | oauth=oauth, | ||||
cookiejar=cookiejar, | cookiejar=cookiejar, | ||||
@@ -240,18 +266,18 @@ class SitesDB: | |||||
search_config=search_config, | search_config=search_config, | ||||
) | ) | ||||
def _get_site_name_from_sitesdb(self, project, lang): | |||||
"""Return the name of the first site with the given project and lang. | |||||
def _get_site_name_from_sitesdb(self, project: str, lang: str) -> str | None: | |||||
""" | |||||
Return the name of the first site with the given project and lang. | |||||
If we can't find the site with the given information, we'll also try | |||||
searching for a site whose base_url contains "{lang}.{project}". There | |||||
are a few sites, like the French Wikipedia, that set their project to | |||||
something other than the expected "wikipedia" ("wikipédia" in this | |||||
case), but we should correctly find them when doing get_site(lang="fr", | |||||
project="wikipedia"). | |||||
If we can't find the site with the given information, we'll also try searching | |||||
for a site whose base_url contains "{lang}.{project}". There are a few sites, | |||||
like the French Wikipedia, that set their project to something other than the | |||||
expected "wikipedia" ("wikipédia" in this case), but we should correctly find | |||||
them when doing get_site(lang="fr", project="wikipedia"). | |||||
If the site is not found, return None. An empty sitesdb will be created | |||||
if none exists. | |||||
If the site is not found, return None. An empty sitesdb will be created if | |||||
none exists. | |||||
""" | """ | ||||
query1 = "SELECT site_name FROM sites WHERE site_project = ? and site_lang = ?" | query1 = "SELECT site_name FROM sites WHERE site_project = ? and site_lang = ?" | ||||
query2 = "SELECT site_name FROM sites WHERE site_base_url LIKE ?" | query2 = "SELECT site_name FROM sites WHERE site_base_url LIKE ?" | ||||
@@ -267,26 +293,27 @@ class SitesDB: | |||||
except sqlite.OperationalError: | except sqlite.OperationalError: | ||||
self._create_sitesdb() | self._create_sitesdb() | ||||
def _add_site_to_sitesdb(self, site): | |||||
"""Extract relevant info from a Site object and add it to the sitesdb. | |||||
def _add_site_to_sitesdb(self, site: Site) -> None: | |||||
""" | |||||
Extract relevant info from a Site object and add it to the sitesdb. | |||||
Works like a reverse _load_site_from_sitesdb(); the site's project, | |||||
language, base URL, article path, script path, SQL connection data, and | |||||
namespaces are extracted from the site and inserted into the sites | |||||
database. If the sitesdb doesn't exist, we'll create it first. | |||||
Works like a reverse _load_site_from_sitesdb(); the site's project, language, | |||||
base URL, article path, script path, SQL connection data, and namespaces are | |||||
extracted from the site and inserted into the sites database. If the sitesdb | |||||
doesn't exist, we'll create it first. | |||||
""" | """ | ||||
name = site.name | name = site.name | ||||
sites_data = ( | sites_data = ( | ||||
name, | name, | ||||
site.project, | site.project, | ||||
site.lang, | site.lang, | ||||
site._base_url, | |||||
site._article_path, | |||||
site._script_path, | |||||
site.base_url, | |||||
site.article_path, | |||||
site.script_path, | |||||
) | ) | ||||
sql_data = [(name, key, val) for key, val in site._sql_data.items()] | sql_data = [(name, key, val) for key, val in site._sql_data.items()] | ||||
ns_data = [] | |||||
for ns_id, ns_names in site._namespaces.items(): | |||||
ns_data: list[tuple[str, int, str, bool]] = [] | |||||
for ns_id, ns_names in site.namespaces.items(): | |||||
ns_data.append((name, ns_id, ns_names.pop(0), True)) | ns_data.append((name, ns_id, ns_names.pop(0), True)) | ||||
for ns_name in ns_names: | for ns_name in ns_names: | ||||
ns_data.append((name, ns_id, ns_name, False)) | ns_data.append((name, ns_id, ns_name, False)) | ||||
@@ -306,8 +333,10 @@ class SitesDB: | |||||
conn.executemany("INSERT INTO sql_data VALUES (?, ?, ?)", sql_data) | conn.executemany("INSERT INTO sql_data VALUES (?, ?, ?)", sql_data) | ||||
conn.executemany("INSERT INTO namespaces VALUES (?, ?, ?, ?)", ns_data) | conn.executemany("INSERT INTO namespaces VALUES (?, ?, ?, ?)", ns_data) | ||||
def _remove_site_from_sitesdb(self, name): | |||||
"""Remove a site by name from the sitesdb and the internal cache.""" | |||||
def _remove_site_from_sitesdb(self, name: str) -> bool: | |||||
""" | |||||
Remove a site by name from the sitesdb and the internal cache. | |||||
""" | |||||
try: | try: | ||||
del self._sites[name] | del self._sites[name] | ||||
except KeyError: | except KeyError: | ||||
@@ -323,30 +352,34 @@ class SitesDB: | |||||
self._logger.info(f"Removed site '{name}'") | self._logger.info(f"Removed site '{name}'") | ||||
return True | return True | ||||
def get_site(self, name=None, project=None, lang=None): | |||||
"""Return a Site instance based on information from the sitesdb. | |||||
def get_site( | |||||
self, | |||||
name: str | None = None, | |||||
project: str | None = None, | |||||
lang: str | None = None, | |||||
) -> Site: | |||||
""" | |||||
Return a Site instance based on information from the sitesdb. | |||||
With no arguments, return the default site as specified by our config | |||||
file. This is ``config.wiki["defaultSite"]``. | |||||
With no arguments, return the default site as specified by our config file. | |||||
This is ``config.wiki["defaultSite"]``. | |||||
With *name* specified, return the site with that name. This is | |||||
equivalent to the site's ``wikiid`` in the API, like *enwiki*. | |||||
With *name* specified, return the site with that name. This is equivalent to | |||||
the site's ``wikiid`` in the API, like *enwiki*. | |||||
With *project* and *lang* specified, return the site whose project and | |||||
language match these values. If there are multiple sites with the same | |||||
values (unlikely), this is not a reliable way of loading a site. Call | |||||
the function with an explicit *name* in that case. | |||||
With *project* and *lang* specified, return the site whose project and language | |||||
match these values. If there are multiple sites with the same values | |||||
(unlikely), this is not a reliable way of loading a site. Call the function | |||||
with an explicit *name* in that case. | |||||
We will attempt to login to the site automatically using | We will attempt to login to the site automatically using | ||||
``config.wiki["username"]`` and ``config.wiki["password"]`` if both are | |||||
defined. | |||||
Specifying a project without a lang or a lang without a project will | |||||
raise :py:exc:`TypeError`. If all three args are specified, *name* will | |||||
be first tried, then *project* and *lang* if *name* doesn't work. If a | |||||
site cannot be found in the sitesdb, | |||||
:py:exc:`~earwigbot.exceptions.SiteNotFoundError` will be raised. An | |||||
empty sitesdb will be created if none is found. | |||||
``config.wiki["username"]`` and ``config.wiki["password"]`` if both are defined. | |||||
Specifying a project without a lang or a lang without a project will raise | |||||
:py:exc:`TypeError`. If all three args are specified, *name* will be first | |||||
tried, then *project* and *lang* if *name* doesn't work. If a site cannot be | |||||
found in the sitesdb, :py:exc:`~earwigbot.exceptions.SiteNotFoundError` will be | |||||
raised. An empty sitesdb will be created if none is found. | |||||
""" | """ | ||||
# Someone specified a project without a lang, or vice versa: | # Someone specified a project without a lang, or vice versa: | ||||
if (project and not lang) or (not project and lang): | if (project and not lang) or (not project and lang): | ||||
@@ -374,6 +407,7 @@ class SitesDB: | |||||
raise | raise | ||||
# If we end up here, then project and lang are the only args given: | # If we end up here, then project and lang are the only args given: | ||||
assert project is not None and lang is not None, (project, lang) | |||||
name = self._get_site_name_from_sitesdb(project, lang) | name = self._get_site_name_from_sitesdb(project, lang) | ||||
if name: | if name: | ||||
return self._get_site_object(name) | return self._get_site_object(name) | ||||
@@ -381,30 +415,34 @@ class SitesDB: | |||||
raise SiteNotFoundError(e) | raise SiteNotFoundError(e) | ||||
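# Usage sketch for get_site(), assuming a running Bot instance `bot`:
default_site = bot.wiki.get_site()                            # config.wiki["defaultSite"]
enwiki = bot.wiki.get_site("enwiki")                          # by name (wikiid)
frwiki = bot.wiki.get_site(project="wikipedia", lang="fr")    # by project and lang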
def add_site( | def add_site( | ||||
self, project=None, lang=None, base_url=None, script_path="/w", sql=None | |||||
): | |||||
"""Add a site to the sitesdb so it can be retrieved with get_site(). | |||||
self, | |||||
project: str | None = None, | |||||
lang: str | None = None, | |||||
base_url: str | None = None, | |||||
script_path: str = "/w", | |||||
sql: SqlConnInfo | None = None, | |||||
) -> Site: | |||||
""" | |||||
Add a site to the sitesdb so it can be retrieved with get_site(). | |||||
If only a project and a lang are given, we'll guess the *base_url* as | If only a project and a lang are given, we'll guess the *base_url* as | ||||
``"//{lang}.{project}.org"`` (which is protocol-relative, becoming | |||||
``"https"`` if *useHTTPS* is ``True`` in config otherwise ``"http"``). | |||||
If this is wrong, provide the correct *base_url* as an argument (in | |||||
which case project and lang are ignored). Most wikis use ``"/w"`` as | |||||
the script path (meaning the API is located at | |||||
``"{base_url}{script_path}/api.php"`` -> | |||||
``"//{lang}.{project}.org/w/api.php"``), so this is the default. If | |||||
your wiki is different, provide the script_path as an argument. SQL | |||||
connection settings are guessed automatically using config's template | |||||
value. If this is wrong or not specified, provide a dict of kwargs as | |||||
*sql* and Site will pass it to :py:func:`pymysql.connect(**sql) | |||||
<pymysql.connect>`, allowing you to make queries with | |||||
:py:meth:`site.sql_query <earwigbot.wiki.site.Site.sql_query>`. | |||||
Returns ``True`` if the site was added successfully or ``False`` if the | |||||
site is already in our sitesdb (this can be done purposefully to update | |||||
old site info). Raises :py:exc:`~earwigbot.exception.SiteNotFoundError` | |||||
if not enough information has been provided to identify the site (e.g. | |||||
a *project* but not a *lang*). | |||||
``"//{lang}.{project}.org"`` (which is protocol-relative, becoming ``"https"`` | |||||
if *useHTTPS* is ``True`` in config otherwise ``"http"``). If this is wrong, | |||||
provide the correct *base_url* as an argument (in which case project and lang | |||||
are ignored). Most wikis use ``"/w"`` as the script path (meaning the API is | |||||
located at ``"{base_url}{script_path}/api.php"`` -> | |||||
``"//{lang}.{project}.org/w/api.php"``), so this is the default. If your wiki | |||||
is different, provide the script_path as an argument. SQL connection settings | |||||
are guessed automatically using config's template value. If this is wrong or | |||||
not specified, provide a dict of kwargs as *sql* and Site will pass it to | |||||
:py:func:`pymysql.connect(**sql) <pymysql.connect>`, allowing you to make | |||||
queries with :py:meth:`site.sql_query <earwigbot.wiki.site.Site.sql_query>`. | |||||
Returns the :py:class:`~earwigbot.wiki.site.Site` object for the added site. Adding
a site that is already in our sitesdb can be done purposefully to update old site
info. Raises :py:exc:`~earwigbot.exceptions.SiteNotFoundError` if not enough
information has been provided to identify the site (e.g. a *project* but not
a *lang*).
""" | """ | ||||
if not base_url: | if not base_url: | ||||
if not project or not lang: | if not project or not lang: | ||||
@@ -445,7 +483,12 @@ class SitesDB: | |||||
self._add_site_to_sitesdb(site) | self._add_site_to_sitesdb(site) | ||||
return self._get_site_object(site.name) | return self._get_site_object(site.name) | ||||
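# Usage sketch for add_site(), assuming a running Bot instance `bot`.
# The base_url and script_path values are illustrative.
dewiki = bot.wiki.add_site(project="wikipedia", lang="de")
custom = bot.wiki.add_site(base_url="//wiki.example.org", script_path="/w")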
def remove_site(self, name=None, project=None, lang=None): | |||||
def remove_site( | |||||
self, | |||||
name: str | None = None, | |||||
project: str | None = None, | |||||
lang: str | None = None, | |||||
) -> bool: | |||||
"""Remove a site from the sitesdb. | """Remove a site from the sitesdb. | ||||
Returns ``True`` if the site was removed successfully or ``False`` if | Returns ``True`` if the site was removed successfully or ``False`` if | ||||
@@ -1,4 +1,4 @@ | |||||
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -18,14 +18,21 @@ | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
# SOFTWARE. | # SOFTWARE. | ||||
from logging import NullHandler, getLogger | |||||
from socket import AF_INET, AF_INET6, inet_pton | |||||
from time import gmtime, strptime | |||||
from __future__ import annotations | |||||
import socket | |||||
import time | |||||
import typing | |||||
from logging import Logger, NullHandler, getLogger | |||||
from typing import Any, Literal | |||||
from earwigbot.exceptions import UserNotFoundError | from earwigbot.exceptions import UserNotFoundError | ||||
from earwigbot.wiki import constants | from earwigbot.wiki import constants | ||||
from earwigbot.wiki.page import Page | from earwigbot.wiki.page import Page | ||||
if typing.TYPE_CHECKING: | |||||
from earwigbot.wiki.site import Site | |||||
__all__ = ["User"] | __all__ = ["User"] | ||||
@@ -33,10 +40,9 @@ class User: | |||||
""" | """ | ||||
**EarwigBot: Wiki Toolset: User** | **EarwigBot: Wiki Toolset: User** | ||||
Represents a user on a given :py:class:`~earwigbot.wiki.site.Site`. Has | |||||
methods for getting a bunch of information about the user, such as | |||||
editcount and user rights, methods for returning the user's userpage and | |||||
talkpage, etc. | |||||
Represents a user on a given :py:class:`~earwigbot.wiki.site.Site`. Has methods for | |||||
getting a bunch of information about the user, such as editcount and user rights, | |||||
methods for returning the user's userpage and talkpage, etc. | |||||
*Attributes:* | *Attributes:* | ||||
@@ -56,24 +62,23 @@ class User: | |||||
*Public methods:* | *Public methods:* | ||||
- :py:meth:`reload`: forcibly reloads the user's attributes | - :py:meth:`reload`: forcibly reloads the user's attributes | ||||
- :py:meth:`get_userpage`: returns a Page object representing the user's | |||||
userpage | |||||
- :py:meth:`get_talkpage`: returns a Page object representing the user's | |||||
talkpage | |||||
- :py:meth:`get_userpage`: returns a Page object representing the user's userpage | |||||
- :py:meth:`get_talkpage`: returns a Page object representing the user's talkpage | |||||
""" | """ | ||||
def __init__(self, site, name, logger=None): | |||||
"""Constructor for new User instances. | |||||
def __init__(self, site: Site, name: str, logger: Logger | None = None) -> None: | |||||
""" | |||||
Constructor for new User instances. | |||||
Takes two arguments, a Site object (necessary for doing API queries), | |||||
and the name of the user, preferably without "User:" in front, although | |||||
this prefix will be automatically removed by the API if given. | |||||
Takes two required arguments: a Site object (necessary for doing API queries), and
the name of the user, preferably without "User:" in front, although this prefix
will be automatically removed by the API if given. | |||||
You can also use site.get_user() instead, which returns a User object, | |||||
and is preferred. | |||||
You can also use site.get_user() instead, which returns a User object, and | |||||
is preferred. | |||||
We won't do any API queries yet for basic information about the user - | |||||
save that for when the information is requested. | |||||
We won't do any API queries yet for basic information about the user - save | |||||
that for when the information is requested. | |||||
""" | """ | ||||
self._site = site | self._site = site | ||||
self._name = name | self._name = name | ||||
@@ -85,22 +90,27 @@ class User: | |||||
self._logger = getLogger("earwigbot.wiki") | self._logger = getLogger("earwigbot.wiki") | ||||
self._logger.addHandler(NullHandler()) | self._logger.addHandler(NullHandler()) | ||||
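# Usage sketch: site.get_user() is the preferred way to obtain a User; direct
# construction is equivalent. `site` is an assumed Site object; the name is illustrative.
user = site.get_user("Example user")
same_user = User(site, "Example user")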
def __repr__(self): | |||||
"""Return the canonical string representation of the User.""" | |||||
def __repr__(self) -> str: | |||||
""" | |||||
Return the canonical string representation of the User. | |||||
""" | |||||
return f"User(name={self._name!r}, site={self._site!r})" | return f"User(name={self._name!r}, site={self._site!r})" | ||||
def __str__(self): | |||||
"""Return a nice string representation of the User.""" | |||||
def __str__(self) -> str: | |||||
""" | |||||
Return a nice string representation of the User. | |||||
""" | |||||
return f'<User "{self.name}" of {str(self.site)}>' | return f'<User "{self.name}" of {str(self.site)}>' | ||||
def _get_attribute(self, attr): | |||||
"""Internally used to get an attribute by name. | |||||
def _get_attribute(self, attr: str) -> Any: | |||||
""" | |||||
Internally used to get an attribute by name. | |||||
We'll call _load_attributes() to get this (and all other attributes) | |||||
from the API if it is not already defined. | |||||
We'll call _load_attributes() to get this (and all other attributes) from the | |||||
API if it is not already defined. | |||||
Raises UserNotFoundError if a nonexistant user prevents us from | |||||
returning a certain attribute. | |||||
Raises UserNotFoundError if a nonexistent user prevents us from returning a
certain attribute. | |||||
""" | """ | ||||
if not hasattr(self, attr): | if not hasattr(self, attr): | ||||
self._load_attributes() | self._load_attributes() | ||||
@@ -109,11 +119,12 @@ class User: | |||||
raise UserNotFoundError(e) | raise UserNotFoundError(e) | ||||
return getattr(self, attr) | return getattr(self, attr) | ||||
def _load_attributes(self): | |||||
"""Internally used to load all attributes from the API. | |||||
def _load_attributes(self) -> None: | |||||
""" | |||||
Internally used to load all attributes from the API. | |||||
Normally, this is called by _get_attribute() when a requested attribute | |||||
is not defined. This defines it. | |||||
Normally, this is called by _get_attribute() when a requested attribute is not | |||||
defined. This defines it. | |||||
""" | """ | ||||
props = "blockinfo|groups|rights|editcount|registration|emailable|gender" | props = "blockinfo|groups|rights|editcount|registration|emailable|gender" | ||||
result = self.site.api_query( | result = self.site.api_query( | ||||
@@ -150,11 +161,11 @@ class User: | |||||
reg = res["registration"] | reg = res["registration"] | ||||
try: | try: | ||||
self._registration = strptime(reg, "%Y-%m-%dT%H:%M:%SZ") | |||||
self._registration = time.strptime(reg, "%Y-%m-%dT%H:%M:%SZ") | |||||
except TypeError: | except TypeError: | ||||
# Sometimes the API doesn't give a date; the user's probably really | # Sometimes the API doesn't give a date; the user's probably really | ||||
# old. There's nothing else we can do! | # old. There's nothing else we can do! | ||||
self._registration = gmtime(0) | |||||
self._registration = time.gmtime(0) | |||||
try: | try: | ||||
res["emailable"] | res["emailable"] | ||||
@@ -166,24 +177,28 @@ class User: | |||||
self._gender = res["gender"] | self._gender = res["gender"] | ||||
@property | @property | ||||
def site(self): | |||||
"""The user's corresponding Site object.""" | |||||
def site(self) -> Site: | |||||
""" | |||||
The user's corresponding Site object. | |||||
""" | |||||
return self._site | return self._site | ||||
@property | @property | ||||
def name(self): | |||||
"""The user's username. | |||||
def name(self) -> str: | |||||
""" | |||||
The user's username. | |||||
This will never make an API query on its own, but if one has already | |||||
been made by the time this is retrieved, the username may have been | |||||
"normalized" from the original input to the constructor, converted into | |||||
a Unicode object, with underscores removed, etc. | |||||
This will never make an API query on its own, but if one has already been made
by the time this is retrieved, the username may have been "normalized" from the
original input to the constructor, with underscores removed, etc.
""" | """ | ||||
return self._name | return self._name | ||||
@property | @property | ||||
def exists(self): | |||||
"""``True`` if the user exists, or ``False`` if they do not. | |||||
def exists(self) -> bool: | |||||
""" | |||||
``True`` if the user exists, or ``False`` if they do not. | |||||
Makes an API query only if we haven't made one already. | Makes an API query only if we haven't made one already. | ||||
""" | """ | ||||
@@ -192,124 +207,135 @@ class User: | |||||
return self._exists | return self._exists | ||||
@property | @property | ||||
def userid(self): | |||||
"""An integer ID used by MediaWiki to represent the user. | |||||
def userid(self) -> int: | |||||
""" | |||||
An integer ID used by MediaWiki to represent the user. | |||||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user | |||||
does not exist. Makes an API query only if we haven't made one already. | |||||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not | |||||
exist. Makes an API query only if we haven't made one already. | |||||
""" | """ | ||||
return self._get_attribute("_userid") | return self._get_attribute("_userid") | ||||
@property | @property | ||||
def blockinfo(self): | |||||
"""Information about any current blocks on the user. | |||||
def blockinfo(self) -> dict[str, Any] | Literal[False]: | |||||
""" | |||||
Information about any current blocks on the user. | |||||
If the user is not blocked, returns ``False``. If they are, returns a | |||||
dict with three keys: ``"by"`` is the blocker's username, ``"reason"`` | |||||
is the reason why they were blocked, and ``"expiry"`` is when the block | |||||
expires. | |||||
If the user is not blocked, returns ``False``. If they are, returns a dict with | |||||
three keys: ``"by"`` is the blocker's username, ``"reason"`` is the reason why | |||||
they were blocked, and ``"expiry"`` is when the block expires. | |||||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user | |||||
does not exist. Makes an API query only if we haven't made one already. | |||||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not | |||||
exist. Makes an API query only if we haven't made one already. | |||||
""" | """ | ||||
return self._get_attribute("_blockinfo") | return self._get_attribute("_blockinfo") | ||||
@property | @property | ||||
def groups(self): | |||||
"""A list of groups this user is in, including ``"*"``. | |||||
def groups(self) -> list[str]: | |||||
""" | |||||
A list of groups this user is in, including ``"*"``. | |||||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user | |||||
does not exist. Makes an API query only if we haven't made one already. | |||||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not | |||||
exist. Makes an API query only if we haven't made one already. | |||||
""" | """ | ||||
return self._get_attribute("_groups") | return self._get_attribute("_groups") | ||||
@property | @property | ||||
def rights(self): | |||||
"""A list of this user's rights. | |||||
def rights(self) -> list[str]: | |||||
""" | |||||
A list of this user's rights. | |||||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user | |||||
does not exist. Makes an API query only if we haven't made one already. | |||||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not | |||||
exist. Makes an API query only if we haven't made one already. | |||||
""" | """ | ||||
return self._get_attribute("_rights") | return self._get_attribute("_rights") | ||||
@property | @property | ||||
def editcount(self): | |||||
"""Returns the number of edits made by the user. | |||||
def editcount(self) -> int: | |||||
""" | |||||
Returns the number of edits made by the user. | |||||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user | |||||
does not exist. Makes an API query only if we haven't made one already. | |||||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not | |||||
exist. Makes an API query only if we haven't made one already. | |||||
""" | """ | ||||
return self._get_attribute("_editcount") | return self._get_attribute("_editcount") | ||||
@property | @property | ||||
def registration(self): | |||||
"""The time the user registered as a :py:class:`time.struct_time`. | |||||
def registration(self) -> time.struct_time: | |||||
""" | |||||
The time the user registered as a :py:class:`time.struct_time`. | |||||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user | |||||
does not exist. Makes an API query only if we haven't made one already. | |||||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not | |||||
exist. Makes an API query only if we haven't made one already. | |||||
""" | """ | ||||
return self._get_attribute("_registration") | return self._get_attribute("_registration") | ||||
@property | @property | ||||
def emailable(self): | |||||
"""``True`` if the user can be emailed, or ``False`` if they cannot. | |||||
def emailable(self) -> bool: | |||||
""" | |||||
``True`` if the user can be emailed, or ``False`` if they cannot. | |||||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user | |||||
does not exist. Makes an API query only if we haven't made one already. | |||||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not | |||||
exist. Makes an API query only if we haven't made one already. | |||||
""" | """ | ||||
return self._get_attribute("_emailable") | return self._get_attribute("_emailable") | ||||
@property | @property | ||||
def gender(self): | |||||
"""The user's gender. | |||||
def gender(self) -> str: | |||||
""" | |||||
The user's gender. | |||||
Can return either ``"male"``, ``"female"``, or ``"unknown"``, if they | |||||
did not specify it. | |||||
Can return ``"male"``, ``"female"``, or ``"unknown"`` (if the user did not
specify their gender).
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user | |||||
does not exist. Makes an API query only if we haven't made one already. | |||||
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user does not | |||||
exist. Makes an API query only if we haven't made one already. | |||||
""" | """ | ||||
return self._get_attribute("_gender") | return self._get_attribute("_gender") | ||||
@property | @property | ||||
def is_ip(self): | |||||
"""``True`` if the user is an IP address, or ``False`` otherwise. | |||||
def is_ip(self) -> bool: | |||||
""" | |||||
``True`` if the user is an IP address, or ``False`` otherwise. | |||||
This tests for IPv4 and IPv6 using :py:func:`socket.inet_pton` on the | |||||
username. No API queries are made. | |||||
This tests for IPv4 and IPv6 using :py:func:`socket.inet_pton` on the username. | |||||
No API queries are made. | |||||
""" | """ | ||||
try: | try: | ||||
inet_pton(AF_INET, self.name) | |||||
socket.inet_pton(socket.AF_INET, self.name) | |||||
except OSError: | except OSError: | ||||
try: | try: | ||||
inet_pton(AF_INET6, self.name) | |||||
socket.inet_pton(socket.AF_INET6, self.name) | |||||
except OSError: | except OSError: | ||||
return False | return False | ||||
return True | return True | ||||
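# Usage sketch for is_ip, assuming a configured Site object `site`; the addresses
# are illustrative documentation IPs.
assert User(site, "192.0.2.1").is_ip          # IPv4
assert User(site, "2001:db8::1").is_ip        # IPv6
assert not User(site, "Example user").is_ip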
def reload(self): | |||||
"""Forcibly reload the user's attributes. | |||||
def reload(self) -> None: | |||||
""" | |||||
Forcibly reload the user's attributes. | |||||
Emphasis on *reload*: this is only necessary if there is reason to | |||||
believe they have changed. | |||||
Emphasis on *reload*: this is only necessary if there is reason to believe they | |||||
have changed. | |||||
""" | """ | ||||
self._load_attributes() | self._load_attributes() | ||||
def get_userpage(self): | |||||
"""Return a Page object representing the user's userpage. | |||||
def get_userpage(self) -> Page: | |||||
""" | |||||
Return a Page object representing the user's userpage. | |||||
No checks are made to see if it exists or not. Proper site namespace | |||||
conventions are followed. | |||||
No checks are made to see if it exists or not. Proper site namespace conventions | |||||
are followed. | |||||
""" | """ | ||||
prefix = self.site.namespace_id_to_name(constants.NS_USER) | prefix = self.site.namespace_id_to_name(constants.NS_USER) | ||||
pagename = ":".join((prefix, self._name)) | pagename = ":".join((prefix, self._name)) | ||||
return Page(self.site, pagename) | return Page(self.site, pagename) | ||||
def get_talkpage(self): | |||||
"""Return a Page object representing the user's talkpage. | |||||
def get_talkpage(self) -> Page: | |||||
""" | |||||
Return a Page object representing the user's talkpage. | |||||
No checks are made to see if it exists or not. Proper site namespace | |||||
conventions are followed. | |||||
No checks are made to see if it exists or not. Proper site namespace conventions | |||||
are followed. | |||||
""" | """ | ||||
prefix = self.site.namespace_id_to_name(constants.NS_USER_TALK) | prefix = self.site.namespace_id_to_name(constants.NS_USER_TALK) | ||||
pagename = ":".join((prefix, self._name)) | pagename = ":".join((prefix, self._name)) | ||||