Browse Source

wikiproject_tagger: Add typing

tags/v0.4
Ben Kurtovic 7 months ago
parent
commit
56df9eca5b
2 changed files with 191 additions and 131 deletions
  1. +190
    -130
      earwigbot/tasks/wikiproject_tagger.py
  2. +1
    -1
      earwigbot/wiki/site.py

+ 190
- 130
earwigbot/tasks/wikiproject_tagger.py View File

@@ -19,67 +19,123 @@
# SOFTWARE. # SOFTWARE.


import re import re
from dataclasses import dataclass, field
from typing import NotRequired, TypedDict, Unpack

from mwparserfromhell.nodes import Template
from mwparserfromhell.wikicode import Wikicode


from earwigbot import exceptions from earwigbot import exceptions
from earwigbot.tasks import Task from earwigbot.tasks import Task
from earwigbot.wiki import constants
from earwigbot.wiki import Category, Page, Site, constants

# Keyword arguments accepted by WikiProjectTagger.run(); each key mirrors one of the
# task's command-line options. The functional TypedDict form is used because several
# keys contain hyphens ("only-with", "tag-categories", "dry-run") and therefore cannot
# be declared as class attributes.
JobKwargs = TypedDict(
    "JobKwargs",
    {
        # Page name of the banner template to add. The only key not marked NotRequired.
        "banner": str,
        # Category whose member pages should be tagged. run() logs an error and returns
        # when neither "category" nor "file" is present.
        "category": NotRequired[str],
        # Path of a file listing the pages/categories to tag.
        "file": NotRequired[str],
        # Custom edit summary.
        "summary": NotRequired[str],
        # Update banners that already exist instead of skipping those pages.
        "update": NotRequired[bool],
        # Comma-separated parameters to append to the banner, e.g.
        # "importance=low,taskforce=yes".
        "append": NotRequired[str],
        # Enable auto-assessment; a string is a comma-separated list of the only classes
        # that may be auto-assessed.
        "autoassess": NotRequired[bool | str],
        # Only tag pages that already carry this banner.
        "only-with": NotRequired[str],
        # Do not create talk pages that do not exist yet.
        "nocreate": NotRequired[bool],
        # Recurse into subcategories; an int is the maximum depth.
        # NOTE(review): presumably True means "no depth limit" -- confirm in run().
        "recursive": NotRequired[bool | int],
        # Also tag category pages themselves.
        "tag-categories": NotRequired[bool],
        # ID of the site to work on; passed to get_site(name=...).
        "site": NotRequired[str],
        # Log the pages that would be edited without saving anything.
        "dry-run": NotRequired[bool],
    },
)


@dataclass
class Job:
"""
Represents a single wikiproject-tagging task.

Stores information on the banner to add, the edit summary to use, whether or not to
autoassess and create new pages from scratch, and a counter of the number of pages
edited.
"""

banner: str
names: set[str]
summary: str
update: bool
append: str | None
autoassess: bool | str
only_with: set[str] | None
nocreate: bool
tag_categories: bool
dry_run: bool

counter: int = 0
processed_cats: set[str] = field(default_factory=set)
processed_pages: set[str] = field(default_factory=set)


class ShutoffEnabled(Exception):
    """
    Signal that the bot's shutoff is enabled and tagging must stop.

    process_page() raises this; run() catches it and returns, ending the task.
    """




class WikiProjectTagger(Task): class WikiProjectTagger(Task):
"""A task to tag talk pages with WikiProject banners.
"""
A task to tag talk pages with WikiProject banners.


Usage: :command:`earwigbot -t wikiproject_tagger PATH
--banner BANNER (--category CAT | --file FILE) [--summary SUM] [--update]
[--append PARAMS] [--autoassess [CLASSES]] [--only-with BANNER]
[--nocreate] [--recursive [NUM]] [--site SITE] [--dry-run]`
Usage: :command:`earwigbot -t wikiproject_tagger PATH --banner BANNER
(--category CAT | --file FILE) [--summary SUM] [--update] [--append PARAMS]
[--autoassess [CLASSES]] [--only-with BANNER] [--nocreate] [--recursive [NUM]]
[--tag-categories] [--site SITE] [--dry-run]`


.. glossary:: .. glossary::


``--banner BANNER`` ``--banner BANNER``
the page name of the banner to add, without a namespace (unless the
namespace is something other than ``Template``) so
``--banner "WikiProject Biography"`` for ``{{WikiProject Biography}}``
the page name of the banner to add, without a namespace (unless the namespace
is something other than ``Template``) so ``--banner "WikiProject Biography"``
for ``{{WikiProject Biography}}``
``--category CAT`` or ``--file FILE`` ``--category CAT`` or ``--file FILE``
determines which pages to tag; either all pages in a category (to
include subcategories as well, see ``--recursive``) or all
pages/categories in a file (utf-8 encoded and path relative to the
current directory)
determines which pages to tag; either all pages in a category (to include
subcategories as well, see ``--recursive``) or all pages/categories in a file
(utf-8 encoded and path relative to the current directory)
``--summary SUM`` ``--summary SUM``
an optional edit summary to use; defaults to
``"Tagging with WikiProject banner {{BANNER}}."``
an optional edit summary to use; defaults to ``"Tagging with WikiProject banner
{{BANNER}}."``
``--update`` ``--update``
updates existing banners with new fields; should include at least one
of ``--append`` or ``--autoassess`` to be useful
updates existing banners with new fields; should include at least one of
``--append`` or ``--autoassess`` to be useful
``--append PARAMS`` ``--append PARAMS``
optional comma-separated parameters to append to the banner (after an optional comma-separated parameters to append to the banner (after an
auto-assessment, if any); use syntax ``importance=low,taskforce=yes``
to add ``|importance=low|taskforce=yes``
auto-assessment, if any); use syntax ``importance=low,taskforce=yes`` to add
``|importance=low|taskforce=yes``
``--autoassess [CLASSES]`` ``--autoassess [CLASSES]``
try to assess each article's class automatically based on the class of
other banners on the same page; if CLASSES is given as a
comma-separated list, only those classes will be auto-assessed
try to assess each article's class automatically based on the class of other
banners on the same page; if CLASSES is given as a comma-separated list, only
those classes will be auto-assessed
``--only-with BANNER`` ``--only-with BANNER``
only tag pages that already have the given banner only tag pages that already have the given banner
``--nocreate`` ``--nocreate``
don't create new talk pages with just a banner if the page doesn't don't create new talk pages with just a banner if the page doesn't
already exist already exist
``--recursive NUM`` ``--recursive NUM``
recursively go through subcategories up to a maximum depth of ``NUM``,
or if ``NUM`` isn't provided, go infinitely (this can be dangerous)
recursively go through subcategories up to a maximum depth of ``NUM``, or if
``NUM`` isn't provided, go infinitely (this can be dangerous)
``--tag-categories`` ``--tag-categories``
also tag category pages also tag category pages
``--site SITE`` ``--site SITE``
the ID of the site to tag pages on, defaulting to the default site the ID of the site to tag pages on, defaulting to the default site
``--dry-run`` ``--dry-run``
don't actually make any edits, just log the pages that would have been
edited

don't actually make any edits, just log the pages that would have been edited
""" """


name = "wikiproject_tagger" name = "wikiproject_tagger"


# Regexes for template names that should always go above the banner, based
# on [[Wikipedia:Talk page layout]]:
# Regexes for template names that should always go above the banner, based on
# [[Wikipedia:Talk page layout]]:
TOP_TEMPS = [ TOP_TEMPS = [
r"skip ?to ?(toc|talk|toctalk)$", r"skip ?to ?(toc|talk|toctalk)$",
r"ga ?nominee$", r"ga ?nominee$",
@@ -100,22 +156,27 @@ class WikiProjectTagger(Task):
] ]


@staticmethod @staticmethod
def _upperfirst(text):
"""Try to uppercase the first letter of a string."""
def _upperfirst(text: str) -> str:
"""
Try to uppercase the first letter of a string.
"""
try: try:
return text[0].upper() + text[1:] return text[0].upper() + text[1:]
except IndexError: except IndexError:
return text return text


def run(self, **kwargs):
"""Main entry point for the bot task."""
def run(self, **kwargs: Unpack[JobKwargs]) -> None:
"""
Main entry point for the bot task.
"""
if "file" not in kwargs and "category" not in kwargs: if "file" not in kwargs and "category" not in kwargs:
log = "No pages to tag; I need either a 'category' or a 'file' passed as kwargs"
self.logger.error(log)
self.logger.error(
"No pages to tag; I need either a 'category' or a 'file' passed"
"as kwargs"
)
return return
if "banner" not in kwargs: if "banner" not in kwargs:
log = "Needs a banner to add passed as the 'banner' kwarg"
self.logger.error(log)
self.logger.error("Needs a banner to add passed as the 'banner' kwarg")
return return


site = self.bot.wiki.get_site(name=kwargs.get("site")) site = self.bot.wiki.get_site(name=kwargs.get("site"))
@@ -139,7 +200,7 @@ class WikiProjectTagger(Task):
else: else:
only_with = None only_with = None


job = _Job(
job = Job(
banner=banner, banner=banner,
names=names, names=names,
summary=summary, summary=summary,
@@ -154,11 +215,15 @@ class WikiProjectTagger(Task):


try: try:
self.run_job(kwargs, site, job, recursive) self.run_job(kwargs, site, job, recursive)
except _ShutoffEnabled:
except ShutoffEnabled:
return return


def run_job(self, kwargs, site, job, recursive):
"""Run a tagging *job* on a given *site*."""
def run_job(
self, kwargs: JobKwargs, site: Site, job: Job, recursive: bool | int
) -> None:
"""
Run a tagging *job* on a given *site*.
"""
if "category" in kwargs: if "category" in kwargs:
title = kwargs["category"] title = kwargs["category"]
title = self.guess_namespace(site, title, constants.NS_CATEGORY) title = self.guess_namespace(site, title, constants.NS_CATEGORY)
@@ -168,19 +233,22 @@ class WikiProjectTagger(Task):
with open(kwargs["file"]) as fileobj: with open(kwargs["file"]) as fileobj:
for line in fileobj: for line in fileobj:
if line.strip(): if line.strip():
if line.startswith("[[") and line.endswith("]]"):
line = line[2:-2]
if "[[" in line:
match = re.search(r"\[\[(.+?)\]\]", line)
if match:
line = match.group(1)
page = site.get_page(line) page = site.get_page(line)
if page.namespace == constants.NS_CATEGORY: if page.namespace == constants.NS_CATEGORY:
self.process_category(page, job, recursive) self.process_category(page, job, recursive)
else: else:
self.process_page(page, job) self.process_page(page, job)


def guess_namespace(self, site, title, assumed):
"""If the given *title* does not have an explicit namespace, guess it.
def guess_namespace(self, site: Site, title: str, assumed: int) -> str:
"""
If the given *title* does not have an explicit namespace, guess it.


For example, when transcluding templates, the namespace is guessed to
be ``NS_TEMPLATE`` unless one is explicitly declared (so ``{{foo}}`` ->
For example, when transcluding templates, the namespace is guessed to be
``NS_TEMPLATE`` unless one is explicitly declared (so ``{{foo}}`` ->
``[[Template:Foo]]``, but ``{{:foo}}`` -> ``[[Foo]]``). ``[[Template:Foo]]``, but ``{{:foo}}`` -> ``[[Foo]]``).
""" """
prefix = title.split(":", 1)[0] prefix = title.split(":", 1)[0]
@@ -192,14 +260,16 @@ class WikiProjectTagger(Task):
return ":".join((site.namespace_id_to_name(assumed), title)) return ":".join((site.namespace_id_to_name(assumed), title))
return title return title


def get_names(self, site, banner):
"""Return all possible aliases for a given *banner* template."""
def get_names(self, site: Site, banner: str) -> tuple[str, set[str] | None]:
"""
Return all possible aliases for a given *banner* template.
"""
title = self.guess_namespace(site, banner, constants.NS_TEMPLATE) title = self.guess_namespace(site, banner, constants.NS_TEMPLATE)
if title == banner: if title == banner:
banner = banner.split(":", 1)[1] banner = banner.split(":", 1)[1]
page = site.get_page(title) page = site.get_page(title)
if page.exists != page.PAGE_EXISTS: if page.exists != page.PAGE_EXISTS:
self.logger.error("Banner [[%s]] does not exist", title)
self.logger.error(f"Banner [[{title}]] does not exist")
return banner, None return banner, None


names = {banner, title} names = {banner, title}
@@ -215,18 +285,18 @@ class WikiProjectTagger(Task):
if backlink["ns"] == constants.NS_TEMPLATE: if backlink["ns"] == constants.NS_TEMPLATE:
names.add(backlink["title"].split(":", 1)[1]) names.add(backlink["title"].split(":", 1)[1])


log = "Found %s aliases for banner [[%s]]"
self.logger.debug(log, len(names), title)
self.logger.debug(f"Found {len(names)} aliases for banner [[{title}]]")
return banner, names return banner, names


def process_category(self, page, job, recursive):
"""Try to tag all pages in the given category."""
def process_category(self, page: Page, job: Job, recursive: bool | int) -> None:
"""
Try to tag all pages in the given category.
"""
assert isinstance(page, Category), f"[[{page.title}]] is not a category"
if page.title in job.processed_cats: if page.title in job.processed_cats:
self.logger.debug(
"Skipping category, already processed: [[%s]]", page.title
)
self.logger.debug(f"Skipping category, already processed: [[{page.title}]]")
return return
self.logger.info("Processing category: [[%s]]", page.title)
self.logger.info(f"Processing category: [[{page.title}]]")
job.processed_cats.add(page.title) job.processed_cats.add(page.title)


if job.tag_categories: if job.tag_categories:
@@ -245,19 +315,21 @@ class WikiProjectTagger(Task):
else: else:
self.process_page(member, job) self.process_page(member, job)


def process_page(self, page, job):
"""Try to tag a specific *page* using the *job* description."""
def process_page(self, page: Page, job: Job) -> None:
"""
Try to tag a specific *page* using the *job* description.
"""
if not page.is_talkpage: if not page.is_talkpage:
page = page.toggle_talk() page = page.toggle_talk()


if page.title in job.processed_pages: if page.title in job.processed_pages:
self.logger.debug("Skipping page, already processed: [[%s]]", page.title)
self.logger.debug(f"Skipping page, already processed: [[{page.title}]]")
return return
job.processed_pages.add(page.title) job.processed_pages.add(page.title)


if job.counter % 10 == 0: # Do a shutoff check every ten pages if job.counter % 10 == 0: # Do a shutoff check every ten pages
if self.shutoff_enabled(page.site): if self.shutoff_enabled(page.site):
raise _ShutoffEnabled()
raise ShutoffEnabled()
job.counter += 1 job.counter += 1


try: try:
@@ -266,7 +338,7 @@ class WikiProjectTagger(Task):
self.process_new_page(page, job) self.process_new_page(page, job)
return return
except exceptions.InvalidPageError: except exceptions.InvalidPageError:
self.logger.error("Skipping invalid page: [[%s]]", page.title)
self.logger.error(f"Skipping invalid page: [[{page.title}]]")
return return


is_update = False is_update = False
@@ -277,8 +349,10 @@ class WikiProjectTagger(Task):
is_update = True is_update = True
break break
else: else:
log = "Skipping page: [[%s]]; already tagged with '%s'"
self.logger.info(log, page.title, template.name)
self.logger.info(
f"Skipping page: [[{page.title}]]; already tagged with "
f"{template.name!r}"
)
return return


if job.only_with: if job.only_with:
@@ -286,20 +360,22 @@ class WikiProjectTagger(Task):
template.name.matches(job.only_with) template.name.matches(job.only_with)
for template in code.ifilter_templates(recursive=True) for template in code.ifilter_templates(recursive=True)
): ):
log = "Skipping page: [[%s]]; fails only-with condition"
self.logger.info(log, page.title)
self.logger.info(
f"Skipping page: [[{page.title}]]; fails only-with condition"
)
return return


if is_update: if is_update:
updated = self.update_banner(banner, job, code) updated = self.update_banner(banner, job, code)
if not updated: if not updated:
log = "Skipping page: [[%s]]; already tagged and no updates"
self.logger.info(log, page.title)
self.logger.info(
f"Skipping page: [[{page.title}]]; already tagged and no updates"
)
return return
self.logger.info("Updating banner on page: [[%s]]", page.title)
self.logger.info(f"Updating banner on page: [[{page.title}]]")
banner = str(banner) banner = str(banner)
else: else:
self.logger.info("Tagging page: [[%s]]", page.title)
self.logger.info(f"Tagging page: [[{page.title}]]")
banner = self.make_banner(job, code) banner = self.make_banner(job, code)
shell = self.get_banner_shell(code) shell = self.get_banner_shell(code)
if shell: if shell:
@@ -309,28 +385,33 @@ class WikiProjectTagger(Task):


self.save_page(page, job, str(code), banner) self.save_page(page, job, str(code), banner)


def process_new_page(self, page, job):
"""Try to tag a *page* that doesn't exist yet using the *job*."""
def process_new_page(self, page: Page, job: Job) -> None:
"""
Try to tag a *page* that doesn't exist yet using the *job*.
"""
if job.nocreate or job.only_with: if job.nocreate or job.only_with:
log = "Skipping nonexistent page: [[%s]]"
self.logger.info(log, page.title)
self.logger.info(f"Skipping nonexistent page: [[{page.title}]]")
else: else:
self.logger.info("Tagging new page: [[%s]]", page.title)
self.logger.info(f"Tagging new page: [[{page.title}]]")
banner = self.make_banner(job) banner = self.make_banner(job)
self.save_page(page, job, banner, banner) self.save_page(page, job, banner, banner)


def save_page(self, page, job, text, banner):
"""Save a page with an updated banner."""
def save_page(self, page: Page, job: Job, text: str, banner: str) -> None:
"""
Save a page with an updated banner.
"""
if job.dry_run: if job.dry_run:
self.logger.debug("[DRY RUN] Banner: %s", banner)
self.logger.debug(f"[DRY RUN] Banner: {banner}")
else: else:
summary = job.summary.replace("$3", banner) summary = job.summary.replace("$3", banner)
page.edit(text, self.make_summary(summary), minor=True) page.edit(text, self.make_summary(summary), minor=True)


def make_banner(self, job, code=None):
"""Return banner text to add based on a *job* and a page's *code*."""
def make_banner(self, job: Job, code: Wikicode | None = None) -> str:
"""
Return banner text to add based on a *job* and a page's *code*.
"""
banner = job.banner banner = job.banner
if code is not None and job.autoassess is not False:
if code is not None and job.autoassess:
assess, reason = self.get_autoassessment(code, job.autoassess) assess, reason = self.get_autoassessment(code, job.autoassess)
if assess: if assess:
banner += "|class=" + assess banner += "|class=" + assess
@@ -340,14 +421,16 @@ class WikiProjectTagger(Task):
banner += "|" + "|".join(job.append.split(",")) banner += "|" + "|".join(job.append.split(","))
return "{{" + banner + "}}" return "{{" + banner + "}}"


def update_banner(self, banner, job, code):
"""Update an existing *banner* based on a *job* and a page's *code*."""
def update_banner(self, banner: Template, job: Job, code: Wikicode) -> bool:
"""
Update an existing *banner* based on a *job* and a page's *code*.
"""


def has(key):
def has(key: str) -> bool:
return banner.has(key) and banner.get(key).value.strip() not in ("", "?") return banner.has(key) and banner.get(key).value.strip() not in ("", "?")


updated = False updated = False
if job.autoassess is not False:
if job.autoassess:
if not has("class"): if not has("class"):
assess, reason = self.get_autoassessment(code, job.autoassess) assess, reason = self.get_autoassessment(code, job.autoassess)
if assess: if assess:
@@ -362,8 +445,11 @@ class WikiProjectTagger(Task):
updated = True updated = True
return updated return updated


def get_autoassessment(self, code, only_classes=None):
"""Get an autoassessment for a page.
def get_autoassessment(
self, code, only_classes: bool | str = False
) -> tuple[str, str] | tuple[None, None]:
"""
Get an autoassessment for a page.


Return (assessed class as a string or None, assessment reason or None). Return (assessed class as a string or None, assessment reason or None).
""" """
@@ -383,6 +469,7 @@ class WikiProjectTagger(Task):
"stub", "stub",
] ]
else: else:
assert only_classes, only_classes
classnames = [klass.strip().lower() for klass in only_classes.split(",")] classnames = [klass.strip().lower() for klass in only_classes.split(",")]


classes = {klass: 0 for klass in classnames} classes = {klass: 0 for klass in classnames}
@@ -404,19 +491,22 @@ class WikiProjectTagger(Task):
return self._upperfirst(rank), "inherit" return self._upperfirst(rank), "inherit"
return None, None return None, None


def get_banner_shell(self, code):
"""Return the banner shell template within *code*, else ``None``."""
def get_banner_shell(self, code: Wikicode) -> Template | None:
"""
Return the banner shell template within *code*, else ``None``.
"""
regex = r"^\{\{\s*((WikiProject|WP)[ _]?Banner[ _]?S(hell)?|W(BPS|PBS|PB)|Shell)\s*(\||\}\})" regex = r"^\{\{\s*((WikiProject|WP)[ _]?Banner[ _]?S(hell)?|W(BPS|PBS|PB)|Shell)\s*(\||\}\})"
shells = code.filter_templates(matches=regex) shells = code.filter_templates(matches=regex)
if not shells: if not shells:
shells = code.filter_templates(matches=regex, recursive=True) shells = code.filter_templates(matches=regex, recursive=True)
if shells: if shells:
log = "Inserting banner into shell: %s"
self.logger.debug(log, shells[0].name)
self.logger.debug(f"Inserting banner into shell: {shells[0].name}")
return shells[0] return shells[0]


def add_banner_to_shell(self, shell, banner):
"""Add *banner* to *shell*."""
def add_banner_to_shell(self, shell: Template, banner: str) -> None:
"""
Add *banner* to *shell*.
"""
if shell.has_param(1): if shell.has_param(1):
if str(shell.get(1).value).endswith("\n"): if str(shell.get(1).value).endswith("\n"):
banner += "\n" banner += "\n"
@@ -426,18 +516,20 @@ class WikiProjectTagger(Task):
else: else:
shell.add(1, banner) shell.add(1, banner)


def add_banner(self, code, banner):
"""Add *banner* to *code*, following template order conventions."""
def add_banner(self, code: Wikicode, banner: str) -> None:
"""
Add *banner* to *code*, following template order conventions.
"""
predecessor = None predecessor = None
for template in code.ifilter_templates(recursive=False): for template in code.ifilter_templates(recursive=False):
name = template.name.lower().replace("_", " ") name = template.name.lower().replace("_", " ")
for regex in self.TOP_TEMPS: for regex in self.TOP_TEMPS:
if re.match(regex, name): if re.match(regex, name):
self.logger.debug("Skipping past top template: %s", name)
self.logger.debug(f"Skipping past top template: {name}")
predecessor = template predecessor = template
break break
if "wikiproject" in name or name.startswith("wp"): if "wikiproject" in name or name.startswith("wp"):
self.logger.debug("Skipping past banner template: %s", name)
self.logger.debug(f"Skipping past banner template: {name}")
predecessor = template predecessor = template


if predecessor: if predecessor:
@@ -451,35 +543,3 @@ class WikiProjectTagger(Task):
else: else:
self.logger.debug("Inserting banner at beginning") self.logger.debug("Inserting banner at beginning")
code.insert(0, banner + "\n") code.insert(0, banner + "\n")


class _Job:
    """Holds the settings and running state of one wikiproject-tagging task.

    The settings cover the banner to add, the edit summary, and whether to
    autoassess or create new pages from scratch. The running state is a page
    counter plus the sets of category and page titles already processed.
    """

    def __init__(self, **kwargs):
        # Every setting is mandatory: a missing key raises KeyError, checked in
        # this order. Keys not named here are ignored.
        for attr in (
            "banner",
            "names",
            "summary",
            "update",
            "append",
            "autoassess",
            "only_with",
            "nocreate",
            "tag_categories",
            "dry_run",
        ):
            setattr(self, attr, kwargs[attr])

        # Running state always starts out empty.
        self.counter = 0
        self.processed_cats = set()
        self.processed_pages = set()


class _ShutoffEnabled(Exception):
    """Signal that shutoff is enabled.

    Raised by process_page() and caught by run(), which then stops the task.
    """

+ 1
- 1
earwigbot/wiki/site.py View File

@@ -894,7 +894,7 @@ class Site:
raise exceptions.APIError(err.format(action, res)) raise exceptions.APIError(err.format(action, res))
return self._tokens[action] return self._tokens[action]


def namespace_id_to_name(self, ns_id, all=False):
def namespace_id_to_name(self, ns_id: int, all: bool = False) -> str:
"""Given a namespace ID, returns associated namespace names. """Given a namespace ID, returns associated namespace names.


If *all* is ``False`` (default), we'll return the first name in the If *all* is ``False`` (default), we'll return the first name in the


Loading…
Cancel
Save