Browse Source

wikiproject_tagger: Add typing

tags/v0.4
Ben Kurtovic 9 months ago
parent
commit
56df9eca5b
2 changed files with 191 additions and 131 deletions
  1. +190
    -130
      earwigbot/tasks/wikiproject_tagger.py
  2. +1
    -1
      earwigbot/wiki/site.py

+ 190
- 130
earwigbot/tasks/wikiproject_tagger.py View File

@@ -19,67 +19,123 @@
# SOFTWARE.

import re
from dataclasses import dataclass, field
from typing import NotRequired, TypedDict, Unpack

from mwparserfromhell.nodes import Template
from mwparserfromhell.wikicode import Wikicode

from earwigbot import exceptions
from earwigbot.tasks import Task
from earwigbot.wiki import constants
from earwigbot.wiki import Category, Page, Site, constants

# Keyword arguments accepted by WikiProjectTagger.run(). The functional TypedDict
# form is used because several keys contain hyphens and therefore cannot be
# declared as class attributes. Only "banner" is declared as required here; every
# other key is NotRequired.
JobKwargs = TypedDict(
    "JobKwargs",
    {
        # Page name of the banner to add.
        "banner": str,
        # Category whose pages should be tagged.
        "category": NotRequired[str],
        # Path of a file listing pages/categories to tag.
        "file": NotRequired[str],
        # Edit summary to use instead of the default.
        "summary": NotRequired[str],
        # Update existing banners with new fields.
        "update": NotRequired[bool],
        # Comma-separated parameters to append to the banner.
        "append": NotRequired[str],
        # Auto-assess article class; a string is a comma-separated list of the
        # only classes to auto-assess.
        "autoassess": NotRequired[bool | str],
        # Only tag pages that already have this banner.
        "only-with": NotRequired[str],
        # Don't create talk pages that do not already exist.
        "nocreate": NotRequired[bool],
        # Recurse into subcategories; an int is the maximum depth.
        # NOTE(review): presumably True means unlimited depth -- confirm in run().
        "recursive": NotRequired[bool | int],
        # Also tag category pages.
        "tag-categories": NotRequired[bool],
        # ID of the site to tag pages on.
        "site": NotRequired[str],
        # Log the pages that would be edited instead of editing them.
        "dry-run": NotRequired[bool],
    },
)


@dataclass
class Job:
"""
Represents a single wikiproject-tagging task.

Stores information on the banner to add, the edit summary to use, whether or not to
autoassess and create new pages from scratch, and a counter of the number of pages
edited.
"""

banner: str
names: set[str]
summary: str
update: bool
append: str | None
autoassess: bool | str
only_with: set[str] | None
nocreate: bool
tag_categories: bool
dry_run: bool

counter: int = 0
processed_cats: set[str] = field(default_factory=set)
processed_pages: set[str] = field(default_factory=set)


class ShutoffEnabled(Exception):
    """
    Signal that the shutoff check reported the task must stop.

    process_page() raises this when its periodic shutoff check is positive; run()
    catches it and returns, ending the task.
    """


class WikiProjectTagger(Task):
"""A task to tag talk pages with WikiProject banners.
"""
A task to tag talk pages with WikiProject banners.

Usage: :command:`earwigbot -t wikiproject_tagger PATH
--banner BANNER (--category CAT | --file FILE) [--summary SUM] [--update]
[--append PARAMS] [--autoassess [CLASSES]] [--only-with BANNER]
[--nocreate] [--recursive [NUM]] [--site SITE] [--dry-run]`
Usage: :command:`earwigbot -t wikiproject_tagger PATH --banner BANNER
(--category CAT | --file FILE) [--summary SUM] [--update] [--append PARAMS]
[--autoassess [CLASSES]] [--only-with BANNER] [--nocreate] [--recursive [NUM]]
[--tag-categories] [--site SITE] [--dry-run]`

.. glossary::

``--banner BANNER``
the page name of the banner to add, without a namespace (unless the
namespace is something other than ``Template``) so
``--banner "WikiProject Biography"`` for ``{{WikiProject Biography}}``
the page name of the banner to add, without a namespace (unless the namespace
is something other than ``Template``) so ``--banner "WikiProject Biography"``
for ``{{WikiProject Biography}}``
``--category CAT`` or ``--file FILE``
determines which pages to tag; either all pages in a category (to
include subcategories as well, see ``--recursive``) or all
pages/categories in a file (utf-8 encoded and path relative to the
current directory)
determines which pages to tag; either all pages in a category (to include
subcategories as well, see ``--recursive``) or all pages/categories in a file
(utf-8 encoded and path relative to the current directory)
``--summary SUM``
an optional edit summary to use; defaults to
``"Tagging with WikiProject banner {{BANNER}}."``
an optional edit summary to use; defaults to ``"Tagging with WikiProject banner
{{BANNER}}."``
``--update``
updates existing banners with new fields; should include at least one
of ``--append`` or ``--autoassess`` to be useful
updates existing banners with new fields; should include at least one of
``--append`` or ``--autoassess`` to be useful
``--append PARAMS``
optional comma-separated parameters to append to the banner (after an
auto-assessment, if any); use syntax ``importance=low,taskforce=yes``
to add ``|importance=low|taskforce=yes``
auto-assessment, if any); use syntax ``importance=low,taskforce=yes`` to add
``|importance=low|taskforce=yes``
``--autoassess [CLASSES]``
try to assess each article's class automatically based on the class of
other banners on the same page; if CLASSES is given as a
comma-separated list, only those classes will be auto-assessed
try to assess each article's class automatically based on the class of other
banners on the same page; if CLASSES is given as a comma-separated list, only
those classes will be auto-assessed
``--only-with BANNER``
only tag pages that already have the given banner
``--nocreate``
don't create new talk pages with just a banner if the page doesn't
already exist
``--recursive [NUM]``
recursively go through subcategories up to a maximum depth of ``NUM``,
or if ``NUM`` isn't provided, go infinitely (this can be dangerous)
recursively go through subcategories up to a maximum depth of ``NUM``, or if
``NUM`` isn't provided, go infinitely (this can be dangerous)
``--tag-categories``
also tag category pages
``--site SITE``
the ID of the site to tag pages on, defaulting to the default site
``--dry-run``
don't actually make any edits, just log the pages that would have been
edited

don't actually make any edits, just log the pages that would have been edited
"""

name = "wikiproject_tagger"

# Regexes for template names that should always go above the banner, based
# on [[Wikipedia:Talk page layout]]:
# Regexes for template names that should always go above the banner, based on
# [[Wikipedia:Talk page layout]]:
TOP_TEMPS = [
r"skip ?to ?(toc|talk|toctalk)$",
r"ga ?nominee$",
@@ -100,22 +156,27 @@ class WikiProjectTagger(Task):
]

@staticmethod
def _upperfirst(text):
"""Try to uppercase the first letter of a string."""
def _upperfirst(text: str) -> str:
"""
Try to uppercase the first letter of a string.
"""
try:
return text[0].upper() + text[1:]
except IndexError:
return text

def run(self, **kwargs):
"""Main entry point for the bot task."""
def run(self, **kwargs: Unpack[JobKwargs]) -> None:
"""
Main entry point for the bot task.
"""
if "file" not in kwargs and "category" not in kwargs:
log = "No pages to tag; I need either a 'category' or a 'file' passed as kwargs"
self.logger.error(log)
self.logger.error(
"No pages to tag; I need either a 'category' or a 'file' passed "
"as kwargs"
)
return
if "banner" not in kwargs:
log = "Needs a banner to add passed as the 'banner' kwarg"
self.logger.error(log)
self.logger.error("Needs a banner to add passed as the 'banner' kwarg")
return

site = self.bot.wiki.get_site(name=kwargs.get("site"))
@@ -139,7 +200,7 @@ class WikiProjectTagger(Task):
else:
only_with = None

job = _Job(
job = Job(
banner=banner,
names=names,
summary=summary,
@@ -154,11 +215,15 @@ class WikiProjectTagger(Task):

try:
self.run_job(kwargs, site, job, recursive)
except _ShutoffEnabled:
except ShutoffEnabled:
return

def run_job(self, kwargs, site, job, recursive):
"""Run a tagging *job* on a given *site*."""
def run_job(
self, kwargs: JobKwargs, site: Site, job: Job, recursive: bool | int
) -> None:
"""
Run a tagging *job* on a given *site*.
"""
if "category" in kwargs:
title = kwargs["category"]
title = self.guess_namespace(site, title, constants.NS_CATEGORY)
@@ -168,19 +233,22 @@ class WikiProjectTagger(Task):
with open(kwargs["file"]) as fileobj:
for line in fileobj:
if line.strip():
if line.startswith("[[") and line.endswith("]]"):
line = line[2:-2]
if "[[" in line:
match = re.search(r"\[\[(.+?)\]\]", line)
if match:
line = match.group(1)
page = site.get_page(line)
if page.namespace == constants.NS_CATEGORY:
self.process_category(page, job, recursive)
else:
self.process_page(page, job)

def guess_namespace(self, site, title, assumed):
"""If the given *title* does not have an explicit namespace, guess it.
def guess_namespace(self, site: Site, title: str, assumed: int) -> str:
"""
If the given *title* does not have an explicit namespace, guess it.

For example, when transcluding templates, the namespace is guessed to
be ``NS_TEMPLATE`` unless one is explicitly declared (so ``{{foo}}`` ->
For example, when transcluding templates, the namespace is guessed to be
``NS_TEMPLATE`` unless one is explicitly declared (so ``{{foo}}`` ->
``[[Template:Foo]]``, but ``{{:foo}}`` -> ``[[Foo]]``).
"""
prefix = title.split(":", 1)[0]
@@ -192,14 +260,16 @@ class WikiProjectTagger(Task):
return ":".join((site.namespace_id_to_name(assumed), title))
return title

def get_names(self, site, banner):
"""Return all possible aliases for a given *banner* template."""
def get_names(self, site: Site, banner: str) -> tuple[str, set[str] | None]:
"""
Return all possible aliases for a given *banner* template.
"""
title = self.guess_namespace(site, banner, constants.NS_TEMPLATE)
if title == banner:
banner = banner.split(":", 1)[1]
page = site.get_page(title)
if page.exists != page.PAGE_EXISTS:
self.logger.error("Banner [[%s]] does not exist", title)
self.logger.error(f"Banner [[{title}]] does not exist")
return banner, None

names = {banner, title}
@@ -215,18 +285,18 @@ class WikiProjectTagger(Task):
if backlink["ns"] == constants.NS_TEMPLATE:
names.add(backlink["title"].split(":", 1)[1])

log = "Found %s aliases for banner [[%s]]"
self.logger.debug(log, len(names), title)
self.logger.debug(f"Found {len(names)} aliases for banner [[{title}]]")
return banner, names

def process_category(self, page, job, recursive):
"""Try to tag all pages in the given category."""
def process_category(self, page: Page, job: Job, recursive: bool | int) -> None:
"""
Try to tag all pages in the given category.
"""
assert isinstance(page, Category), f"[[{page.title}]] is not a category"
if page.title in job.processed_cats:
self.logger.debug(
"Skipping category, already processed: [[%s]]", page.title
)
self.logger.debug(f"Skipping category, already processed: [[{page.title}]]")
return
self.logger.info("Processing category: [[%s]]", page.title)
self.logger.info(f"Processing category: [[{page.title}]]")
job.processed_cats.add(page.title)

if job.tag_categories:
@@ -245,19 +315,21 @@ class WikiProjectTagger(Task):
else:
self.process_page(member, job)

def process_page(self, page, job):
"""Try to tag a specific *page* using the *job* description."""
def process_page(self, page: Page, job: Job) -> None:
"""
Try to tag a specific *page* using the *job* description.
"""
if not page.is_talkpage:
page = page.toggle_talk()

if page.title in job.processed_pages:
self.logger.debug("Skipping page, already processed: [[%s]]", page.title)
self.logger.debug(f"Skipping page, already processed: [[{page.title}]]")
return
job.processed_pages.add(page.title)

if job.counter % 10 == 0: # Do a shutoff check every ten pages
if self.shutoff_enabled(page.site):
raise _ShutoffEnabled()
raise ShutoffEnabled()
job.counter += 1

try:
@@ -266,7 +338,7 @@ class WikiProjectTagger(Task):
self.process_new_page(page, job)
return
except exceptions.InvalidPageError:
self.logger.error("Skipping invalid page: [[%s]]", page.title)
self.logger.error(f"Skipping invalid page: [[{page.title}]]")
return

is_update = False
@@ -277,8 +349,10 @@ class WikiProjectTagger(Task):
is_update = True
break
else:
log = "Skipping page: [[%s]]; already tagged with '%s'"
self.logger.info(log, page.title, template.name)
self.logger.info(
f"Skipping page: [[{page.title}]]; already tagged with "
f"{template.name!r}"
)
return

if job.only_with:
@@ -286,20 +360,22 @@ class WikiProjectTagger(Task):
template.name.matches(job.only_with)
for template in code.ifilter_templates(recursive=True)
):
log = "Skipping page: [[%s]]; fails only-with condition"
self.logger.info(log, page.title)
self.logger.info(
f"Skipping page: [[{page.title}]]; fails only-with condition"
)
return

if is_update:
updated = self.update_banner(banner, job, code)
if not updated:
log = "Skipping page: [[%s]]; already tagged and no updates"
self.logger.info(log, page.title)
self.logger.info(
f"Skipping page: [[{page.title}]]; already tagged and no updates"
)
return
self.logger.info("Updating banner on page: [[%s]]", page.title)
self.logger.info(f"Updating banner on page: [[{page.title}]]")
banner = str(banner)
else:
self.logger.info("Tagging page: [[%s]]", page.title)
self.logger.info(f"Tagging page: [[{page.title}]]")
banner = self.make_banner(job, code)
shell = self.get_banner_shell(code)
if shell:
@@ -309,28 +385,33 @@ class WikiProjectTagger(Task):

self.save_page(page, job, str(code), banner)

def process_new_page(self, page, job):
"""Try to tag a *page* that doesn't exist yet using the *job*."""
def process_new_page(self, page: Page, job: Job) -> None:
"""
Try to tag a *page* that doesn't exist yet using the *job*.
"""
if job.nocreate or job.only_with:
log = "Skipping nonexistent page: [[%s]]"
self.logger.info(log, page.title)
self.logger.info(f"Skipping nonexistent page: [[{page.title}]]")
else:
self.logger.info("Tagging new page: [[%s]]", page.title)
self.logger.info(f"Tagging new page: [[{page.title}]]")
banner = self.make_banner(job)
self.save_page(page, job, banner, banner)

def save_page(self, page, job, text, banner):
"""Save a page with an updated banner."""
def save_page(self, page: Page, job: Job, text: str, banner: str) -> None:
"""
Save a page with an updated banner.
"""
if job.dry_run:
self.logger.debug("[DRY RUN] Banner: %s", banner)
self.logger.debug(f"[DRY RUN] Banner: {banner}")
else:
summary = job.summary.replace("$3", banner)
page.edit(text, self.make_summary(summary), minor=True)

def make_banner(self, job, code=None):
"""Return banner text to add based on a *job* and a page's *code*."""
def make_banner(self, job: Job, code: Wikicode | None = None) -> str:
"""
Return banner text to add based on a *job* and a page's *code*.
"""
banner = job.banner
if code is not None and job.autoassess is not False:
if code is not None and job.autoassess:
assess, reason = self.get_autoassessment(code, job.autoassess)
if assess:
banner += "|class=" + assess
@@ -340,14 +421,16 @@ class WikiProjectTagger(Task):
banner += "|" + "|".join(job.append.split(","))
return "{{" + banner + "}}"

def update_banner(self, banner, job, code):
"""Update an existing *banner* based on a *job* and a page's *code*."""
def update_banner(self, banner: Template, job: Job, code: Wikicode) -> bool:
"""
Update an existing *banner* based on a *job* and a page's *code*.
"""

def has(key):
def has(key: str) -> bool:
return banner.has(key) and banner.get(key).value.strip() not in ("", "?")

updated = False
if job.autoassess is not False:
if job.autoassess:
if not has("class"):
assess, reason = self.get_autoassessment(code, job.autoassess)
if assess:
@@ -362,8 +445,11 @@ class WikiProjectTagger(Task):
updated = True
return updated

def get_autoassessment(self, code, only_classes=None):
"""Get an autoassessment for a page.
def get_autoassessment(
self, code, only_classes: bool | str = False
) -> tuple[str, str] | tuple[None, None]:
"""
Get an autoassessment for a page.

Return (assessed class as a string or None, assessment reason or None).
"""
@@ -383,6 +469,7 @@ class WikiProjectTagger(Task):
"stub",
]
else:
assert only_classes, only_classes
classnames = [klass.strip().lower() for klass in only_classes.split(",")]

classes = {klass: 0 for klass in classnames}
@@ -404,19 +491,22 @@ class WikiProjectTagger(Task):
return self._upperfirst(rank), "inherit"
return None, None

def get_banner_shell(self, code):
"""Return the banner shell template within *code*, else ``None``."""
def get_banner_shell(self, code: Wikicode) -> Template | None:
"""
Return the banner shell template within *code*, else ``None``.
"""
regex = r"^\{\{\s*((WikiProject|WP)[ _]?Banner[ _]?S(hell)?|W(BPS|PBS|PB)|Shell)\s*(\||\}\})"
shells = code.filter_templates(matches=regex)
if not shells:
shells = code.filter_templates(matches=regex, recursive=True)
if shells:
log = "Inserting banner into shell: %s"
self.logger.debug(log, shells[0].name)
self.logger.debug(f"Inserting banner into shell: {shells[0].name}")
return shells[0]

def add_banner_to_shell(self, shell, banner):
"""Add *banner* to *shell*."""
def add_banner_to_shell(self, shell: Template, banner: str) -> None:
"""
Add *banner* to *shell*.
"""
if shell.has_param(1):
if str(shell.get(1).value).endswith("\n"):
banner += "\n"
@@ -426,18 +516,20 @@ class WikiProjectTagger(Task):
else:
shell.add(1, banner)

def add_banner(self, code, banner):
"""Add *banner* to *code*, following template order conventions."""
def add_banner(self, code: Wikicode, banner: str) -> None:
"""
Add *banner* to *code*, following template order conventions.
"""
predecessor = None
for template in code.ifilter_templates(recursive=False):
name = template.name.lower().replace("_", " ")
for regex in self.TOP_TEMPS:
if re.match(regex, name):
self.logger.debug("Skipping past top template: %s", name)
self.logger.debug(f"Skipping past top template: {name}")
predecessor = template
break
if "wikiproject" in name or name.startswith("wp"):
self.logger.debug("Skipping past banner template: %s", name)
self.logger.debug(f"Skipping past banner template: {name}")
predecessor = template

if predecessor:
@@ -451,35 +543,3 @@ class WikiProjectTagger(Task):
else:
self.logger.debug("Inserting banner at beginning")
code.insert(0, banner + "\n")


class _Job:
    """
    State for one wikiproject-tagging run.

    Keeps the banner to add, the edit summary, the flags controlling
    auto-assessment and creation of new pages, and a page counter.
    """

    def __init__(self, **kwargs):
        # Every option below is mandatory: a missing one raises KeyError, and the
        # options are looked up in this order. Unrecognised keywords are ignored.
        required = (
            "banner",
            "names",
            "summary",
            "update",
            "append",
            "autoassess",
            "only_with",
            "nocreate",
            "tag_categories",
            "dry_run",
        )
        for option in required:
            setattr(self, option, kwargs[option])

        # Per-run bookkeeping always starts out empty.
        self.counter = 0
        self.processed_cats = set()
        self.processed_pages = set()


class _ShutoffEnabled(Exception):
    """
    Raised by process_page() if shutoff is enabled.

    Caught by run(), which will then stop the task.
    """

+ 1
- 1
earwigbot/wiki/site.py View File

@@ -894,7 +894,7 @@ class Site:
raise exceptions.APIError(err.format(action, res))
return self._tokens[action]

def namespace_id_to_name(self, ns_id, all=False):
def namespace_id_to_name(self, ns_id: int, all: bool = False) -> str:
"""Given a namespace ID, returns associated namespace names.

If *all* is ``False`` (default), we'll return the first name in the


Loading…
Cancel
Save