From 56df9eca5b0a54cdf5220f71a05559691e1bd57a Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 8 Apr 2024 20:06:43 -0400 Subject: [PATCH] wikiproject_tagger: Add typing --- earwigbot/tasks/wikiproject_tagger.py | 320 ++++++++++++++++++++-------------- earwigbot/wiki/site.py | 2 +- 2 files changed, 191 insertions(+), 131 deletions(-) diff --git a/earwigbot/tasks/wikiproject_tagger.py b/earwigbot/tasks/wikiproject_tagger.py index c0e80de..9620c84 100644 --- a/earwigbot/tasks/wikiproject_tagger.py +++ b/earwigbot/tasks/wikiproject_tagger.py @@ -19,67 +19,123 @@ # SOFTWARE. import re +from dataclasses import dataclass, field +from typing import NotRequired, TypedDict, Unpack + +from mwparserfromhell.nodes import Template +from mwparserfromhell.wikicode import Wikicode from earwigbot import exceptions from earwigbot.tasks import Task -from earwigbot.wiki import constants +from earwigbot.wiki import Category, Page, Site, constants + +JobKwargs = TypedDict( + "JobKwargs", + { + "banner": str, + "category": NotRequired[str], + "file": NotRequired[str], + "summary": NotRequired[str], + "update": NotRequired[bool], + "append": NotRequired[str], + "autoassess": NotRequired[bool | str], + "only-with": NotRequired[str], + "nocreate": NotRequired[bool], + "recursive": NotRequired[bool | int], + "tag-categories": NotRequired[bool], + "site": NotRequired[str], + "dry-run": NotRequired[bool], + }, +) + + +@dataclass +class Job: + """ + Represents a single wikiproject-tagging task. + + Stores information on the banner to add, the edit summary to use, whether or not to + autoassess and create new pages from scratch, and a counter of the number of pages + edited. 
+ """ + + banner: str + names: set[str] + summary: str + update: bool + append: str | None + autoassess: bool | str + only_with: set[str] | None + nocreate: bool + tag_categories: bool + dry_run: bool + + counter: int = 0 + processed_cats: set[str] = field(default_factory=set) + processed_pages: set[str] = field(default_factory=set) + + +class ShutoffEnabled(Exception): + """ + Raised by process_page() if shutoff is enabled. + + Caught by run(), which will then stop the task. + """ class WikiProjectTagger(Task): - """A task to tag talk pages with WikiProject banners. + """ + A task to tag talk pages with WikiProject banners. - Usage: :command:`earwigbot -t wikiproject_tagger PATH - --banner BANNER (--category CAT | --file FILE) [--summary SUM] [--update] - [--append PARAMS] [--autoassess [CLASSES]] [--only-with BANNER] - [--nocreate] [--recursive [NUM]] [--site SITE] [--dry-run]` + Usage: :command:`earwigbot -t wikiproject_tagger PATH --banner BANNER + (--category CAT | --file FILE) [--summary SUM] [--update] [--append PARAMS] + [--autoassess [CLASSES]] [--only-with BANNER] [--nocreate] [--recursive [NUM]] + [--site SITE] [--dry-run]` .. 
glossary:: ``--banner BANNER`` - the page name of the banner to add, without a namespace (unless the - namespace is something other than ``Template``) so - ``--banner "WikiProject Biography"`` for ``{{WikiProject Biography}}`` + the page name of the banner to add, without a namespace (unless the namespace + is something other than ``Template``) so ``--banner "WikiProject Biography"`` + for ``{{WikiProject Biography}}`` ``--category CAT`` or ``--file FILE`` - determines which pages to tag; either all pages in a category (to - include subcategories as well, see ``--recursive``) or all - pages/categories in a file (utf-8 encoded and path relative to the - current directory) + determines which pages to tag; either all pages in a category (to include + subcategories as well, see ``--recursive``) or all pages/categories in a file + (utf-8 encoded and path relative to the current directory) ``--summary SUM`` - an optional edit summary to use; defaults to - ``"Tagging with WikiProject banner {{BANNER}}."`` + an optional edit summary to use; defaults to ``"Tagging with WikiProject banner + {{BANNER}}."`` ``--update`` - updates existing banners with new fields; should include at least one - of ``--append`` or ``--autoassess`` to be useful + updates existing banners with new fields; should include at least one of + ``--append`` or ``--autoassess`` to be useful ``--append PARAMS`` optional comma-separated parameters to append to the banner (after an - auto-assessment, if any); use syntax ``importance=low,taskforce=yes`` - to add ``|importance=low|taskforce=yes`` + auto-assessment, if any); use syntax ``importance=low,taskforce=yes`` to add + ``|importance=low|taskforce=yes`` ``--autoassess [CLASSES]`` - try to assess each article's class automatically based on the class of - other banners on the same page; if CLASSES is given as a - comma-separated list, only those classes will be auto-assessed + try to assess each article's class automatically based on the class of other + 
banners on the same page; if CLASSES is given as a comma-separated list, only + those classes will be auto-assessed ``--only-with BANNER`` only tag pages that already have the given banner ``--nocreate`` don't create new talk pages with just a banner if the page doesn't already exist ``--recursive NUM`` - recursively go through subcategories up to a maximum depth of ``NUM``, - or if ``NUM`` isn't provided, go infinitely (this can be dangerous) + recursively go through subcategories up to a maximum depth of ``NUM``, or if + ``NUM`` isn't provided, go infinitely (this can be dangerous) ``--tag-categories`` also tag category pages ``--site SITE`` the ID of the site to tag pages on, defaulting to the default site ``--dry-run`` - don't actually make any edits, just log the pages that would have been - edited - + don't actually make any edits, just log the pages that would have been edited """ name = "wikiproject_tagger" - # Regexes for template names that should always go above the banner, based - # on [[Wikipedia:Talk page layout]]: + # Regexes for template names that should always go above the banner, based on + # [[Wikipedia:Talk page layout]]: TOP_TEMPS = [ r"skip ?to ?(toc|talk|toctalk)$", r"ga ?nominee$", @@ -100,22 +156,27 @@ class WikiProjectTagger(Task): ] @staticmethod - def _upperfirst(text): - """Try to uppercase the first letter of a string.""" + def _upperfirst(text: str) -> str: + """ + Try to uppercase the first letter of a string. + """ try: return text[0].upper() + text[1:] except IndexError: return text - def run(self, **kwargs): - """Main entry point for the bot task.""" + def run(self, **kwargs: Unpack[JobKwargs]) -> None: + """ + Main entry point for the bot task. 
+ """ if "file" not in kwargs and "category" not in kwargs: - log = "No pages to tag; I need either a 'category' or a 'file' passed as kwargs" - self.logger.error(log) + self.logger.error( + "No pages to tag; I need either a 'category' or a 'file' passed " + "as kwargs" + ) return if "banner" not in kwargs: - log = "Needs a banner to add passed as the 'banner' kwarg" - self.logger.error(log) + self.logger.error("Needs a banner to add passed as the 'banner' kwarg") return site = self.bot.wiki.get_site(name=kwargs.get("site")) @@ -139,7 +200,7 @@ class WikiProjectTagger(Task): else: only_with = None - job = _Job( + job = Job( banner=banner, names=names, summary=summary, @@ -154,11 +215,15 @@ class WikiProjectTagger(Task): try: self.run_job(kwargs, site, job, recursive) - except _ShutoffEnabled: + except ShutoffEnabled: return - def run_job(self, kwargs, site, job, recursive): - """Run a tagging *job* on a given *site*.""" + def run_job( + self, kwargs: JobKwargs, site: Site, job: Job, recursive: bool | int + ) -> None: + """ + Run a tagging *job* on a given *site*. + """ if "category" in kwargs: title = kwargs["category"] title = self.guess_namespace(site, title, constants.NS_CATEGORY) @@ -168,19 +233,22 @@ class WikiProjectTagger(Task): with open(kwargs["file"]) as fileobj: for line in fileobj: if line.strip(): - if line.startswith("[[") and line.endswith("]]"): - line = line[2:-2] + if "[[" in line: + match = re.search(r"\[\[(.+?)\]\]", line) + if match: + line = match.group(1) page = site.get_page(line) if page.namespace == constants.NS_CATEGORY: self.process_category(page, job, recursive) else: self.process_page(page, job) - def guess_namespace(self, site, title, assumed): - """If the given *title* does not have an explicit namespace, guess it. + def guess_namespace(self, site: Site, title: str, assumed: int) -> str: + """ + If the given *title* does not have an explicit namespace, guess it. 
- For example, when transcluding templates, the namespace is guessed to - be ``NS_TEMPLATE`` unless one is explicitly declared (so ``{{foo}}`` -> + For example, when transcluding templates, the namespace is guessed to be + ``NS_TEMPLATE`` unless one is explicitly declared (so ``{{foo}}`` -> ``[[Template:Foo]]``, but ``{{:foo}}`` -> ``[[Foo]]``). """ prefix = title.split(":", 1)[0] @@ -192,14 +260,16 @@ class WikiProjectTagger(Task): return ":".join((site.namespace_id_to_name(assumed), title)) return title - def get_names(self, site, banner): - """Return all possible aliases for a given *banner* template.""" + def get_names(self, site: Site, banner: str) -> tuple[str, set[str] | None]: + """ + Return all possible aliases for a given *banner* template. + """ title = self.guess_namespace(site, banner, constants.NS_TEMPLATE) if title == banner: banner = banner.split(":", 1)[1] page = site.get_page(title) if page.exists != page.PAGE_EXISTS: - self.logger.error("Banner [[%s]] does not exist", title) + self.logger.error(f"Banner [[{title}]] does not exist") return banner, None names = {banner, title} @@ -215,18 +285,18 @@ class WikiProjectTagger(Task): if backlink["ns"] == constants.NS_TEMPLATE: names.add(backlink["title"].split(":", 1)[1]) - log = "Found %s aliases for banner [[%s]]" - self.logger.debug(log, len(names), title) + self.logger.debug(f"Found {len(names)} aliases for banner [[{title}]]") return banner, names - def process_category(self, page, job, recursive): - """Try to tag all pages in the given category.""" + def process_category(self, page: Page, job: Job, recursive: bool | int) -> None: + """ + Try to tag all pages in the given category. 
+ """ + assert isinstance(page, Category), f"[[{page.title}]] is not a category" if page.title in job.processed_cats: - self.logger.debug( - "Skipping category, already processed: [[%s]]", page.title - ) + self.logger.debug(f"Skipping category, already processed: [[{page.title}]]") return - self.logger.info("Processing category: [[%s]]", page.title) + self.logger.info(f"Processing category: [[{page.title}]]") job.processed_cats.add(page.title) if job.tag_categories: @@ -245,19 +315,21 @@ class WikiProjectTagger(Task): else: self.process_page(member, job) - def process_page(self, page, job): - """Try to tag a specific *page* using the *job* description.""" + def process_page(self, page: Page, job: Job) -> None: + """ + Try to tag a specific *page* using the *job* description. + """ if not page.is_talkpage: page = page.toggle_talk() if page.title in job.processed_pages: - self.logger.debug("Skipping page, already processed: [[%s]]", page.title) + self.logger.debug(f"Skipping page, already processed: [[{page.title}]]") return job.processed_pages.add(page.title) if job.counter % 10 == 0: # Do a shutoff check every ten pages if self.shutoff_enabled(page.site): - raise _ShutoffEnabled() + raise ShutoffEnabled() job.counter += 1 try: @@ -266,7 +338,7 @@ class WikiProjectTagger(Task): self.process_new_page(page, job) return except exceptions.InvalidPageError: - self.logger.error("Skipping invalid page: [[%s]]", page.title) + self.logger.error(f"Skipping invalid page: [[{page.title}]]") return is_update = False @@ -277,8 +349,10 @@ class WikiProjectTagger(Task): is_update = True break else: - log = "Skipping page: [[%s]]; already tagged with '%s'" - self.logger.info(log, page.title, template.name) + self.logger.info( + f"Skipping page: [[{page.title}]]; already tagged with " + f"{template.name!r}" + ) return if job.only_with: @@ -286,20 +360,22 @@ class WikiProjectTagger(Task): template.name.matches(job.only_with) for template in code.ifilter_templates(recursive=True) ): - 
log = "Skipping page: [[%s]]; fails only-with condition" - self.logger.info(log, page.title) + self.logger.info( + f"Skipping page: [[{page.title}]]; fails only-with condition" + ) return if is_update: updated = self.update_banner(banner, job, code) if not updated: - log = "Skipping page: [[%s]]; already tagged and no updates" - self.logger.info(log, page.title) + self.logger.info( + f"Skipping page: [[{page.title}]]; already tagged and no updates" + ) return - self.logger.info("Updating banner on page: [[%s]]", page.title) + self.logger.info(f"Updating banner on page: [[{page.title}]]") banner = str(banner) else: - self.logger.info("Tagging page: [[%s]]", page.title) + self.logger.info(f"Tagging page: [[{page.title}]]") banner = self.make_banner(job, code) shell = self.get_banner_shell(code) if shell: @@ -309,28 +385,33 @@ class WikiProjectTagger(Task): self.save_page(page, job, str(code), banner) - def process_new_page(self, page, job): - """Try to tag a *page* that doesn't exist yet using the *job*.""" + def process_new_page(self, page: Page, job: Job) -> None: + """ + Try to tag a *page* that doesn't exist yet using the *job*. + """ if job.nocreate or job.only_with: - log = "Skipping nonexistent page: [[%s]]" - self.logger.info(log, page.title) + self.logger.info(f"Skipping nonexistent page: [[{page.title}]]") else: - self.logger.info("Tagging new page: [[%s]]", page.title) + self.logger.info(f"Tagging new page: [[{page.title}]]") banner = self.make_banner(job) self.save_page(page, job, banner, banner) - def save_page(self, page, job, text, banner): - """Save a page with an updated banner.""" + def save_page(self, page: Page, job: Job, text: str, banner: str) -> None: + """ + Save a page with an updated banner. 
+ """ if job.dry_run: - self.logger.debug("[DRY RUN] Banner: %s", banner) + self.logger.debug(f"[DRY RUN] Banner: {banner}") else: summary = job.summary.replace("$3", banner) page.edit(text, self.make_summary(summary), minor=True) - def make_banner(self, job, code=None): - """Return banner text to add based on a *job* and a page's *code*.""" + def make_banner(self, job: Job, code: Wikicode | None = None) -> str: + """ + Return banner text to add based on a *job* and a page's *code*. + """ banner = job.banner - if code is not None and job.autoassess is not False: + if code is not None and job.autoassess: assess, reason = self.get_autoassessment(code, job.autoassess) if assess: banner += "|class=" + assess @@ -340,14 +421,16 @@ class WikiProjectTagger(Task): banner += "|" + "|".join(job.append.split(",")) return "{{" + banner + "}}" - def update_banner(self, banner, job, code): - """Update an existing *banner* based on a *job* and a page's *code*.""" + def update_banner(self, banner: Template, job: Job, code: Wikicode) -> bool: + """ + Update an existing *banner* based on a *job* and a page's *code*. + """ - def has(key): + def has(key: str) -> bool: return banner.has(key) and banner.get(key).value.strip() not in ("", "?") updated = False - if job.autoassess is not False: + if job.autoassess: if not has("class"): assess, reason = self.get_autoassessment(code, job.autoassess) if assess: @@ -362,8 +445,11 @@ class WikiProjectTagger(Task): updated = True return updated - def get_autoassessment(self, code, only_classes=None): - """Get an autoassessment for a page. + def get_autoassessment( + self, code, only_classes: bool | str = False + ) -> tuple[str, str] | tuple[None, None]: + """ + Get an autoassessment for a page. Return (assessed class as a string or None, assessment reason or None). 
""" @@ -383,6 +469,7 @@ class WikiProjectTagger(Task): "stub", ] else: + assert only_classes, only_classes classnames = [klass.strip().lower() for klass in only_classes.split(",")] classes = {klass: 0 for klass in classnames} @@ -404,19 +491,22 @@ class WikiProjectTagger(Task): return self._upperfirst(rank), "inherit" return None, None - def get_banner_shell(self, code): - """Return the banner shell template within *code*, else ``None``.""" + def get_banner_shell(self, code: Wikicode) -> Template | None: + """ + Return the banner shell template within *code*, else ``None``. + """ regex = r"^\{\{\s*((WikiProject|WP)[ _]?Banner[ _]?S(hell)?|W(BPS|PBS|PB)|Shell)\s*(\||\}\})" shells = code.filter_templates(matches=regex) if not shells: shells = code.filter_templates(matches=regex, recursive=True) if shells: - log = "Inserting banner into shell: %s" - self.logger.debug(log, shells[0].name) + self.logger.debug(f"Inserting banner into shell: {shells[0].name}") return shells[0] - def add_banner_to_shell(self, shell, banner): - """Add *banner* to *shell*.""" + def add_banner_to_shell(self, shell: Template, banner: str) -> None: + """ + Add *banner* to *shell*. + """ if shell.has_param(1): if str(shell.get(1).value).endswith("\n"): banner += "\n" @@ -426,18 +516,20 @@ class WikiProjectTagger(Task): else: shell.add(1, banner) - def add_banner(self, code, banner): - """Add *banner* to *code*, following template order conventions.""" + def add_banner(self, code: Wikicode, banner: str) -> None: + """ + Add *banner* to *code*, following template order conventions. 
+ """ predecessor = None for template in code.ifilter_templates(recursive=False): name = template.name.lower().replace("_", " ") for regex in self.TOP_TEMPS: if re.match(regex, name): - self.logger.debug("Skipping past top template: %s", name) + self.logger.debug(f"Skipping past top template: {name}") predecessor = template break if "wikiproject" in name or name.startswith("wp"): - self.logger.debug("Skipping past banner template: %s", name) + self.logger.debug(f"Skipping past banner template: {name}") predecessor = template if predecessor: @@ -451,35 +543,3 @@ class WikiProjectTagger(Task): else: self.logger.debug("Inserting banner at beginning") code.insert(0, banner + "\n") - - -class _Job: - """Represents a single wikiproject-tagging task. - - Stores information on the banner to add, the edit summary to use, whether - or not to autoassess and create new pages from scratch, and a counter of - the number of pages edited. - """ - - def __init__(self, **kwargs): - self.banner = kwargs["banner"] - self.names = kwargs["names"] - self.summary = kwargs["summary"] - self.update = kwargs["update"] - self.append = kwargs["append"] - self.autoassess = kwargs["autoassess"] - self.only_with = kwargs["only_with"] - self.nocreate = kwargs["nocreate"] - self.tag_categories = kwargs["tag_categories"] - self.dry_run = kwargs["dry_run"] - - self.counter = 0 - self.processed_cats = set() - self.processed_pages = set() - - -class _ShutoffEnabled(Exception): - """Raised by process_page() if shutoff is enabled. 
Caught by run(), which - will then stop the task.""" - - pass diff --git a/earwigbot/wiki/site.py b/earwigbot/wiki/site.py index fa9a06a..3f32ce8 100644 --- a/earwigbot/wiki/site.py +++ b/earwigbot/wiki/site.py @@ -894,7 +894,7 @@ class Site: raise exceptions.APIError(err.format(action, res)) return self._tokens[action] - def namespace_id_to_name(self, ns_id, all=False): + def namespace_id_to_name(self, ns_id: int, all: bool = False) -> str: """Given a namespace ID, returns associated namespace names. If *all* is ``False`` (default), we'll return the first name in the