From 39b63f11c17b0a730d41829e9d2166c8182a4dc0 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 15 Jan 2017 02:18:22 -0600 Subject: [PATCH] Add a bunch of things to the WikiProjectTagger task. --- CHANGELOG | 1 + earwigbot/tasks/wikiproject_tagger.py | 246 +++++++++++++++++++++++----------- 2 files changed, 168 insertions(+), 79 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 25f3b5b..7126c4b 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,6 @@ v0.3 (unreleased): +- Added various new features to the WikiProjectTagger task. - Copyvio detector: improved sentence splitting algorithm. - Improved config file command/task exclusion logic. - IRC > !cidr: Added; new command for calculating range blocks. diff --git a/earwigbot/tasks/wikiproject_tagger.py b/earwigbot/tasks/wikiproject_tagger.py index 6884d76..58b7794 100644 --- a/earwigbot/tasks/wikiproject_tagger.py +++ b/earwigbot/tasks/wikiproject_tagger.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2009-2015 Ben Kurtovic +# Copyright (C) 2009-2017 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -30,9 +30,9 @@ class WikiProjectTagger(Task): """A task to tag talk pages with WikiProject banners. Usage: :command:`earwigbot -t wikiproject_tagger PATH - --banner BANNER (--category CAT | --file FILE) [--summary SUM] - [--append TEXT] [--autoassess] [--nocreate] [--recursive NUM] - [--site SITE]` + --banner BANNER (--category CAT | --file FILE) [--summary SUM] [--update] + [--append PARAMS] [--autoassess [CLASSES]] [--only-with BANNER] + [--nocreate] [--recursive [NUM]] [--genfixes] [--site SITE] [--dry-run]` .. glossary:: @@ -47,21 +47,33 @@ class WikiProjectTagger(Task): current directory) ``--summary SUM`` an optional edit summary to use; defaults to - ``"Adding WikiProject banner {{BANNER}}."`` - ``--append TEXT`` - optional text to append to the banner (after an autoassessment, if - any), like ``|importance=low`` - ``--autoassess`` + ``"Tagging with WikiProject banner {{BANNER}}."`` + ``--update`` + updates existing banners with new fields; should include at least one + of ``--append`` or ``--autoassess`` to be useful + ``--append PARAMS`` + optional comma-separated parameters to append to the banner (after an + auto-assessment, if any); use syntax ``importance=low,taskforce=yes`` + to add ``|importance=low|taskforce=yes`` + ``--autoassess [CLASSES]`` try to assess each article's class automatically based on the class of - other banners on the same page + other banners on the same page; if CLASSES is given as a + comma-separated list, only those classes will be auto-assessed + ``--only-with BANNER`` + only tag pages that already have the given banner ``--nocreate`` don't create new talk pages with just a banner if the page doesn't already exist ``--recursive NUM`` recursively go through subcategories up to a maximum depth of ``NUM``, or if ``NUM`` isn't provided, go infinitely (this can be dangerous) + ``--genfixes`` + apply general fixes to the page if already making other changes ``--site SITE`` - the ID of the site to tag pages on, defaulting to the... default site + the ID of the site to tag pages on, defaulting to the default site + ``--dry-run`` + don't actually make any edits, just log the pages that would have been + edited """ name = "wikiproject_tagger" @@ -94,7 +106,8 @@ class WikiProjectTagger(Task): r"((wikiproject|wp) ?)?bio(graph(y|ies))?$", ] - def _upperfirst(self, text): + @staticmethod + def _upperfirst(text): """Try to uppercase the first letter of a string.""" try: return text[0].upper() + text[1:] @@ -114,15 +127,28 @@ class WikiProjectTagger(Task): site = self.bot.wiki.get_site(name=kwargs.get("site")) banner = kwargs["banner"] - summary = kwargs.get("summary", "Adding WikiProject banner $3.") + summary = kwargs.get("summary", "Tagging with WikiProject banner $3.") + update = kwargs.get("update", False) append = kwargs.get("append") autoassess = kwargs.get("autoassess", False) + ow_banner = kwargs.get("only-with") nocreate = kwargs.get("nocreate", False) recursive = kwargs.get("recursive", 0) + genfixes = kwargs.get("genfixes", False) + dry_run = kwargs.get("dry-run", False) banner, names = self.get_names(site, banner) if not names: return - job = _Job(banner, names, summary, append, autoassess, nocreate) + if ow_banner: + _, only_with = self.get_names(site, ow_banner) + if not only_with: + return + else: + only_with = None + + job = _Job(banner=banner, names=names, summary=summary, update=update, + append=append, autoassess=autoassess, only_with=only_with, + nocreate=nocreate, genfixes=genfixes, dry_run=dry_run) try: self.run_job(kwargs, site, job, recursive) @@ -172,32 +198,29 @@ class WikiProjectTagger(Task): banner = banner.split(":", 1)[1] page = site.get_page(title) if page.exists != page.PAGE_EXISTS: - self.logger.error(u"Banner [[{0}]] does not exist".format(title)) + self.logger.error(u"Banner [[%s]] does not exist", title) return banner, None - if banner == title: - names = [self._upperfirst(banner)] - else: - names = [self._upperfirst(banner), self._upperfirst(title)] + names = {banner, title} result = site.api_query(action="query", list="backlinks", bllimit=500, blfilterredir="redirects", bltitle=title) for backlink in result["query"]["backlinks"]: - names.append(backlink["title"]) + names.add(backlink["title"]) if backlink["ns"] == constants.NS_TEMPLATE: - names.append(backlink["title"].split(":", 1)[1]) + names.add(backlink["title"].split(":", 1)[1]) - log = u"Found {0} aliases for banner [[{1}]]".format(len(names), title) - self.logger.debug(log) + log = u"Found %s aliases for banner [[%s]]" + self.logger.debug(log, len(names), title) return banner, names def process_category(self, page, job, recursive): """Try to tag all pages in the given category.""" - self.logger.info(u"Processing category: [[{0]]".format(page.title)) + self.logger.info(u"Processing category: [[%s]]", page.title) for member in page.get_members(): if member.namespace == constants.NS_CATEGORY: if recursive is True: self.process_category(member, job, True) - elif recursive: + elif recursive > 0: self.process_category(member, job, recursive - 1) else: self.process_page(member, job) @@ -214,65 +237,125 @@ class WikiProjectTagger(Task): try: code = page.parse() except exceptions.PageNotFoundError: - if job.nocreate: - log = u"Skipping nonexistent page: [[{0}]]".format(page.title) - self.logger.info(log) - else: - log = u"Tagging new page: [[{0}]]".format(page.title) - self.logger.info(log) - banner = "{{" + job.banner + job.append + "}}" - summary = job.summary.replace("$3", banner) - page.edit(banner, self.make_summary(summary)) + self.process_new_page(page, job) return except exceptions.InvalidPageError: - log = u"Skipping invalid page: [[{0}]]".format(page.title) - self.logger.error(log) + self.logger.error(u"Skipping invalid page: [[%s]]", page.title) return + is_update = False for template in code.ifilter_templates(recursive=True): - name = self._upperfirst(template.name.strip()) - if name in job.names: - log = u"Skipping page: [[{0}]]; already tagged with '{1}'" - self.logger.info(log.format(page.title, name)) + if template.name.matches(job.names): + if job.update: + banner = template + is_update = True + break + else: + log = u"Skipping page: [[%s]]; already tagged with '%s'" + self.logger.info(log, page.title, template.name) + return + + if job.only_with: + if not any(template.name.matches(job.only_with) + for template in code.ifilter_templates(recursive=True)): + log = u"Skipping page: [[%s]]; fails only-with condition" + self.logger.info(log, page.title) return - banner = self.make_banner(job, code) - shell = self.get_banner_shell(code) - if shell: - if shell.has_param(1): - shell.get(1).value.insert(0, banner + "\n") - else: - shell.add(1, banner) + if is_update: + old_banner = unicode(banner) + self.update_banner(banner, job, code) + if banner == old_banner: + log = u"Skipping page: [[%s]]; already tagged and no updates" + self.logger.info(log, page.title) + return + self.logger.info(u"Updating banner on page: [[%s]]", page.title) else: - self.add_banner(code, banner) - self.apply_genfixes(code) + self.logger.info(u"Tagging page: [[%s]]", page.title) + banner = self.make_banner(job, code) + shell = self.get_banner_shell(code) + if shell: + if shell.has_param(1): + shell.get(1).value.insert(0, banner + "\n") + else: + shell.add(1, banner) + else: + self.add_banner(code, banner) - self.logger.info(u"Tagging page: [[{0}]]".format(page.title)) - summary = job.summary.replace("$3", banner) - page.edit(unicode(code), self.make_summary(summary)) + if job.genfixes: + self.apply_genfixes(code) - def make_banner(self, job, code): + if job.dry_run: + self.logger.debug(u"DRY RUN: Banner: %s", banner) + else: + summary = job.summary.replace("$3", banner) + page.edit(unicode(code), self.make_summary(summary)) + + def process_new_page(self, page, job): + """Try to tag a *page* that doesn't exist yet using the *job*.""" + if job.nocreate or job.only_with: + log = u"Skipping nonexistent page: [[%s]]" + self.logger.info(log, page.title) + else: + self.logger.info(u"Tagging new page: [[%s]]", page.title) + banner = self.make_banner(job) + if job.dry_run: + self.logger.debug(u"DRY RUN: Banner: %s", banner) + else: + summary = job.summary.replace("$3", banner) + page.edit(banner, self.make_summary(summary)) + + def make_banner(self, job, code=None): """Return banner text to add based on a *job* and a page's *code*.""" - banner = "{{" + job.banner - if job.autoassess: - classes = {"fa": 0, "fl": 0, "ga": 0, "a": 0, "b": 0, "start": 0, - "stub": 0, "list": 0, "dab": 0, "c": 0, "redirect": 0, - "book": 0, "template": 0, "category": 0} - for template in code.ifilter_templates(recursive=True): - if template.has_param("class"): - value = unicode(template.get("class").value).lower() - if value in classes: - classes[value] += 1 - values = tuple(classes.values()) + banner = job.banner + if code is not None and job.autoassess is not False: + assessment = self.get_autoassessment(code, job.autoassess) + if assessment: + banner += "|class=" + assessment + if job.append: + banner += "|" + "|".join(job.append.split(",")) + return "{{" + banner + "}}" + + def update_banner(self, banner, job, code): + """Update an existing *banner* based on a *job* and a page's *code*.""" + if job.autoassess is not False: + if not banner.has("class") or not banner.get("class").value: + assessment = self.get_autoassessment(code, job.autoassess) + if assessment: + banner.add("class", assessment) + if job.append: + for param in job.append.split(","): + key, value = param.split("=", 1) + if not banner.has(key) or not banner.get(key).value: + banner.add(key, value) + + def get_autoassessment(self, code, only_classes=None): + if only_classes is None: + classnames = ["a", "b", "book", "c", "category", "dab", "fa", + "fl", "ga", "list", "redirect", "start", "stub", + "template"] + else: + classnames = [klass.strip().lower() + for klass in only_classes.split(",")] + + classes = {klass: 0 for klass in classnames} + for template in code.ifilter_templates(recursive=True): + if template.has("class"): + value = unicode(template.get("class").value).lower() + if value in classes: + classes[value] += 1 + + values = tuple(classes.values()) + if values: best = max(values) confidence = float(best) / sum(values) if confidence > 0.75: rank = tuple(classes.keys())[values.index(best)] if rank in ("fa", "fl", "ga"): - banner += "|class=" + rank.upper() + return rank.upper() else: - banner += "|class=" + self._upperfirst(rank) - return banner + job.append + "}}" + return self._upperfirst(rank) + return None def get_banner_shell(self, code): """Return the banner shell template within *code*, else ``None``.""" @@ -281,8 +364,8 @@ class WikiProjectTagger(Task): if not shells: shells = code.filter_templates(matches=regex, recursive=True) if shells: - log = u"Inserting banner into shell: {0}" - self.logger.debug(log.format(shells[0].name)) + log = u"Inserting banner into shell: %s" + self.logger.debug(log, shells[0].name) return shells[0] def add_banner(self, code, banner): @@ -292,15 +375,16 @@ class WikiProjectTagger(Task): name = template.name.lower().replace("_", " ") for regex in self.TOP_TEMPS: if re.match(regex, name): - self.logger.info("Skipping top template: {0}".format(name)) + self.logger.debug(u"Skipping top template: %s", name) index = i + 1 - self.logger.debug(u"Inserting banner at index {0}".format(index)) + self.logger.debug(u"Inserting banner at index %s", index) code.insert(index, banner) def apply_genfixes(self, code): """Apply general fixes to *code*, such as template substitution.""" - regex = r"^\{\{\s*((un|no)?s(i((gn|ng)(ed3?)?|g))?|usu|tilde|forgot to sign|without signature)" + regex = (r"^\{\{\s*((un|no)?s(i((gn|ng)(ed3?)?|g))?|usu|tilde|" + r"forgot to sign|without signature)") for template in code.ifilter_templates(matches=regex): self.logger.debug("Applying genfix: substitute {{unsigned}}") template.name = "subst:unsigned" @@ -313,13 +397,17 @@ class _Job(object): or not to autoassess and create new pages from scratch, and a counter of the number of pages edited. """ - def __init__(self, banner, names, summary, append, autoassess, nocreate): - self.banner = banner - self.names = names - self.summary = summary - self.append = append - self.autoassess = autoassess - self.nocreate = nocreate + def __init__(self, **kwargs): + self.banner = kwargs["banner"] + self.names = kwargs["names"] + self.summary = kwargs["summary"] + self.update = kwargs["update"] + self.append = kwargs["append"] + self.autoassess = kwargs["autoassess"] + self.only_with = kwargs["only_with"] + self.nocreate = kwargs["nocreate"] + self.genfixes = kwargs["genfixes"] + self.dry_run = kwargs["dry_run"] self.counter = 0