|
|
@@ -1,6 +1,6 @@ |
|
|
|
# -*- coding: utf-8 -*- |
|
|
|
# |
|
|
|
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com> |
|
|
|
# Copyright (C) 2009-2017 Ben Kurtovic <ben.kurtovic@gmail.com> |
|
|
|
# |
|
|
|
# Permission is hereby granted, free of charge, to any person obtaining a copy |
|
|
|
# of this software and associated documentation files (the "Software"), to deal |
|
|
@@ -30,9 +30,9 @@ class WikiProjectTagger(Task): |
|
|
|
"""A task to tag talk pages with WikiProject banners. |
|
|
|
|
|
|
|
Usage: :command:`earwigbot -t wikiproject_tagger PATH |
|
|
|
--banner BANNER (--category CAT | --file FILE) [--summary SUM] |
|
|
|
[--append TEXT] [--autoassess] [--nocreate] [--recursive NUM] |
|
|
|
[--site SITE]` |
|
|
|
--banner BANNER (--category CAT | --file FILE) [--summary SUM] [--update] |
|
|
|
[--append PARAMS] [--autoassess [CLASSES]] [--only-with BANNER] |
|
|
|
[--nocreate] [--recursive [NUM]] [--genfixes] [--site SITE] [--dry-run]` |
|
|
|
|
|
|
|
.. glossary:: |
|
|
|
|
|
|
@@ -47,21 +47,33 @@ class WikiProjectTagger(Task): |
|
|
|
current directory) |
|
|
|
``--summary SUM`` |
|
|
|
an optional edit summary to use; defaults to |
|
|
|
``"Adding WikiProject banner {{BANNER}}."`` |
|
|
|
``--append TEXT`` |
|
|
|
optional text to append to the banner (after an autoassessment, if |
|
|
|
any), like ``|importance=low`` |
|
|
|
``--autoassess`` |
|
|
|
``"Tagging with WikiProject banner {{BANNER}}."`` |
|
|
|
``--update`` |
|
|
|
updates existing banners with new fields; should include at least one |
|
|
|
of ``--append`` or ``--autoassess`` to be useful |
|
|
|
``--append PARAMS`` |
|
|
|
optional comma-separated parameters to append to the banner (after an |
|
|
|
auto-assessment, if any); use syntax ``importance=low,taskforce=yes`` |
|
|
|
to add ``|importance=low|taskforce=yes`` |
|
|
|
``--autoassess [CLASSES]`` |
|
|
|
try to assess each article's class automatically based on the class of |
|
|
|
other banners on the same page |
|
|
|
other banners on the same page; if CLASSES is given as a |
|
|
|
comma-separated list, only those classes will be auto-assessed |
|
|
|
``--only-with BANNER`` |
|
|
|
only tag pages that already have the given banner |
|
|
|
``--nocreate`` |
|
|
|
don't create new talk pages with just a banner if the page doesn't |
|
|
|
already exist |
|
|
|
``--recursive NUM`` |
|
|
|
recursively go through subcategories up to a maximum depth of ``NUM``, |
|
|
|
or if ``NUM`` isn't provided, go infinitely (this can be dangerous) |
|
|
|
``--genfixes`` |
|
|
|
apply general fixes to the page if already making other changes |
|
|
|
``--site SITE`` |
|
|
|
the ID of the site to tag pages on, defaulting to the... default site |
|
|
|
the ID of the site to tag pages on, defaulting to the default site |
|
|
|
``--dry-run`` |
|
|
|
don't actually make any edits, just log the pages that would have been |
|
|
|
edited |
|
|
|
|
|
|
|
""" |
|
|
|
name = "wikiproject_tagger" |
|
|
@@ -94,7 +106,8 @@ class WikiProjectTagger(Task): |
|
|
|
r"((wikiproject|wp) ?)?bio(graph(y|ies))?$", |
|
|
|
] |
|
|
|
|
|
|
|
def _upperfirst(self, text): |
|
|
|
@staticmethod |
|
|
|
def _upperfirst(text): |
|
|
|
"""Try to uppercase the first letter of a string.""" |
|
|
|
try: |
|
|
|
return text[0].upper() + text[1:] |
|
|
@@ -114,15 +127,28 @@ class WikiProjectTagger(Task): |
|
|
|
|
|
|
|
site = self.bot.wiki.get_site(name=kwargs.get("site")) |
|
|
|
banner = kwargs["banner"] |
|
|
|
summary = kwargs.get("summary", "Adding WikiProject banner $3.") |
|
|
|
summary = kwargs.get("summary", "Tagging with WikiProject banner $3.") |
|
|
|
update = kwargs.get("update", False) |
|
|
|
append = kwargs.get("append") |
|
|
|
autoassess = kwargs.get("autoassess", False) |
|
|
|
ow_banner = kwargs.get("only-with") |
|
|
|
nocreate = kwargs.get("nocreate", False) |
|
|
|
recursive = kwargs.get("recursive", 0) |
|
|
|
genfixes = kwargs.get("genfixes", False) |
|
|
|
dry_run = kwargs.get("dry-run", False) |
|
|
|
banner, names = self.get_names(site, banner) |
|
|
|
if not names: |
|
|
|
return |
|
|
|
job = _Job(banner, names, summary, append, autoassess, nocreate) |
|
|
|
if ow_banner: |
|
|
|
_, only_with = self.get_names(site, ow_banner) |
|
|
|
if not only_with: |
|
|
|
return |
|
|
|
else: |
|
|
|
only_with = None |
|
|
|
|
|
|
|
job = _Job(banner=banner, names=names, summary=summary, update=update, |
|
|
|
append=append, autoassess=autoassess, only_with=only_with, |
|
|
|
nocreate=nocreate, genfixes=genfixes, dry_run=dry_run) |
|
|
|
|
|
|
|
try: |
|
|
|
self.run_job(kwargs, site, job, recursive) |
|
|
@@ -172,32 +198,29 @@ class WikiProjectTagger(Task): |
|
|
|
banner = banner.split(":", 1)[1] |
|
|
|
page = site.get_page(title) |
|
|
|
if page.exists != page.PAGE_EXISTS: |
|
|
|
self.logger.error(u"Banner [[{0}]] does not exist".format(title)) |
|
|
|
self.logger.error(u"Banner [[%s]] does not exist", title) |
|
|
|
return banner, None |
|
|
|
|
|
|
|
if banner == title: |
|
|
|
names = [self._upperfirst(banner)] |
|
|
|
else: |
|
|
|
names = [self._upperfirst(banner), self._upperfirst(title)] |
|
|
|
names = {banner, title} |
|
|
|
result = site.api_query(action="query", list="backlinks", bllimit=500, |
|
|
|
blfilterredir="redirects", bltitle=title) |
|
|
|
for backlink in result["query"]["backlinks"]: |
|
|
|
names.append(backlink["title"]) |
|
|
|
names.add(backlink["title"]) |
|
|
|
if backlink["ns"] == constants.NS_TEMPLATE: |
|
|
|
names.append(backlink["title"].split(":", 1)[1]) |
|
|
|
names.add(backlink["title"].split(":", 1)[1]) |
|
|
|
|
|
|
|
log = u"Found {0} aliases for banner [[{1}]]".format(len(names), title) |
|
|
|
self.logger.debug(log) |
|
|
|
log = u"Found %s aliases for banner [[%s]]" |
|
|
|
self.logger.debug(log, len(names), title) |
|
|
|
return banner, names |
|
|
|
|
|
|
|
def process_category(self, page, job, recursive): |
|
|
|
"""Try to tag all pages in the given category.""" |
|
|
|
self.logger.info(u"Processing category: [[{0]]".format(page.title)) |
|
|
|
self.logger.info(u"Processing category: [[%s]]", page.title) |
|
|
|
for member in page.get_members(): |
|
|
|
if member.namespace == constants.NS_CATEGORY: |
|
|
|
if recursive is True: |
|
|
|
self.process_category(member, job, True) |
|
|
|
elif recursive: |
|
|
|
elif recursive > 0: |
|
|
|
self.process_category(member, job, recursive - 1) |
|
|
|
else: |
|
|
|
self.process_page(member, job) |
|
|
@@ -214,65 +237,125 @@ class WikiProjectTagger(Task): |
|
|
|
try: |
|
|
|
code = page.parse() |
|
|
|
except exceptions.PageNotFoundError: |
|
|
|
if job.nocreate: |
|
|
|
log = u"Skipping nonexistent page: [[{0}]]".format(page.title) |
|
|
|
self.logger.info(log) |
|
|
|
else: |
|
|
|
log = u"Tagging new page: [[{0}]]".format(page.title) |
|
|
|
self.logger.info(log) |
|
|
|
banner = "{{" + job.banner + job.append + "}}" |
|
|
|
summary = job.summary.replace("$3", banner) |
|
|
|
page.edit(banner, self.make_summary(summary)) |
|
|
|
self.process_new_page(page, job) |
|
|
|
return |
|
|
|
except exceptions.InvalidPageError: |
|
|
|
log = u"Skipping invalid page: [[{0}]]".format(page.title) |
|
|
|
self.logger.error(log) |
|
|
|
self.logger.error(u"Skipping invalid page: [[%s]]", page.title) |
|
|
|
return |
|
|
|
|
|
|
|
is_update = False |
|
|
|
for template in code.ifilter_templates(recursive=True): |
|
|
|
name = self._upperfirst(template.name.strip()) |
|
|
|
if name in job.names: |
|
|
|
log = u"Skipping page: [[{0}]]; already tagged with '{1}'" |
|
|
|
self.logger.info(log.format(page.title, name)) |
|
|
|
if template.name.matches(job.names): |
|
|
|
if job.update: |
|
|
|
banner = template |
|
|
|
is_update = True |
|
|
|
break |
|
|
|
else: |
|
|
|
log = u"Skipping page: [[%s]]; already tagged with '%s'" |
|
|
|
self.logger.info(log, page.title, template.name) |
|
|
|
return |
|
|
|
|
|
|
|
if job.only_with: |
|
|
|
if not any(template.name.matches(job.only_with) |
|
|
|
for template in code.ifilter_templates(recursive=True)): |
|
|
|
log = u"Skipping page: [[%s]]; fails only-with condition" |
|
|
|
self.logger.info(log, page.title) |
|
|
|
return |
|
|
|
|
|
|
|
banner = self.make_banner(job, code) |
|
|
|
shell = self.get_banner_shell(code) |
|
|
|
if shell: |
|
|
|
if shell.has_param(1): |
|
|
|
shell.get(1).value.insert(0, banner + "\n") |
|
|
|
else: |
|
|
|
shell.add(1, banner) |
|
|
|
if is_update: |
|
|
|
old_banner = unicode(banner) |
|
|
|
self.update_banner(banner, job, code) |
|
|
|
if banner == old_banner: |
|
|
|
log = u"Skipping page: [[%s]]; already tagged and no updates" |
|
|
|
self.logger.info(log, page.title) |
|
|
|
return |
|
|
|
self.logger.info(u"Updating banner on page: [[%s]]", page.title) |
|
|
|
else: |
|
|
|
self.add_banner(code, banner) |
|
|
|
self.apply_genfixes(code) |
|
|
|
self.logger.info(u"Tagging page: [[%s]]", page.title) |
|
|
|
banner = self.make_banner(job, code) |
|
|
|
shell = self.get_banner_shell(code) |
|
|
|
if shell: |
|
|
|
if shell.has_param(1): |
|
|
|
shell.get(1).value.insert(0, banner + "\n") |
|
|
|
else: |
|
|
|
shell.add(1, banner) |
|
|
|
else: |
|
|
|
self.add_banner(code, banner) |
|
|
|
|
|
|
|
self.logger.info(u"Tagging page: [[{0}]]".format(page.title)) |
|
|
|
summary = job.summary.replace("$3", banner) |
|
|
|
page.edit(unicode(code), self.make_summary(summary)) |
|
|
|
if job.genfixes: |
|
|
|
self.apply_genfixes(code) |
|
|
|
|
|
|
|
def make_banner(self, job, code): |
|
|
|
if job.dry_run: |
|
|
|
self.logger.debug(u"DRY RUN: Banner: %s", banner) |
|
|
|
else: |
|
|
|
summary = job.summary.replace("$3", banner) |
|
|
|
page.edit(unicode(code), self.make_summary(summary)) |
|
|
|
|
|
|
|
def process_new_page(self, page, job): |
|
|
|
"""Try to tag a *page* that doesn't exist yet using the *job*.""" |
|
|
|
if job.nocreate or job.only_with: |
|
|
|
log = u"Skipping nonexistent page: [[%s]]" |
|
|
|
self.logger.info(log, page.title) |
|
|
|
else: |
|
|
|
self.logger.info(u"Tagging new page: [[%s]]", page.title) |
|
|
|
banner = self.make_banner(job) |
|
|
|
if job.dry_run: |
|
|
|
self.logger.debug(u"DRY RUN: Banner: %s", banner) |
|
|
|
else: |
|
|
|
summary = job.summary.replace("$3", banner) |
|
|
|
page.edit(banner, self.make_summary(summary)) |
|
|
|
|
|
|
|
def make_banner(self, job, code=None): |
|
|
|
"""Return banner text to add based on a *job* and a page's *code*.""" |
|
|
|
banner = "{{" + job.banner |
|
|
|
if job.autoassess: |
|
|
|
classes = {"fa": 0, "fl": 0, "ga": 0, "a": 0, "b": 0, "start": 0, |
|
|
|
"stub": 0, "list": 0, "dab": 0, "c": 0, "redirect": 0, |
|
|
|
"book": 0, "template": 0, "category": 0} |
|
|
|
for template in code.ifilter_templates(recursive=True): |
|
|
|
if template.has_param("class"): |
|
|
|
value = unicode(template.get("class").value).lower() |
|
|
|
if value in classes: |
|
|
|
classes[value] += 1 |
|
|
|
values = tuple(classes.values()) |
|
|
|
banner = job.banner |
|
|
|
if code is not None and job.autoassess is not False: |
|
|
|
assessment = self.get_autoassessment(code, job.autoassess) |
|
|
|
if assessment: |
|
|
|
banner += "|class=" + assessment |
|
|
|
if job.append: |
|
|
|
banner += "|" + "|".join(job.append.split(",")) |
|
|
|
return "{{" + banner + "}}" |
|
|
|
|
|
|
|
def update_banner(self, banner, job, code): |
|
|
|
"""Update an existing *banner* based on a *job* and a page's *code*.""" |
|
|
|
if job.autoassess is not False: |
|
|
|
if not banner.has("class") or not banner.get("class").value: |
|
|
|
assessment = self.get_autoassessment(code, job.autoassess) |
|
|
|
if assessment: |
|
|
|
banner.add("class", assessment) |
|
|
|
if job.append: |
|
|
|
for param in job.append.split(","): |
|
|
|
key, value = param.split("=", 1) |
|
|
|
if not banner.has(key) or not banner.get(key).value: |
|
|
|
banner.add(key, value) |
|
|
|
|
|
|
|
def get_autoassessment(self, code, only_classes=None): |
|
|
|
if only_classes is None: |
|
|
|
classnames = ["a", "b", "book", "c", "category", "dab", "fa", |
|
|
|
"fl", "ga", "list", "redirect", "start", "stub", |
|
|
|
"template"] |
|
|
|
else: |
|
|
|
classnames = [klass.strip().lower() |
|
|
|
for klass in only_classes.split(",")] |
|
|
|
|
|
|
|
classes = {klass: 0 for klass in classnames} |
|
|
|
for template in code.ifilter_templates(recursive=True): |
|
|
|
if template.has("class"): |
|
|
|
value = unicode(template.get("class").value).lower() |
|
|
|
if value in classes: |
|
|
|
classes[value] += 1 |
|
|
|
|
|
|
|
values = tuple(classes.values()) |
|
|
|
if values: |
|
|
|
best = max(values) |
|
|
|
confidence = float(best) / sum(values) |
|
|
|
if confidence > 0.75: |
|
|
|
rank = tuple(classes.keys())[values.index(best)] |
|
|
|
if rank in ("fa", "fl", "ga"): |
|
|
|
banner += "|class=" + rank.upper() |
|
|
|
return rank.upper() |
|
|
|
else: |
|
|
|
banner += "|class=" + self._upperfirst(rank) |
|
|
|
return banner + job.append + "}}" |
|
|
|
return self._upperfirst(rank) |
|
|
|
return None |
|
|
|
|
|
|
|
def get_banner_shell(self, code): |
|
|
|
"""Return the banner shell template within *code*, else ``None``.""" |
|
|
@@ -281,8 +364,8 @@ class WikiProjectTagger(Task): |
|
|
|
if not shells: |
|
|
|
shells = code.filter_templates(matches=regex, recursive=True) |
|
|
|
if shells: |
|
|
|
log = u"Inserting banner into shell: {0}" |
|
|
|
self.logger.debug(log.format(shells[0].name)) |
|
|
|
log = u"Inserting banner into shell: %s" |
|
|
|
self.logger.debug(log, shells[0].name) |
|
|
|
return shells[0] |
|
|
|
|
|
|
|
def add_banner(self, code, banner): |
|
|
@@ -292,15 +375,16 @@ class WikiProjectTagger(Task): |
|
|
|
name = template.name.lower().replace("_", " ") |
|
|
|
for regex in self.TOP_TEMPS: |
|
|
|
if re.match(regex, name): |
|
|
|
self.logger.info("Skipping top template: {0}".format(name)) |
|
|
|
self.logger.debug(u"Skipping top template: %s", name) |
|
|
|
index = i + 1 |
|
|
|
|
|
|
|
self.logger.debug(u"Inserting banner at index {0}".format(index)) |
|
|
|
self.logger.debug(u"Inserting banner at index %s", index) |
|
|
|
code.insert(index, banner) |
|
|
|
|
|
|
|
def apply_genfixes(self, code): |
|
|
|
"""Apply general fixes to *code*, such as template substitution.""" |
|
|
|
regex = r"^\{\{\s*((un|no)?s(i((gn|ng)(ed3?)?|g))?|usu|tilde|forgot to sign|without signature)" |
|
|
|
regex = (r"^\{\{\s*((un|no)?s(i((gn|ng)(ed3?)?|g))?|usu|tilde|" |
|
|
|
r"forgot to sign|without signature)") |
|
|
|
for template in code.ifilter_templates(matches=regex): |
|
|
|
self.logger.debug("Applying genfix: substitute {{unsigned}}") |
|
|
|
template.name = "subst:unsigned" |
|
|
@@ -313,13 +397,17 @@ class _Job(object): |
|
|
|
or not to autoassess and create new pages from scratch, and a counter of |
|
|
|
the number of pages edited. |
|
|
|
""" |
|
|
|
def __init__(self, banner, names, summary, append, autoassess, nocreate): |
|
|
|
self.banner = banner |
|
|
|
self.names = names |
|
|
|
self.summary = summary |
|
|
|
self.append = append |
|
|
|
self.autoassess = autoassess |
|
|
|
self.nocreate = nocreate |
|
|
|
def __init__(self, **kwargs): |
|
|
|
self.banner = kwargs["banner"] |
|
|
|
self.names = kwargs["names"] |
|
|
|
self.summary = kwargs["summary"] |
|
|
|
self.update = kwargs["update"] |
|
|
|
self.append = kwargs["append"] |
|
|
|
self.autoassess = kwargs["autoassess"] |
|
|
|
self.only_with = kwargs["only_with"] |
|
|
|
self.nocreate = kwargs["nocreate"] |
|
|
|
self.genfixes = kwargs["genfixes"] |
|
|
|
self.dry_run = kwargs["dry_run"] |
|
|
|
self.counter = 0 |
|
|
|
|
|
|
|
|
|
|
|