Browse Source

Add a bunch of things to the WikiProjectTagger task.

tags/v0.3
Ben Kurtovic 7 years ago
parent
commit
39b63f11c1
2 changed files with 168 additions and 79 deletions
  1. +1
    -0
      CHANGELOG
  2. +167
    -79
      earwigbot/tasks/wikiproject_tagger.py

+ 1
- 0
CHANGELOG View File

@@ -1,5 +1,6 @@
v0.3 (unreleased): v0.3 (unreleased):


- Added various new features to the WikiProjectTagger task.
- Copyvio detector: improved sentence splitting algorithm. - Copyvio detector: improved sentence splitting algorithm.
- Improved config file command/task exclusion logic. - Improved config file command/task exclusion logic.
- IRC > !cidr: Added; new command for calculating range blocks. - IRC > !cidr: Added; new command for calculating range blocks.


+ 167
- 79
earwigbot/tasks/wikiproject_tagger.py View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2009-2017 Ben Kurtovic <ben.kurtovic@gmail.com>
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy # Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal # of this software and associated documentation files (the "Software"), to deal
@@ -30,9 +30,9 @@ class WikiProjectTagger(Task):
"""A task to tag talk pages with WikiProject banners. """A task to tag talk pages with WikiProject banners.


Usage: :command:`earwigbot -t wikiproject_tagger PATH Usage: :command:`earwigbot -t wikiproject_tagger PATH
--banner BANNER (--category CAT | --file FILE) [--summary SUM]
[--append TEXT] [--autoassess] [--nocreate] [--recursive NUM]
[--site SITE]`
--banner BANNER (--category CAT | --file FILE) [--summary SUM] [--update]
[--append PARAMS] [--autoassess [CLASSES]] [--only-with BANNER]
[--nocreate] [--recursive [NUM]] [--genfixes] [--site SITE] [--dry-run]`


.. glossary:: .. glossary::


@@ -47,21 +47,33 @@ class WikiProjectTagger(Task):
current directory) current directory)
``--summary SUM`` ``--summary SUM``
an optional edit summary to use; defaults to an optional edit summary to use; defaults to
``"Adding WikiProject banner {{BANNER}}."``
``--append TEXT``
optional text to append to the banner (after an autoassessment, if
any), like ``|importance=low``
``--autoassess``
``"Tagging with WikiProject banner {{BANNER}}."``
``--update``
updates existing banners with new fields; should include at least one
of ``--append`` or ``--autoassess`` to be useful
``--append PARAMS``
optional comma-separated parameters to append to the banner (after an
auto-assessment, if any); use syntax ``importance=low,taskforce=yes``
to add ``|importance=low|taskforce=yes``
``--autoassess [CLASSES]``
try to assess each article's class automatically based on the class of try to assess each article's class automatically based on the class of
other banners on the same page
other banners on the same page; if CLASSES is given as a
comma-separated list, only those classes will be auto-assessed
``--only-with BANNER``
only tag pages that already have the given banner
``--nocreate`` ``--nocreate``
don't create new talk pages with just a banner if the page doesn't don't create new talk pages with just a banner if the page doesn't
already exist already exist
``--recursive NUM`` ``--recursive NUM``
recursively go through subcategories up to a maximum depth of ``NUM``, recursively go through subcategories up to a maximum depth of ``NUM``,
or if ``NUM`` isn't provided, go infinitely (this can be dangerous) or if ``NUM`` isn't provided, go infinitely (this can be dangerous)
``--genfixes``
apply general fixes to the page if already making other changes
``--site SITE`` ``--site SITE``
the ID of the site to tag pages on, defaulting to the... default site
the ID of the site to tag pages on, defaulting to the default site
``--dry-run``
don't actually make any edits, just log the pages that would have been
edited


""" """
name = "wikiproject_tagger" name = "wikiproject_tagger"
@@ -94,7 +106,8 @@ class WikiProjectTagger(Task):
r"((wikiproject|wp) ?)?bio(graph(y|ies))?$", r"((wikiproject|wp) ?)?bio(graph(y|ies))?$",
] ]


def _upperfirst(self, text):
@staticmethod
def _upperfirst(text):
"""Try to uppercase the first letter of a string.""" """Try to uppercase the first letter of a string."""
try: try:
return text[0].upper() + text[1:] return text[0].upper() + text[1:]
@@ -114,15 +127,28 @@ class WikiProjectTagger(Task):


site = self.bot.wiki.get_site(name=kwargs.get("site")) site = self.bot.wiki.get_site(name=kwargs.get("site"))
banner = kwargs["banner"] banner = kwargs["banner"]
summary = kwargs.get("summary", "Adding WikiProject banner $3.")
summary = kwargs.get("summary", "Tagging with WikiProject banner $3.")
update = kwargs.get("update", False)
append = kwargs.get("append") append = kwargs.get("append")
autoassess = kwargs.get("autoassess", False) autoassess = kwargs.get("autoassess", False)
ow_banner = kwargs.get("only-with")
nocreate = kwargs.get("nocreate", False) nocreate = kwargs.get("nocreate", False)
recursive = kwargs.get("recursive", 0) recursive = kwargs.get("recursive", 0)
genfixes = kwargs.get("genfixes", False)
dry_run = kwargs.get("dry-run", False)
banner, names = self.get_names(site, banner) banner, names = self.get_names(site, banner)
if not names: if not names:
return return
job = _Job(banner, names, summary, append, autoassess, nocreate)
if ow_banner:
_, only_with = self.get_names(site, ow_banner)
if not only_with:
return
else:
only_with = None

job = _Job(banner=banner, names=names, summary=summary, update=update,
append=append, autoassess=autoassess, only_with=only_with,
nocreate=nocreate, genfixes=genfixes, dry_run=dry_run)


try: try:
self.run_job(kwargs, site, job, recursive) self.run_job(kwargs, site, job, recursive)
@@ -172,32 +198,29 @@ class WikiProjectTagger(Task):
banner = banner.split(":", 1)[1] banner = banner.split(":", 1)[1]
page = site.get_page(title) page = site.get_page(title)
if page.exists != page.PAGE_EXISTS: if page.exists != page.PAGE_EXISTS:
self.logger.error(u"Banner [[{0}]] does not exist".format(title))
self.logger.error(u"Banner [[%s]] does not exist", title)
return banner, None return banner, None


if banner == title:
names = [self._upperfirst(banner)]
else:
names = [self._upperfirst(banner), self._upperfirst(title)]
names = {banner, title}
result = site.api_query(action="query", list="backlinks", bllimit=500, result = site.api_query(action="query", list="backlinks", bllimit=500,
blfilterredir="redirects", bltitle=title) blfilterredir="redirects", bltitle=title)
for backlink in result["query"]["backlinks"]: for backlink in result["query"]["backlinks"]:
names.append(backlink["title"])
names.add(backlink["title"])
if backlink["ns"] == constants.NS_TEMPLATE: if backlink["ns"] == constants.NS_TEMPLATE:
names.append(backlink["title"].split(":", 1)[1])
names.add(backlink["title"].split(":", 1)[1])


log = u"Found {0} aliases for banner [[{1}]]".format(len(names), title)
self.logger.debug(log)
log = u"Found %s aliases for banner [[%s]]"
self.logger.debug(log, len(names), title)
return banner, names return banner, names


def process_category(self, page, job, recursive): def process_category(self, page, job, recursive):
"""Try to tag all pages in the given category.""" """Try to tag all pages in the given category."""
self.logger.info(u"Processing category: [[{0]]".format(page.title))
self.logger.info(u"Processing category: [[%s]]", page.title)
for member in page.get_members(): for member in page.get_members():
if member.namespace == constants.NS_CATEGORY: if member.namespace == constants.NS_CATEGORY:
if recursive is True: if recursive is True:
self.process_category(member, job, True) self.process_category(member, job, True)
elif recursive:
elif recursive > 0:
self.process_category(member, job, recursive - 1) self.process_category(member, job, recursive - 1)
else: else:
self.process_page(member, job) self.process_page(member, job)
@@ -214,65 +237,125 @@ class WikiProjectTagger(Task):
try: try:
code = page.parse() code = page.parse()
except exceptions.PageNotFoundError: except exceptions.PageNotFoundError:
if job.nocreate:
log = u"Skipping nonexistent page: [[{0}]]".format(page.title)
self.logger.info(log)
else:
log = u"Tagging new page: [[{0}]]".format(page.title)
self.logger.info(log)
banner = "{{" + job.banner + job.append + "}}"
summary = job.summary.replace("$3", banner)
page.edit(banner, self.make_summary(summary))
self.process_new_page(page, job)
return return
except exceptions.InvalidPageError: except exceptions.InvalidPageError:
log = u"Skipping invalid page: [[{0}]]".format(page.title)
self.logger.error(log)
self.logger.error(u"Skipping invalid page: [[%s]]", page.title)
return return


is_update = False
for template in code.ifilter_templates(recursive=True): for template in code.ifilter_templates(recursive=True):
name = self._upperfirst(template.name.strip())
if name in job.names:
log = u"Skipping page: [[{0}]]; already tagged with '{1}'"
self.logger.info(log.format(page.title, name))
if template.name.matches(job.names):
if job.update:
banner = template
is_update = True
break
else:
log = u"Skipping page: [[%s]]; already tagged with '%s'"
self.logger.info(log, page.title, template.name)
return

if job.only_with:
if not any(template.name.matches(job.only_with)
for template in code.ifilter_templates(recursive=True)):
log = u"Skipping page: [[%s]]; fails only-with condition"
self.logger.info(log, page.title)
return return


banner = self.make_banner(job, code)
shell = self.get_banner_shell(code)
if shell:
if shell.has_param(1):
shell.get(1).value.insert(0, banner + "\n")
else:
shell.add(1, banner)
if is_update:
old_banner = unicode(banner)
self.update_banner(banner, job, code)
if banner == old_banner:
log = u"Skipping page: [[%s]]; already tagged and no updates"
self.logger.info(log, page.title)
return
self.logger.info(u"Updating banner on page: [[%s]]", page.title)
else: else:
self.add_banner(code, banner)
self.apply_genfixes(code)
self.logger.info(u"Tagging page: [[%s]]", page.title)
banner = self.make_banner(job, code)
shell = self.get_banner_shell(code)
if shell:
if shell.has_param(1):
shell.get(1).value.insert(0, banner + "\n")
else:
shell.add(1, banner)
else:
self.add_banner(code, banner)


self.logger.info(u"Tagging page: [[{0}]]".format(page.title))
summary = job.summary.replace("$3", banner)
page.edit(unicode(code), self.make_summary(summary))
if job.genfixes:
self.apply_genfixes(code)


def make_banner(self, job, code):
if job.dry_run:
self.logger.debug(u"DRY RUN: Banner: %s", banner)
else:
summary = job.summary.replace("$3", banner)
page.edit(unicode(code), self.make_summary(summary))

def process_new_page(self, page, job):
    """Tag a *page* that does not exist yet, as directed by the *job*.

    The page is skipped (and the skip is logged) when the job forbids
    creating pages or has an only-with condition. Otherwise a banner is
    built with :meth:`make_banner` and either logged (dry run) or saved as
    the full content of the new page.
    """
    title = page.title
    if job.nocreate or job.only_with:
        self.logger.info(u"Skipping nonexistent page: [[%s]]", title)
        return

    self.logger.info(u"Tagging new page: [[%s]]", title)
    new_banner = self.make_banner(job)
    if job.dry_run:
        self.logger.debug(u"DRY RUN: Banner: %s", new_banner)
        return

    # "$3" is the placeholder for the banner text in the edit summary.
    edit_summary = self.make_summary(job.summary.replace("$3", new_banner))
    page.edit(new_banner, edit_summary)

def make_banner(self, job, code=None):
"""Return banner text to add based on a *job* and a page's *code*.""" """Return banner text to add based on a *job* and a page's *code*."""
banner = "{{" + job.banner
if job.autoassess:
classes = {"fa": 0, "fl": 0, "ga": 0, "a": 0, "b": 0, "start": 0,
"stub": 0, "list": 0, "dab": 0, "c": 0, "redirect": 0,
"book": 0, "template": 0, "category": 0}
for template in code.ifilter_templates(recursive=True):
if template.has_param("class"):
value = unicode(template.get("class").value).lower()
if value in classes:
classes[value] += 1
values = tuple(classes.values())
banner = job.banner
if code is not None and job.autoassess is not False:
assessment = self.get_autoassessment(code, job.autoassess)
if assessment:
banner += "|class=" + assessment
if job.append:
banner += "|" + "|".join(job.append.split(","))
return "{{" + banner + "}}"

def update_banner(self, banner, job, code):
    """Update an existing *banner* based on a *job* and a page's *code*.

    *banner* is modified in place; nothing is returned. Only parameters
    that are missing from the banner, or present with an empty value, are
    filled in, so existing values are never overwritten.

    If the job auto-assesses, a ``class`` parameter is added when
    :meth:`get_autoassessment` finds one for *code*. Then each
    comma-separated ``key=value`` entry in ``job.append`` is added.
    """
    if job.autoassess is not False:
        if not banner.has("class") or not banner.get("class").value:
            assessment = self.get_autoassessment(code, job.autoassess)
            if assessment:
                banner.add("class", assessment)

    if job.append:
        for param in job.append.split(","):
            # An entry without "=" has no key to look up, and unpacking the
            # split result would raise ValueError. Skip it and warn rather
            # than abort the whole run.
            key, sep, value = param.partition("=")
            if not sep:
                self.logger.warning(
                    u"Ignoring parameter without a value in update: %s",
                    param)
                continue
            if not banner.has(key) or not banner.get(key).value:
                banner.add(key, value)

def get_autoassessment(self, code, only_classes=None):
    """Return an assessment class for a page based on its *code*.

    The ``class`` parameter of every template in *code* is tallied. If one
    recognised class accounts for more than 75% of the recognised values,
    it is returned: uppercased for FA/FL/GA, otherwise with its first
    letter capitalised. ``None`` is returned when there is no clear winner
    or no template carries a recognised class.

    *only_classes* may be a comma-separated string restricting which
    classes are recognised. ``None`` or ``True`` (presumably what a bare
    ``--autoassess`` flag yields; confirm against the argument parser)
    selects the default list of classes.
    """
    if only_classes is None or only_classes is True:
        classnames = ["a", "b", "book", "c", "category", "dab", "fa",
                      "fl", "ga", "list", "redirect", "start", "stub",
                      "template"]
    else:
        classnames = [klass.strip().lower()
                      for klass in only_classes.split(",")]

    # Blank entries (e.g. from a trailing comma) can never be a valid class.
    classes = {klass: 0 for klass in classnames if klass}
    for template in code.ifilter_templates(recursive=True):
        if template.has("class"):
            # Parameter values keep surrounding whitespace ("|class=B\n"),
            # so strip before comparing.
            value = template.get("class").value.strip().lower()
            if value in classes:
                classes[value] += 1

    total = sum(classes.values())
    if not total:
        # Nothing was tallied; without this guard the confidence
        # calculation below would divide by zero.
        return None

    rank = max(classes, key=classes.get)
    if float(classes[rank]) / total <= 0.75:
        return None
    if rank in ("fa", "fl", "ga"):
        return rank.upper()
    return self._upperfirst(rank)


def get_banner_shell(self, code): def get_banner_shell(self, code):
"""Return the banner shell template within *code*, else ``None``.""" """Return the banner shell template within *code*, else ``None``."""
@@ -281,8 +364,8 @@ class WikiProjectTagger(Task):
if not shells: if not shells:
shells = code.filter_templates(matches=regex, recursive=True) shells = code.filter_templates(matches=regex, recursive=True)
if shells: if shells:
log = u"Inserting banner into shell: {0}"
self.logger.debug(log.format(shells[0].name))
log = u"Inserting banner into shell: %s"
self.logger.debug(log, shells[0].name)
return shells[0] return shells[0]


def add_banner(self, code, banner): def add_banner(self, code, banner):
@@ -292,15 +375,16 @@ class WikiProjectTagger(Task):
name = template.name.lower().replace("_", " ") name = template.name.lower().replace("_", " ")
for regex in self.TOP_TEMPS: for regex in self.TOP_TEMPS:
if re.match(regex, name): if re.match(regex, name):
self.logger.info("Skipping top template: {0}".format(name))
self.logger.debug(u"Skipping top template: %s", name)
index = i + 1 index = i + 1


self.logger.debug(u"Inserting banner at index {0}".format(index))
self.logger.debug(u"Inserting banner at index %s", index)
code.insert(index, banner) code.insert(index, banner)


def apply_genfixes(self, code): def apply_genfixes(self, code):
"""Apply general fixes to *code*, such as template substitution.""" """Apply general fixes to *code*, such as template substitution."""
regex = r"^\{\{\s*((un|no)?s(i((gn|ng)(ed3?)?|g))?|usu|tilde|forgot to sign|without signature)"
regex = (r"^\{\{\s*((un|no)?s(i((gn|ng)(ed3?)?|g))?|usu|tilde|"
r"forgot to sign|without signature)")
for template in code.ifilter_templates(matches=regex): for template in code.ifilter_templates(matches=regex):
self.logger.debug("Applying genfix: substitute {{unsigned}}") self.logger.debug("Applying genfix: substitute {{unsigned}}")
template.name = "subst:unsigned" template.name = "subst:unsigned"
@@ -313,13 +397,17 @@ class _Job(object):
or not to autoassess and create new pages from scratch, and a counter of or not to autoassess and create new pages from scratch, and a counter of
the number of pages edited. the number of pages edited.
""" """
def __init__(self, banner, names, summary, append, autoassess, nocreate):
self.banner = banner
self.names = names
self.summary = summary
self.append = append
self.autoassess = autoassess
self.nocreate = nocreate
def __init__(self, **kwargs):
self.banner = kwargs["banner"]
self.names = kwargs["names"]
self.summary = kwargs["summary"]
self.update = kwargs["update"]
self.append = kwargs["append"]
self.autoassess = kwargs["autoassess"]
self.only_with = kwargs["only_with"]
self.nocreate = kwargs["nocreate"]
self.genfixes = kwargs["genfixes"]
self.dry_run = kwargs["dry_run"]
self.counter = 0 self.counter = 0






Loading…
Cancel
Save