Browse Source

banner tagger: can tag categories, add |auto param if appropriate, fixes

tags/v0.3
Ben Kurtovic 7 years ago
parent
commit
f684821aef
1 changed file with 38 additions and 17 deletions
  1. +38
    -17
      earwigbot/tasks/wikiproject_tagger.py

+ 38
- 17
earwigbot/tasks/wikiproject_tagger.py View File

@@ -39,7 +39,7 @@ class WikiProjectTagger(Task):
``--banner BANNER`` ``--banner BANNER``
the page name of the banner to add, without a namespace (unless the the page name of the banner to add, without a namespace (unless the
namespace is something other than ``Template``) so namespace is something other than ``Template``) so
``--banner WikiProject Biography`` for ``{{WikiProject Biography}}``
``--banner "WikiProject Biography"`` for ``{{WikiProject Biography}}``
``--category CAT`` or ``--file FILE`` ``--category CAT`` or ``--file FILE``
determines which pages to tag; either all pages in a category (to determines which pages to tag; either all pages in a category (to
include subcategories as well, see ``--recursive``) or all include subcategories as well, see ``--recursive``) or all
@@ -67,6 +67,9 @@ class WikiProjectTagger(Task):
``--recursive NUM`` ``--recursive NUM``
recursively go through subcategories up to a maximum depth of ``NUM``, recursively go through subcategories up to a maximum depth of ``NUM``,
or if ``NUM`` isn't provided, go infinitely (this can be dangerous) or if ``NUM`` isn't provided, go infinitely (this can be dangerous)
``--tag-categories``
also tag category pages; will autoassess with ``|class=category`` if
``--autoassess`` is given and ``category`` is among the permitted classes
``--genfixes`` ``--genfixes``
apply general fixes to the page if already making other changes apply general fixes to the page if already making other changes
``--site SITE`` ``--site SITE``
@@ -134,6 +137,7 @@ class WikiProjectTagger(Task):
ow_banner = kwargs.get("only-with") ow_banner = kwargs.get("only-with")
nocreate = kwargs.get("nocreate", False) nocreate = kwargs.get("nocreate", False)
recursive = kwargs.get("recursive", 0) recursive = kwargs.get("recursive", 0)
tag_categories = kwargs.get("tag-categories", False)
genfixes = kwargs.get("genfixes", False) genfixes = kwargs.get("genfixes", False)
dry_run = kwargs.get("dry-run", False) dry_run = kwargs.get("dry-run", False)
banner, names = self.get_names(site, banner) banner, names = self.get_names(site, banner)
@@ -148,7 +152,8 @@ class WikiProjectTagger(Task):


job = _Job(banner=banner, names=names, summary=summary, update=update, job = _Job(banner=banner, names=names, summary=summary, update=update,
append=append, autoassess=autoassess, only_with=only_with, append=append, autoassess=autoassess, only_with=only_with,
nocreate=nocreate, genfixes=genfixes, dry_run=dry_run)
nocreate=nocreate, tag_categories=tag_categories,
genfixes=genfixes, dry_run=dry_run)


try: try:
self.run_job(kwargs, site, job, recursive) self.run_job(kwargs, site, job, recursive)
@@ -218,6 +223,8 @@ class WikiProjectTagger(Task):
self.logger.info(u"Processing category: [[%s]]", page.title) self.logger.info(u"Processing category: [[%s]]", page.title)
for member in page.get_members(): for member in page.get_members():
if member.namespace == constants.NS_CATEGORY: if member.namespace == constants.NS_CATEGORY:
if job.tag_categories:
self.process_page(member, job, is_category=True)
if recursive is True: if recursive is True:
self.process_category(member, job, True) self.process_category(member, job, True)
elif recursive > 0: elif recursive > 0:
@@ -225,7 +232,7 @@ class WikiProjectTagger(Task):
else: else:
self.process_page(member, job) self.process_page(member, job)


def process_page(self, page, job):
def process_page(self, page, job, is_category=False):
"""Try to tag a specific *page* using the *job* description.""" """Try to tag a specific *page* using the *job* description."""
if job.counter % 10 == 0: # Do a shutoff check every ten pages if job.counter % 10 == 0: # Do a shutoff check every ten pages
if self.shutoff_enabled(page.site): if self.shutoff_enabled(page.site):
@@ -264,7 +271,7 @@ class WikiProjectTagger(Task):


if is_update: if is_update:
old_banner = unicode(banner) old_banner = unicode(banner)
self.update_banner(banner, job, code)
self.update_banner(banner, job, code, is_category=is_category)
if banner == old_banner: if banner == old_banner:
log = u"Skipping page: [[%s]]; already tagged and no updates" log = u"Skipping page: [[%s]]; already tagged and no updates"
self.logger.info(log, page.title) self.logger.info(log, page.title)
@@ -273,7 +280,7 @@ class WikiProjectTagger(Task):
banner = banner.encode("utf8") banner = banner.encode("utf8")
else: else:
self.logger.info(u"Tagging page: [[%s]]", page.title) self.logger.info(u"Tagging page: [[%s]]", page.title)
banner = self.make_banner(job, code)
banner = self.make_banner(job, code, is_category=is_category)
shell = self.get_banner_shell(code) shell = self.get_banner_shell(code)
if shell: if shell:
if shell.has_param(1): if shell.has_param(1):
@@ -306,34 +313,44 @@ class WikiProjectTagger(Task):
summary = job.summary.replace("$3", banner) summary = job.summary.replace("$3", banner)
page.edit(text, self.make_summary(summary), minor=True) page.edit(text, self.make_summary(summary), minor=True)


def make_banner(self, job, code=None):
def make_banner(self, job, code=None, is_category=False):
"""Return banner text to add based on a *job* and a page's *code*.""" """Return banner text to add based on a *job* and a page's *code*."""
banner = job.banner banner = job.banner
if code is not None and job.autoassess is not False: if code is not None and job.autoassess is not False:
assessment = self.get_autoassessment(code, job.autoassess)
if assessment:
banner += "|class=" + assessment
assess, reason = self.get_autoassessment(
code, job.autoassess, is_category=is_category)
if assess:
banner += "|class=" + assess
if reason:
banner += "|auto=" + reason
if job.append: if job.append:
banner += "|" + "|".join(job.append.split(",")) banner += "|" + "|".join(job.append.split(","))
return "{{" + banner + "}}" return "{{" + banner + "}}"


def update_banner(self, banner, job, code):
def update_banner(self, banner, job, code, is_category=False):
"""Update an existing *banner* based on a *job* and a page's *code*.""" """Update an existing *banner* based on a *job* and a page's *code*."""
has = lambda key: (banner.has(key) and has = lambda key: (banner.has(key) and
banner.get(key).value.strip() not in ("", "?")) banner.get(key).value.strip() not in ("", "?"))


if job.autoassess is not False: if job.autoassess is not False:
if not has("class"): if not has("class"):
assessment = self.get_autoassessment(code, job.autoassess)
if assessment:
banner.add("class", assessment)
assess, reason = self.get_autoassessment(
code, job.autoassess, is_category=is_category)
if assess:
banner.add("class", assess)
if reason:
banner.add("auto", reason)
if job.append: if job.append:
for param in job.append.split(","): for param in job.append.split(","):
key, value = param.split("=", 1) key, value = param.split("=", 1)
if not has(key): if not has(key):
banner.add(key, value) banner.add(key, value)


def get_autoassessment(self, code, only_classes=None):
def get_autoassessment(self, code, only_classes=None, is_category=False):
"""Get an autoassessment for a page.

Return (assessed class as a string or None, assessment reason or None).
"""
if only_classes is None: if only_classes is None:
classnames = ["a", "b", "book", "c", "category", "dab", "fa", classnames = ["a", "b", "book", "c", "category", "dab", "fa",
"fl", "ga", "list", "redirect", "start", "stub", "fl", "ga", "list", "redirect", "start", "stub",
@@ -342,6 +359,9 @@ class WikiProjectTagger(Task):
classnames = [klass.strip().lower() classnames = [klass.strip().lower()
for klass in only_classes.split(",")] for klass in only_classes.split(",")]


if is_category:
return ("category" if "category" in classnames else None), None

classes = {klass: 0 for klass in classnames} classes = {klass: 0 for klass in classnames}
for template in code.ifilter_templates(recursive=True): for template in code.ifilter_templates(recursive=True):
if template.has("class"): if template.has("class"):
@@ -356,10 +376,10 @@ class WikiProjectTagger(Task):
if confidence > 0.75: if confidence > 0.75:
rank = tuple(classes.keys())[values.index(best)] rank = tuple(classes.keys())[values.index(best)]
if rank in ("fa", "fl", "ga"): if rank in ("fa", "fl", "ga"):
return rank.upper()
return rank.upper(), "inherit"
else: else:
return self._upperfirst(rank)
return None
return self._upperfirst(rank), "inherit"
return None, None


def get_banner_shell(self, code): def get_banner_shell(self, code):
"""Return the banner shell template within *code*, else ``None``.""" """Return the banner shell template within *code*, else ``None``."""
@@ -410,6 +430,7 @@ class _Job(object):
self.autoassess = kwargs["autoassess"] self.autoassess = kwargs["autoassess"]
self.only_with = kwargs["only_with"] self.only_with = kwargs["only_with"]
self.nocreate = kwargs["nocreate"] self.nocreate = kwargs["nocreate"]
self.tag_categories = kwargs["tag_categories"]
self.genfixes = kwargs["genfixes"] self.genfixes = kwargs["genfixes"]
self.dry_run = kwargs["dry_run"] self.dry_run = kwargs["dry_run"]
self.counter = 0 self.counter = 0


Loading…
Cancel
Save