From e81284af91ed48f3d544bf2f618d0629f30ca997 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 26 Aug 2012 02:36:04 -0400 Subject: [PATCH 1/9] A list of regexes for templates that always go above the banner. --- earwigbot/tasks/wikiproject_tagger.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/earwigbot/tasks/wikiproject_tagger.py b/earwigbot/tasks/wikiproject_tagger.py index e0ae917..658949b 100644 --- a/earwigbot/tasks/wikiproject_tagger.py +++ b/earwigbot/tasks/wikiproject_tagger.py @@ -20,14 +20,40 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +import re + from earwigbot.tasks import Task class WikiProjectTagger(Task): - """A task to tag talk pages with WikiProject Banners.""" + """A task to tag talk pages with WikiProject banners. + + Usage: + earwigbot -t wikiproject_tagger PATH [--category CAT] + """ name = "wikiproject_tagger" + # Regexes for template names that should always go above the banner: + TOP_TEMPS = [ + "skip[ _]?to ?(toc|talk|toctalk)", + "community ?article ?probation", + "censor(-nudity)?", + "controvers(ial2?|y)" + "blp(o| ?others?)?", + "(user ?)?talk ?(header|page|page ?header)", + "(not ?(a ?)?)?forum", + "tv(episode|series)talk", + "recurring ?themes", + "faq", + "(round ?in ?)?circ(les|ular)", + "ar(ti|it)cle ?(history|milestones)", + "failed ?ga", + "old ?prod( ?full)?", + "(old|previous) ?afd", + "((wikiproject|wp) ?)?bio(graph(y|ies))?" + ] + def setup(self): pass def run(self, **kwargs): - pass + print kwargs From b9bfeb4f2f837574b634f17e7f70d6e41f11d1e4 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 26 Aug 2012 16:45:03 -0400 Subject: [PATCH 2/9] Usage notes for wikiproject_tagger --- docs/api/earwigbot.tasks.rst | 1 - earwigbot/tasks/wikiproject_tagger.py | 56 +++++++++++++++++++++++++++++------ 2 files changed, 47 insertions(+), 10 deletions(-) diff --git a/docs/api/earwigbot.tasks.rst b/docs/api/earwigbot.tasks.rst index 5ab84dc..1e0a50d 100644 --- a/docs/api/earwigbot.tasks.rst +++ b/docs/api/earwigbot.tasks.rst @@ -13,5 +13,4 @@ tasks Package .. automodule:: earwigbot.tasks.wikiproject_tagger :members: - :undoc-members: :show-inheritance: diff --git a/earwigbot/tasks/wikiproject_tagger.py b/earwigbot/tasks/wikiproject_tagger.py index 658949b..88867f1 100644 --- a/earwigbot/tasks/wikiproject_tagger.py +++ b/earwigbot/tasks/wikiproject_tagger.py @@ -27,33 +27,71 @@ from earwigbot.tasks import Task class WikiProjectTagger(Task): """A task to tag talk pages with WikiProject banners. - Usage: - earwigbot -t wikiproject_tagger PATH [--category CAT] + Usage: :command:`earwigbot -t wikiproject_tagger PATH + --banner BANNER (--category CAT | --file FILE) [--summary SUM] + [--append TEXT] [--autoassess] [--nocreate] [--recursive NUM]` + + .. glossary:: + + ``--banner BANNER`` + the page name of the banner to add, without a namespace (unless the + namespace is something other than ``Template``) so + ``--banner WikiProject Biography`` for ``{{WikiProject Biography}}`` + ``--category CAT`` or ``--file FILE`` + determines which pages to tag; either all pages in a category (to + include subcategories as well, see ``--recursive``) or all + pages/categories in a file (utf-8 encoded and path relative to the + current directory) + ``--summary SUM`` + an optional edit summary to use; defaults to + ``"Adding {{BANNER}} to article talk page."`` + ``--append TEXT`` + optional text to append to the banner (after an autoassessment, if + any), like ``|importance=low`` + ``--autoassess`` + try to assess each article's class automatically based on the class of + other banners on the same page + ``--nocreate`` + don't create new talk pages with just a banner if the page doesn't + already exist + ``--recursive NUM`` + recursively go through subcategories up to a maximum depth of ``NUM``, + or if ``NUM`` isn't provided, go infinitely (this can be dangerous) + """ name = "wikiproject_tagger" - # Regexes for template names that should always go above the banner: + # Regexes for template names that should always go above the banner, based + # on [[Wikipedia:Talk page layout]]: TOP_TEMPS = [ "skip[ _]?to ?(toc|talk|toctalk)", + + "ga ?nominee", + + "(user ?)?talk ?(header|page|page ?header)", + "community ?article ?probation", "censor(-nudity)?", - "controvers(ial2?|y)" "blp(o| ?others?)?", - "(user ?)?talk ?(header|page|page ?header)", + "controvers(ial2?|y)" + "(not ?(a ?)?)?forum", "tv(episode|series)talk", "recurring ?themes", "faq", "(round ?in ?)?circ(les|ular)", + "ar(ti|it)cle ?(history|milestones)", "failed ?ga", "old ?prod( ?full)?", "(old|previous) ?afd", + "((wikiproject|wp) ?)?bio(graph(y|ies))?" ] - def setup(self): - pass - def run(self, **kwargs): - print kwargs + """Main entry point for the bot task.""" + if "category" in kwargs: + pass + elif "file" in kwargs: + pass From ad07bbafe9b5028e88a9a68ab3b342e719968de7 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 27 Aug 2012 00:18:24 -0400 Subject: [PATCH 3/9] Implement run(), process_category() methods. --- earwigbot/tasks/wikiproject_tagger.py | 64 ++++++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 4 deletions(-) diff --git a/earwigbot/tasks/wikiproject_tagger.py b/earwigbot/tasks/wikiproject_tagger.py index 88867f1..eb16e05 100644 --- a/earwigbot/tasks/wikiproject_tagger.py +++ b/earwigbot/tasks/wikiproject_tagger.py @@ -22,14 +22,17 @@ import re +from earwigbot import exceptions from earwigbot.tasks import Task +from earwigbot.wiki import constants class WikiProjectTagger(Task): """A task to tag talk pages with WikiProject banners. Usage: :command:`earwigbot -t wikiproject_tagger PATH --banner BANNER (--category CAT | --file FILE) [--summary SUM] - [--append TEXT] [--autoassess] [--nocreate] [--recursive NUM]` + [--append TEXT] [--autoassess] [--nocreate] [--recursive NUM] + [--site SITE]` .. glossary:: @@ -57,6 +60,8 @@ class WikiProjectTagger(Task): ``--recursive NUM`` recursively go through subcategories up to a maximum depth of ``NUM``, or if ``NUM`` isn't provided, go infinitely (this can be dangerous) + ``--site SITE`` + the ID of the site to tag pages on, defaulting to the... default site """ name = "wikiproject_tagger" @@ -91,7 +96,58 @@ class WikiProjectTagger(Task): def run(self, **kwargs): """Main entry point for the bot task.""" + if "file" not in kwargs and "category" not in kwargs: + log = "No pages to tag; I need either a 'category' or a 'file' passed as kwargs" + self.logger.error(log) + return + if "banner" not in kwargs: + log = "Needs a banner to add passed as the 'banner' kwarg" + self.logger.error(log) + return + + banner = kwargs["banner"] + summary = kwargs.get("summary", "Adding $3 to article talk page.") + append = kwargs.get("append") + autoassess = kwargs.get("autoassess", False) + nocreate = kwargs.get("nocreate", False) + recursive = kwargs.get("recursive", 0) + site = self.bot.wiki.get_site(name=kwargs.get("site")) + if "category" in kwargs: - pass - elif "file" in kwargs: - pass + title = kwargs["category"] + prefix = name.split(":", 1)[0] + ns_cat = site.namespace_id_to_name(constants.NS_CATEGORY) + if prefix == title: + title = u":".join((ns_cat, title)) + else: + try: + site.namespace_name_to_id(prefix) + except exceptions.NamespaceNotFoundError: + title = u":".join((ns_cat, title)) + self.process_category(title, recursive) + + if "file" in kwargs: + with open(kwargs["file"], "r") as fileobj: + for line in fileobj: + if line.strip(): + line = line.decode("utf8") + if line.startswith("[[") and line.endswith("]]"): + line = line[2:-2] + page = site.get_page(line) + if page.namespace == constants.NS_CATEGORY: + self.process_category(page, recursive) + else: + self.process_page(page) + + def process_category(self, page, recursive): + for member in page.get_members(): + if member.namespace == constants.NS_CATEGORY: + if recursive is True: + self.process_category(member, True) + elif recursive: + self.process_category(member, recursive - 1) + else: + self.process_page(member) + + def process_page(self, page): + raise NotImplementedError(page) From 1799f2f568f371abb1281702013c4a905f0f24be Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 27 Aug 2012 01:29:50 -0400 Subject: [PATCH 4/9] Implement guess_namespace(), get_names(), part of process_page(). --- earwigbot/tasks/wikiproject_tagger.py | 105 +++++++++++++++++++++++++++------- 1 file changed, 84 insertions(+), 21 deletions(-) diff --git a/earwigbot/tasks/wikiproject_tagger.py b/earwigbot/tasks/wikiproject_tagger.py index eb16e05..92473ed 100644 --- a/earwigbot/tasks/wikiproject_tagger.py +++ b/earwigbot/tasks/wikiproject_tagger.py @@ -47,7 +47,7 @@ class WikiProjectTagger(Task): current directory) ``--summary SUM`` an optional edit summary to use; defaults to - ``"Adding {{BANNER}} to article talk page."`` + ``"Adding WikiProject banner {{BANNER}}."`` ``--append TEXT`` optional text to append to the banner (after an autoassessment, if any), like ``|importance=low`` @@ -106,25 +106,21 @@ class WikiProjectTagger(Task): return banner = kwargs["banner"] - summary = kwargs.get("summary", "Adding $3 to article talk page.") + summary = kwargs.get("summary", "Adding WikiProject banner $3.") append = kwargs.get("append") autoassess = kwargs.get("autoassess", False) nocreate = kwargs.get("nocreate", False) recursive = kwargs.get("recursive", 0) + banner, names = self.get_names(site, banner) + if not names: + return + job = _Job(banner, names, summary, append, autoassess, nocreate) site = self.bot.wiki.get_site(name=kwargs.get("site")) if "category" in kwargs: title = kwargs["category"] - prefix = name.split(":", 1)[0] - ns_cat = site.namespace_id_to_name(constants.NS_CATEGORY) - if prefix == title: - title = u":".join((ns_cat, title)) - else: - try: - site.namespace_name_to_id(prefix) - except exceptions.NamespaceNotFoundError: - title = u":".join((ns_cat, title)) - self.process_category(title, recursive) + title = self.guess_namespace(title, constants.NS_CATEGORY) + self.process_category(site.get_page(title), job, recursive) if "file" in kwargs: with open(kwargs["file"], "r") as fileobj: @@ -135,19 +131,86 @@ class WikiProjectTagger(Task): line = line[2:-2] page = site.get_page(line) if page.namespace == constants.NS_CATEGORY: - self.process_category(page, recursive) + self.process_category(page, job, recursive) else: - self.process_page(page) - - def process_category(self, page, recursive): + self.process_page(page, job) + + def guess_namespace(self, title, assumed): + prefix = title.split(":", 1)[0] + if prefix == title: + return u":".join((site.namespace_id_to_name(assumed), title)) + try: + site.namespace_name_to_id(prefix) + except exceptions.NamespaceNotFoundError: + return u":".join((site.namespace_id_to_name(assumed), title)) + return title + + def get_names(self, site, banner): + title = self.guess_namespace(banner, constants.NS_TEMPLATE) + if title == banner: + banner = banner.split(":", 1)[1] + page = site.get_page(title) + if page.exists != page.PAGE_EXISTS: + self.logger.error("Banner [[{0}]] does not exist".format(title)) + return banner, None + + names = [banner] if banner == title else [banner, title] + result = site.api_query(action="query", list="backlinks", bllimit=500, + blfilterredir="redirects", bltitle=title) + for backlink in result["query"]["backlinks"]: + names.append(backlink["title"]) + if backlink["ns"] == constants.NS_TEMPLATE: + names.append(backlink["title"].split(":", 1)[1]) + + log = "Found {0} aliases for banner [[{1}]]".format(len(names), title) + self.logger.debug(log) + return banner, names + + def process_category(self, page, job, recursive): + self.logger.info("Processing category: [[{0]]".format(page.title)) for member in page.get_members(): if member.namespace == constants.NS_CATEGORY: if recursive is True: - self.process_category(member, True) + self.process_category(member, job, True) elif recursive: - self.process_category(member, recursive - 1) + self.process_category(member, job, recursive - 1) else: - self.process_page(member) + self.process_page(member, job) + + def process_page(self, page, job): + if not page.is_talkpage: + page = page.toggle_talk() + try: + code = page.parse() + except exceptions.PageNotFoundError: + if job.nocreate: + log = "Skipping nonexistent page: [[{0}]]".format(page.title) + self.logger.info(log) + else: + log = "Tagging new page: [[{0}]]".format(page.title) + self.logger.info(log) + banner = "{{" + job.banner + job.append + "}}" + summary = job.summary.replace("$3", banner) + page.edit(banner, self.make_summary(summary)) + return + except exceptions.InvalidPageError: + log = u"Skipping invalid page: [[{0}]]".format(page.title) + self.logger.error(log) + return + + raise NotImplementedError() + + text = unicode(code) + if page.get() != text: + summary = job.summary.replace("$3", banner) + page.edit(text, self.make_summary(summary)) + - def process_page(self, page): - raise NotImplementedError(page) +class _Job(object): + def __init__(self, banner, names, summary, append, autoassess, nocreate): + self.banner = banner + self.names = names + self.summary = summary + self.append = append + self.autoassess = autoassess + self.nocreate = nocreate From 588d3fb6966f6e57c80b0773d85d229f2cce371c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 29 Aug 2012 18:12:13 -0400 Subject: [PATCH 5/9] Cleanup; support shutoff. --- earwigbot/tasks/wikiproject_tagger.py | 43 +++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/earwigbot/tasks/wikiproject_tagger.py b/earwigbot/tasks/wikiproject_tagger.py index 92473ed..ed885e8 100644 --- a/earwigbot/tasks/wikiproject_tagger.py +++ b/earwigbot/tasks/wikiproject_tagger.py @@ -105,6 +105,7 @@ class WikiProjectTagger(Task): self.logger.error(log) return + site = self.bot.wiki.get_site(name=kwargs.get("site")) banner = kwargs["banner"] summary = kwargs.get("summary", "Adding WikiProject banner $3.") append = kwargs.get("append") @@ -115,11 +116,16 @@ class WikiProjectTagger(Task): if not names: return job = _Job(banner, names, summary, append, autoassess, nocreate) - site = self.bot.wiki.get_site(name=kwargs.get("site")) + try: + self.run_job(kwargs, site, job, recursive) + except _ShutoffEnabled: + return + + def run_job(self, kwargs, site, job, recursive): if "category" in kwargs: title = kwargs["category"] - title = self.guess_namespace(title, constants.NS_CATEGORY) + title = self.guess_namespace(site, title, constants.NS_CATEGORY) self.process_category(site.get_page(title), job, recursive) if "file" in kwargs: @@ -135,7 +141,7 @@ class WikiProjectTagger(Task): else: self.process_page(page, job) - def guess_namespace(self, title, assumed): + def guess_namespace(self, site, title, assumed): prefix = title.split(":", 1)[0] if prefix == title: return u":".join((site.namespace_id_to_name(assumed), title)) @@ -146,12 +152,12 @@ class WikiProjectTagger(Task): return title def get_names(self, site, banner): - title = self.guess_namespace(banner, constants.NS_TEMPLATE) + title = self.guess_namespace(site, banner, constants.NS_TEMPLATE) if title == banner: banner = banner.split(":", 1)[1] page = site.get_page(title) if page.exists != page.PAGE_EXISTS: - self.logger.error("Banner [[{0}]] does not exist".format(title)) + self.logger.error(u"Banner [[{0}]] does not exist".format(title)) return banner, None names = [banner] if banner == title else [banner, title] @@ -162,12 +168,12 @@ class WikiProjectTagger(Task): if backlink["ns"] == constants.NS_TEMPLATE: names.append(backlink["title"].split(":", 1)[1]) - log = "Found {0} aliases for banner [[{1}]]".format(len(names), title) + log = u"Found {0} aliases for banner [[{1}]]".format(len(names), title) self.logger.debug(log) return banner, names def process_category(self, page, job, recursive): - self.logger.info("Processing category: [[{0]]".format(page.title)) + self.logger.info(u"Processing category: [[{0]]".format(page.title)) for member in page.get_members(): if member.namespace == constants.NS_CATEGORY: if recursive is True: @@ -178,16 +184,21 @@ class WikiProjectTagger(Task): self.process_page(member, job) def process_page(self, page, job): + if job.counter % 10 == 0: # Do a shutoff check every ten pages + if self.shutoff_enabled(page.site): + raise _ShutoffEnabled() + job.counter += 1 + if not page.is_talkpage: page = page.toggle_talk() try: code = page.parse() except exceptions.PageNotFoundError: if job.nocreate: - log = "Skipping nonexistent page: [[{0}]]".format(page.title) + log = u"Skipping nonexistent page: [[{0}]]".format(page.title) self.logger.info(log) else: - log = "Tagging new page: [[{0}]]".format(page.title) + log = u"Tagging new page: [[{0}]]".format(page.title) self.logger.info(log) banner = "{{" + job.banner + job.append + "}}" summary = job.summary.replace("$3", banner) @@ -202,11 +213,18 @@ class WikiProjectTagger(Task): text = unicode(code) if page.get() != text: + self.logger.info(u"Tagging page: [[{0}]]".format(page.title)) summary = job.summary.replace("$3", banner) page.edit(text, self.make_summary(summary)) class _Job(object): + """Represents a single wikiproject-tagging task. + + Stores information on the banner to add, the edit summary to use, whether + or not to autoassess and create new pages from scratch, and a counter of + the number of pages edited. + """ def __init__(self, banner, names, summary, append, autoassess, nocreate): self.banner = banner self.names = names @@ -214,3 +232,10 @@ class _Job(object): self.append = append self.autoassess = autoassess self.nocreate = nocreate + self.counter = 0 + + +class _ShutoffEnabled(Exception): + """Raised by process_page() if shutoff is enabled. Caught by run(), which + will then stop the task.""" + pass From 57706a82204da3d8d6a789e50ee9917279e66221 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 30 Aug 2012 17:16:53 -0400 Subject: [PATCH 6/9] More progress on tagging stuff. --- earwigbot/tasks/wikiproject_tagger.py | 115 ++++++++++++++++++++++++++-------- 1 file changed, 90 insertions(+), 25 deletions(-) diff --git a/earwigbot/tasks/wikiproject_tagger.py b/earwigbot/tasks/wikiproject_tagger.py index ed885e8..5fbb15f 100644 --- a/earwigbot/tasks/wikiproject_tagger.py +++ b/earwigbot/tasks/wikiproject_tagger.py @@ -69,31 +69,38 @@ class WikiProjectTagger(Task): # Regexes for template names that should always go above the banner, based # on [[Wikipedia:Talk page layout]]: TOP_TEMPS = [ - "skip[ _]?to ?(toc|talk|toctalk)", + r"skip ?to ?(toc|talk|toctalk)", - "ga ?nominee", + r"ga ?nominee", - "(user ?)?talk ?(header|page|page ?header)", + r"(user ?)?talk ?(header|page|page ?header)", - "community ?article ?probation", - "censor(-nudity)?", - "blp(o| ?others?)?", - "controvers(ial2?|y)" + r"community ?article ?probation", + r"censor(-nudity)?", + r"blp(o| ?others?)?", + r"controvers(ial2?|y)" - "(not ?(a ?)?)?forum", - "tv(episode|series)talk", - "recurring ?themes", - "faq", - "(round ?in ?)?circ(les|ular)", + r"(not ?(a ?)?)?forum", + r"tv(episode|series)talk", + r"recurring ?themes", + r"faq", + r"(round ?in ?)?circ(les|ular)", - "ar(ti|it)cle ?(history|milestones)", - "failed ?ga", - "old ?prod( ?full)?", - "(old|previous) ?afd", + r"ar(ti|it)cle ?(history|milestones)", + r"failed ?ga", + r"old ?prod( ?full)?", + r"(old|previous) ?afd", - "((wikiproject|wp) ?)?bio(graph(y|ies))?" + r"((wikiproject|wp) ?)?bio(graph(y|ies))?" ] + def _upperfirst(self, text): + """Try to uppercase the first letter of a string.""" + try: + return text[0].upper() + text[1:] + except IndexError: + return text + def run(self, **kwargs): """Main entry point for the bot task.""" if "file" not in kwargs and "category" not in kwargs: @@ -123,6 +130,7 @@ class WikiProjectTagger(Task): return def run_job(self, kwargs, site, job, recursive): + """Run a tagging *job* on a given *site*.""" if "category" in kwargs: title = kwargs["category"] title = self.guess_namespace(site, title, constants.NS_CATEGORY) @@ -142,6 +150,12 @@ class WikiProjectTagger(Task): self.process_page(page, job) def guess_namespace(self, site, title, assumed): + """If the given *title* does not have an explicit namespace, guess it. + + For example, when transcluding templates, the namespace is guessed to + be ``NS_TEMPLATE`` unless one is explicitly declared (so ``{{foo}}`` -> + ``[[Template:Foo]]``, but ``{{:foo}}`` -> ``[[Foo]]``). + """ prefix = title.split(":", 1)[0] if prefix == title: return u":".join((site.namespace_id_to_name(assumed), title)) @@ -152,6 +166,7 @@ class WikiProjectTagger(Task): return title def get_names(self, site, banner): + """Return all possible aliases for a given *banner* template.""" title = self.guess_namespace(site, banner, constants.NS_TEMPLATE) if title == banner: banner = banner.split(":", 1)[1] @@ -160,7 +175,10 @@ class WikiProjectTagger(Task): self.logger.error(u"Banner [[{0}]] does not exist".format(title)) return banner, None - names = [banner] if banner == title else [banner, title] + if banner == text: + names = [self._upperfirst(banner)] + else: + names = [self._upperfirst(banner), self._upperfirst(title)] result = site.api_query(action="query", list="backlinks", bllimit=500, blfilterredir="redirects", bltitle=title) for backlink in result["query"]["backlinks"]: @@ -173,6 +191,7 @@ class WikiProjectTagger(Task): return banner, names def process_category(self, page, job, recursive): + """Try to tag all pages in the given category.""" self.logger.info(u"Processing category: [[{0]]".format(page.title)) for member in page.get_members(): if member.namespace == constants.NS_CATEGORY: @@ -184,6 +203,7 @@ class WikiProjectTagger(Task): self.process_page(member, job) def process_page(self, page, job): + """Try to tag a specific *page* using the *job* description.""" if job.counter % 10 == 0: # Do a shutoff check every ten pages if self.shutoff_enabled(page.site): raise _ShutoffEnabled() @@ -209,13 +229,58 @@ class WikiProjectTagger(Task): self.logger.error(log) return - raise NotImplementedError() - - text = unicode(code) - if page.get() != text: - self.logger.info(u"Tagging page: [[{0}]]".format(page.title)) - summary = job.summary.replace("$3", banner) - page.edit(text, self.make_summary(summary)) + for template in code.ifilter_templates(recursive=True): + name = self.upperfirst(template.name.strip()) + if name in job.names: + log = u"Skipping page: [[{0}]]; already tagged with '{1}'" + self.logger.info(log.format(page.title, name)) + return + + banner = self.make_banner(job, code) + shell = self.get_banner_shell(code) + if shell: + if shell.has_param(1): + shell.get(1).value.insert(0, banner + "\n") + else: + shell.add(1, banner) + else: + self.add_banner(code, banner) + self.apply_genfixes(code) + + self.logger.info(u"Tagging page: [[{0}]]".format(page.title)) + summary = job.summary.replace("$3", banner) + page.edit(unicode(code), self.make_summary(summary)) + + def make_banner(self, job, code): + """Return banner text to add based on a *job* and a page's *code*.""" + banner = "{{" + job.banner + if job.autoassess: + assessment = self.assess(code) # TODO + if assessment: + banner += "|class=" + assessment + return banner + job.append + "}}" + + def get_banner_shell(self, code): + """Return the banner shell template within *code*, else ``None``.""" + regex = r"^\{\{\s*((WikiProject|WP)[ _]?Banner[ _]?S(hell)?|W(BPS|PBS|PB)|Shell)" + shells = code.filter_templates(matches=regex) + if not shells: + shells = code.filter_templates(matches=regex, recursive=True) + if shells: + return shells[0] + + def add_banner(self, code, banner): + """Add *banner* to *code*, following template order conventions.""" + if has_top_temps: # TODO + xxx + else: + yyy + + def apply_genfixes(self, code): + """Apply general fixes to *code*, such as template substitution.""" + regex = r"^\{\{\s*((un|no)?s(i((gn|ng)(ed3?)?|g))?|usu|tilde|forgot to sign|without signature)" + for template in code.ifilter_templates(matches=regex): + template.name = "subst:unsigned" class _Job(object): From 2c7d39169de0db6ca2f22318c97e783a1bac5b71 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 30 Aug 2012 22:50:43 -0400 Subject: [PATCH 7/9] Implement auto-assessment. --- earwigbot/tasks/wikiproject_tagger.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/earwigbot/tasks/wikiproject_tagger.py b/earwigbot/tasks/wikiproject_tagger.py index 5fbb15f..099306b 100644 --- a/earwigbot/tasks/wikiproject_tagger.py +++ b/earwigbot/tasks/wikiproject_tagger.py @@ -255,9 +255,23 @@ class WikiProjectTagger(Task): """Return banner text to add based on a *job* and a page's *code*.""" banner = "{{" + job.banner if job.autoassess: - assessment = self.assess(code) # TODO - if assessment: - banner += "|class=" + assessment + classes = {"fa": 0, "fl": 0, "ga": 0, "a": 0, "b": 0, "start": 0, + "stub": 0, "list": 0, "dab": 0, "c": 0, "redirect": 0, + "book": 0, "template": 0, "category": 0} + for template in code.ifilter_templates(recursive=True): + if template.has_param("class"): + value = str(template.get("class").value).lower() + if value in classes: + classes[value] += 1 + values = tuple(classes.values()) + best = max(values) + confidence = float(best) / sum(values) + if confidence > 0.75: + rank = tuple(classes.keys())[values.index(best)] + if rank in ("fa", "fl", "ga"): + banner += "|class=" + rank.upper() + else: + banner += "|class=" + self._upperfirst(rank) return banner + job.append + "}}" def get_banner_shell(self, code): @@ -271,6 +285,7 @@ class WikiProjectTagger(Task): def add_banner(self, code, banner): """Add *banner* to *code*, following template order conventions.""" + ins_index = 0 if has_top_temps: # TODO xxx else: From 2afadce77152ddd2bc6f35afabfdba4aa83c248d Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 30 Aug 2012 23:07:11 -0400 Subject: [PATCH 8/9] Finish up task. --- earwigbot/tasks/wikiproject_tagger.py | 54 ++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/earwigbot/tasks/wikiproject_tagger.py b/earwigbot/tasks/wikiproject_tagger.py index 099306b..f2073fe 100644 --- a/earwigbot/tasks/wikiproject_tagger.py +++ b/earwigbot/tasks/wikiproject_tagger.py @@ -69,29 +69,29 @@ class WikiProjectTagger(Task): # Regexes for template names that should always go above the banner, based # on [[Wikipedia:Talk page layout]]: TOP_TEMPS = [ - r"skip ?to ?(toc|talk|toctalk)", + r"skip ?to ?(toc|talk|toctalk)$", - r"ga ?nominee", + r"ga ?nominee$", - r"(user ?)?talk ?(header|page|page ?header)", + r"(user ?)?talk ?(header|page|page ?header)$", - r"community ?article ?probation", - r"censor(-nudity)?", - r"blp(o| ?others?)?", - r"controvers(ial2?|y)" + r"community ?article ?probation$", + r"censor(-nudity)?$", + r"blp(o| ?others?)?$", + r"controvers(ial2?|y)$", - r"(not ?(a ?)?)?forum", - r"tv(episode|series)talk", - r"recurring ?themes", - r"faq", - r"(round ?in ?)?circ(les|ular)", + r"(not ?(a ?)?)?forum$", + r"tv(episode|series)talk$", + r"recurring ?themes$", + r"faq$", + r"(round ?in ?)?circ(les|ular)$", - r"ar(ti|it)cle ?(history|milestones)", - r"failed ?ga", - r"old ?prod( ?full)?", - r"(old|previous) ?afd", + r"ar(ti|it)cle ?(history|milestones)$", + r"failed ?ga$", + r"old ?prod( ?full)?$", + r"(old|previous) ?afd$", - r"((wikiproject|wp) ?)?bio(graph(y|ies))?" + r"((wikiproject|wp) ?)?bio(graph(y|ies))?$", ] def _upperfirst(self, text): @@ -260,7 +260,7 @@ class WikiProjectTagger(Task): "book": 0, "template": 0, "category": 0} for template in code.ifilter_templates(recursive=True): if template.has_param("class"): - value = str(template.get("class").value).lower() + value = unicode(template.get("class").value).lower() if value in classes: classes[value] += 1 values = tuple(classes.values()) @@ -281,20 +281,28 @@ class WikiProjectTagger(Task): if not shells: shells = code.filter_templates(matches=regex, recursive=True) if shells: + log = u"Inserting banner into shell: {0}" + self.logger.debug(log.format(shells[0].name)) return shells[0] def add_banner(self, code, banner): """Add *banner* to *code*, following template order conventions.""" - ins_index = 0 - if has_top_temps: # TODO - xxx - else: - yyy + index = 0 + for i, template in enumerate(code.ifilter_templates()): + name = template.name.lower().replace("_", " ") + for regex in self.TOP_TEMPS: + if re.match(regex, name): + self.logger.info("Skipping top template: {0}".format(name)) + index = i + 1 + + self.logger.debug(u"Inserting banner at index {0}".format(index)) + code.insert(index, banner) def apply_genfixes(self, code): """Apply general fixes to *code*, such as template substitution.""" regex = r"^\{\{\s*((un|no)?s(i((gn|ng)(ed3?)?|g))?|usu|tilde|forgot to sign|without signature)" for template in code.ifilter_templates(matches=regex): + self.logger.debug("Applying genfix: substitute {{unsigned}}") template.name = "subst:unsigned" From a3481ca6d3ae1cfb3ef89216e1da7ad338046573 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 30 Aug 2012 23:09:45 -0400 Subject: [PATCH 9/9] A couple of bugfixes. --- earwigbot/tasks/wikiproject_tagger.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/earwigbot/tasks/wikiproject_tagger.py b/earwigbot/tasks/wikiproject_tagger.py index f2073fe..1d7d30f 100644 --- a/earwigbot/tasks/wikiproject_tagger.py +++ b/earwigbot/tasks/wikiproject_tagger.py @@ -175,7 +175,7 @@ class WikiProjectTagger(Task): self.logger.error(u"Banner [[{0}]] does not exist".format(title)) return banner, None - if banner == text: + if banner == title: names = [self._upperfirst(banner)] else: names = [self._upperfirst(banner), self._upperfirst(title)] @@ -230,7 +230,7 @@ class WikiProjectTagger(Task): return for template in code.ifilter_templates(recursive=True): - name = self.upperfirst(template.name.strip()) + name = self._upperfirst(template.name.strip()) if name in job.names: log = u"Skipping page: [[{0}]]; already tagged with '{1}'" self.logger.info(log.format(page.title, name))