소스 검색

More progress on tagging stuff.

tags/v0.1^2
Ben Kurtovic 11 년 전
부모
커밋
57706a8220
1개의 파일이 변경됨: 90줄 추가, 25줄 삭제
  1. +90
    -25
      earwigbot/tasks/wikiproject_tagger.py

+ 90
- 25
earwigbot/tasks/wikiproject_tagger.py 파일 보기

@@ -69,31 +69,38 @@ class WikiProjectTagger(Task):
# Regexes for template names that should always go above the banner, based
# on [[Wikipedia:Talk page layout]]:
TOP_TEMPS = [
    # Every entry is a raw-string regex and MUST end with a comma: two
    # adjacent string literals are silently concatenated into one pattern,
    # which would drop both of the original regexes from the list.
    r"skip ?to ?(toc|talk|toctalk)",

    r"ga ?nominee",

    r"(user ?)?talk ?(header|page|page ?header)",

    r"community ?article ?probation",
    r"censor(-nudity)?",
    r"blp(o| ?others?)?",
    r"controvers(ial2?|y)",

    r"(not ?(a ?)?)?forum",
    r"tv(episode|series)talk",
    r"recurring ?themes",
    r"faq",
    r"(round ?in ?)?circ(les|ular)",

    r"ar(ti|it)cle ?(history|milestones)",
    r"failed ?ga",
    r"old ?prod( ?full)?",
    r"(old|previous) ?afd",

    r"((wikiproject|wp) ?)?bio(graph(y|ies))?",
]

def _upperfirst(self, text):
"""Try to uppercase the first letter of a string."""
try:
return text[0].upper() + text[1:]
except IndexError:
return text

def run(self, **kwargs):
"""Main entry point for the bot task."""
if "file" not in kwargs and "category" not in kwargs:
@@ -123,6 +130,7 @@ class WikiProjectTagger(Task):
return

def run_job(self, kwargs, site, job, recursive):
"""Run a tagging *job* on a given *site*."""
if "category" in kwargs:
title = kwargs["category"]
title = self.guess_namespace(site, title, constants.NS_CATEGORY)
@@ -142,6 +150,12 @@ class WikiProjectTagger(Task):
self.process_page(page, job)

def guess_namespace(self, site, title, assumed):
"""If the given *title* does not have an explicit namespace, guess it.

For example, when transcluding templates, the namespace is guessed to
be ``NS_TEMPLATE`` unless one is explicitly declared (so ``{{foo}}`` ->
``[[Template:Foo]]``, but ``{{:foo}}`` -> ``[[Foo]]``).
"""
prefix = title.split(":", 1)[0]
if prefix == title:
return u":".join((site.namespace_id_to_name(assumed), title))
@@ -152,6 +166,7 @@ class WikiProjectTagger(Task):
return title

def get_names(self, site, banner):
"""Return all possible aliases for a given *banner* template."""
title = self.guess_namespace(site, banner, constants.NS_TEMPLATE)
if title == banner:
banner = banner.split(":", 1)[1]
@@ -160,7 +175,10 @@ class WikiProjectTagger(Task):
self.logger.error(u"Banner [[{0}]] does not exist".format(title))
return banner, None

names = [banner] if banner == title else [banner, title]
if banner == text:
names = [self._upperfirst(banner)]
else:
names = [self._upperfirst(banner), self._upperfirst(title)]
result = site.api_query(action="query", list="backlinks", bllimit=500,
blfilterredir="redirects", bltitle=title)
for backlink in result["query"]["backlinks"]:
@@ -173,6 +191,7 @@ class WikiProjectTagger(Task):
return banner, names

def process_category(self, page, job, recursive):
"""Try to tag all pages in the given category."""
self.logger.info(u"Processing category: [[{0]]".format(page.title))
for member in page.get_members():
if member.namespace == constants.NS_CATEGORY:
@@ -184,6 +203,7 @@ class WikiProjectTagger(Task):
self.process_page(member, job)

def process_page(self, page, job):
"""Try to tag a specific *page* using the *job* description."""
if job.counter % 10 == 0: # Do a shutoff check every ten pages
if self.shutoff_enabled(page.site):
raise _ShutoffEnabled()
@@ -209,13 +229,58 @@ class WikiProjectTagger(Task):
self.logger.error(log)
return

raise NotImplementedError()

text = unicode(code)
if page.get() != text:
self.logger.info(u"Tagging page: [[{0}]]".format(page.title))
summary = job.summary.replace("$3", banner)
page.edit(text, self.make_summary(summary))
for template in code.ifilter_templates(recursive=True):
name = self.upperfirst(template.name.strip())
if name in job.names:
log = u"Skipping page: [[{0}]]; already tagged with '{1}'"
self.logger.info(log.format(page.title, name))
return

banner = self.make_banner(job, code)
shell = self.get_banner_shell(code)
if shell:
if shell.has_param(1):
shell.get(1).value.insert(0, banner + "\n")
else:
shell.add(1, banner)
else:
self.add_banner(code, banner)
self.apply_genfixes(code)

self.logger.info(u"Tagging page: [[{0}]]".format(page.title))
summary = job.summary.replace("$3", banner)
page.edit(unicode(code), self.make_summary(summary))

def make_banner(self, job, code):
    """Build the banner wikitext to add for *job*, given a page's *code*.

    The result is ``{{<job.banner>[|class=<assessment>]<job.append>}}``.
    The ``class`` parameter is only included when the job has
    ``autoassess`` set and ``self.assess(code)`` returns a truthy value.
    """
    class_param = ""
    if job.autoassess:
        rating = self.assess(code)  # TODO
        if rating:
            class_param = "|class=" + rating
    return "{{" + job.banner + class_param + job.append + "}}"

def get_banner_shell(self, code):
    """Return the banner shell template within *code*, else ``None``.

    Templates returned by the non-recursive ``filter_templates`` call are
    preferred; the recursive search is only used when that finds nothing.
    """
    # The trailing ``\s*(\||\}\})`` requires the template name to END after
    # the shell name (followed by a parameter or the closing braces).
    # Without it, any template whose name merely starts with "Shell",
    # "WPB", etc. -- such as {{WPBiography}} -- is mistaken for a shell.
    regex = r"^\{\{\s*((WikiProject|WP)[ _]?Banner[ _]?S(hell)?|W(BPS|PBS|PB)|Shell)\s*(\||\}\})"
    shells = code.filter_templates(matches=regex)
    if not shells:
        shells = code.filter_templates(matches=regex, recursive=True)
    if shells:
        return shells[0]
    return None

def add_banner(self, code, banner):
    """Add *banner* to *code*, following template order conventions.

    This step is not implemented yet. It raises ``NotImplementedError``
    explicitly instead of failing with a ``NameError`` on placeholder
    names, so the unfinished state is obvious to callers and in logs.
    """
    # TODO: branch on whether the page has any templates that must stay
    # above the banner (see TOP_TEMPS) and place *banner* accordingly.
    raise NotImplementedError()

def apply_genfixes(self, code):
    """Apply general fixes to *code*, such as template substitution.

    Currently the only fix is renaming templates matched by the regex
    below (unsigned-comment templates and their aliases) to
    ``subst:unsigned``. *code* is modified in place; nothing is returned.
    """
    # The trailing ``\s*(\||\}\})`` forces the match to cover the WHOLE
    # template name. Without it the optional groups let a bare leading "s"
    # match, so unrelated templates such as {{sandbox}} or {{signpost}}
    # would be rewritten to {{subst:unsigned}}.
    regex = r"^\{\{\s*((un|no)?s(i((gn|ng)(ed3?)?|g))?|usu|tilde|forgot to sign|without signature)\s*(\||\}\})"
    for template in code.ifilter_templates(matches=regex):
        template.name = "subst:unsigned"


class _Job(object):


불러오는 중...
취소
저장