Kaynağa Gözat

Try merging in templates with parameter values of a certain size (fixes #42)

tags/v0.2
Ben Kurtovic 9 yıl önce
ebeveyn
işleme
509598d7fc
1 değiştirilmiş dosya ile 18 ekleme ve 0 silme
  1. +18
    -0
      earwigbot/wiki/copyvios/parsers.py

+ 18
- 0
earwigbot/wiki/copyvios/parsers.py Dosyayı Görüntüle

@@ -58,6 +58,21 @@ class _BaseTextParser(object):
class ArticleTextParser(_BaseTextParser):
"""A parser that can strip and chunk wikicode article text."""
TYPE = "Article"
TEMPLATE_MERGE_THRESHOLD = 35

def _merge_templates(self, code):
    """Inline the long parameter values of each template, or drop it.

    Every template returned by
    ``code.filter_templates(recursive=code.RECURSE_OTHERS)`` is examined.
    Parameter values whose length is at least
    ``TEMPLATE_MERGE_THRESHOLD`` are first processed recursively by this
    same method and then kept. A template with at least one kept value is
    replaced in *code* by those values joined with single spaces and
    padded with one space on each side; a template with none is removed.

    *code* is modified in place and nothing is returned.
    """
    for tmpl in code.filter_templates(recursive=code.RECURSE_OTHERS):
        kept = []
        for param in tmpl.params:
            value = param.value
            if len(value) < self.TEMPLATE_MERGE_THRESHOLD:
                # Too short to be worth keeping; skip this value.
                continue
            # Flatten templates nested inside the value before keeping it.
            self._merge_templates(value)
            kept.append(value)

        if not kept:
            code.remove(tmpl)
            continue

        merged = u" ".join(unicode(value) for value in kept)
        # Pad with spaces so the merged text does not fuse with neighbours.
        code.replace(tmpl, u" " + merged + u" ")

def strip(self):
"""Clean the page's raw text by removing templates and formatting.
@@ -94,6 +109,9 @@ class ArticleTextParser(_BaseTextParser):
for tag in wikicode.filter_tags(matches=lambda tag: tag.tag == "ref"):
remove(wikicode, tag)

# Merge in template contents when the values are long:
self._merge_templates(code)

clean = wikicode.strip_code(normalize=True, collapse=True)
self.clean = re.sub("\n\n+", "\n", clean).strip()
return self.clean


Yükleniyor…
İptal
Kaydet