Browse Source

Also strip out files and categories in ATP.strip().

tags/v0.2
Ben Kurtovic 10 years ago
parent
commit
6b146a397a
1 changed file with 7 additions and 0 deletions
  1. +7
    -0
      earwigbot/wiki/copyvios/parsers.py

+ 7
- 0
earwigbot/wiki/copyvios/parsers.py View File

@@ -63,6 +63,13 @@ class ArticleTextParser(BaseTextParser):
The actual stripping is handled by :py:mod:`mwparserfromhell`. The actual stripping is handled by :py:mod:`mwparserfromhell`.
""" """
wikicode = mwparserfromhell.parse(self.text) wikicode = mwparserfromhell.parse(self.text)

# Preemptively strip some links mwparser doesn't know about:
bad_prefixes = ("file:", "image:", "category:")
for link in wikicode.filter_wikilinks():
if link.title.strip().lower().startswith(bad_prefixes):
wikicode.remove(link)

clean = wikicode.strip_code(normalize=True, collapse=True) clean = wikicode.strip_code(normalize=True, collapse=True)
self.clean = clean.replace("\n\n", "\n").strip() self.clean = clean.replace("\n\n", "\n").strip()
return self.clean return self.clean


Loading…
Cancel
Save