Browse Source

Fix trying to remove a node that was already removed.

tags/v0.2
Ben Kurtovic 10 years ago
parent
commit
9b87e2e5f7
1 changed files with 14 additions and 2 deletions
  1. +14
    -2
      earwigbot/wiki/copyvios/parsers.py

+ 14
- 2
earwigbot/wiki/copyvios/parsers.py View File

@@ -62,17 +62,29 @@ class ArticleTextParser(BaseTextParser):

The actual stripping is handled by :py:mod:`mwparserfromhell`.
"""
def remove(code, node):
    """Detach *node* from *code*, tolerating nodes that are already gone.

    Removing an outer node also discards whatever is nested inside it, so
    a later attempt to remove one of those inner nodes fails with a
    ValueError. That error is deliberately swallowed here rather than
    tracked, because ignoring it is the simplest fix.
    """
    try:
        code.remove(node)
    except ValueError:
        # The node was already dropped along with a containing node.
        return

wikicode = mwparserfromhell.parse(self.text)

# Preemptively strip some links mwparser doesn't know about:
bad_prefixes = ("file:", "image:", "category:")
for link in wikicode.filter_wikilinks():
if link.title.strip().lower().startswith(bad_prefixes):
wikicode.remove(link)
remove(wikicode, link)

# Also strip references:
for tag in wikicode.filter_tags(matches=lambda tag: tag.tag == "ref"):
wikicode.remove(tag)
remove(wikicode, tag)

clean = wikicode.strip_code(normalize=True, collapse=True)
self.clean = clean.replace("\n\n", "\n").strip()


Loading…
Cancel
Save