diff --git a/earwigbot/wiki/copyvios/parsers.py b/earwigbot/wiki/copyvios/parsers.py
index fc0d748..70e55fa 100644
--- a/earwigbot/wiki/copyvios/parsers.py
+++ b/earwigbot/wiki/copyvios/parsers.py
@@ -62,17 +62,29 @@ class ArticleTextParser(BaseTextParser):
 
         The actual stripping is handled by :py:mod:`mwparserfromhell`.
         """
+        def remove(code, node):
+            """Remove a node from a code object, ignoring ValueError.
+
+            Sometimes we will remove a node that contains another node we wish
+            to remove, and we fail when we try to remove the inner one. Easiest
+            solution is to just ignore the exception.
+            """
+            try:
+                code.remove(node)
+            except ValueError:
+                pass
+
         wikicode = mwparserfromhell.parse(self.text)
 
         # Preemtively strip some links mwparser doesn't know about:
         bad_prefixes = ("file:", "image:", "category:")
         for link in wikicode.filter_wikilinks():
             if link.title.strip().lower().startswith(bad_prefixes):
-                wikicode.remove(link)
+                remove(wikicode, link)
 
         # Also strip references:
         for tag in wikicode.filter_tags(matches=lambda tag: tag.tag == "ref"):
-            wikicode.remove(tag)
+            remove(wikicode, tag)
 
         clean = wikicode.strip_code(normalize=True, collapse=True)
         self.clean = clean.replace("\n\n", "\n").strip()