diff --git a/earwigbot/wiki/copyvios/parsers.py b/earwigbot/wiki/copyvios/parsers.py index e21af0e..e9ac415 100644 --- a/earwigbot/wiki/copyvios/parsers.py +++ b/earwigbot/wiki/copyvios/parsers.py @@ -63,6 +63,13 @@ class ArticleTextParser(BaseTextParser): The actual stripping is handled by :py:mod:`mwparserfromhell`. """ wikicode = mwparserfromhell.parse(self.text) + + # Preemptively strip some links mwparser doesn't know about: + bad_prefixes = ("file:", "image:", "category:") + for link in wikicode.filter_wikilinks(): + if link.title.strip().lower().startswith(bad_prefixes): + wikicode.remove(link) + clean = wikicode.strip_code(normalize=True, collapse=True) self.clean = clean.replace("\n\n", "\n").strip() return self.clean