From 6b146a397a548932141755a39dd6eca28e1b34e9 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 18 Jul 2014 16:50:43 -0400 Subject: [PATCH] Also strip out files and categories in ATP.strip(). --- earwigbot/wiki/copyvios/parsers.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/earwigbot/wiki/copyvios/parsers.py b/earwigbot/wiki/copyvios/parsers.py index e21af0e..e9ac415 100644 --- a/earwigbot/wiki/copyvios/parsers.py +++ b/earwigbot/wiki/copyvios/parsers.py @@ -63,6 +63,13 @@ class ArticleTextParser(BaseTextParser): The actual stripping is handled by :py:mod:`mwparserfromhell`. """ wikicode = mwparserfromhell.parse(self.text) + + # Preemptively strip some links mwparser doesn't know about: + bad_prefixes = ("file:", "image:", "category:") + for link in wikicode.filter_wikilinks(): + if link.title.strip().lower().startswith(bad_prefixes): + wikicode.remove(link) + clean = wikicode.strip_code(normalize=True, collapse=True) self.clean = clean.replace("\n\n", "\n").strip() return self.clean