From 3dde1c5d602707ed35f57979e065cc12e9efc706 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Mon, 30 Jun 2014 22:39:51 -0400
Subject: [PATCH] Correctly handle HTML with no tags.

---
 earwigbot/wiki/copyvios/parsers.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/earwigbot/wiki/copyvios/parsers.py b/earwigbot/wiki/copyvios/parsers.py
index 29a49f5..e0b2784 100644
--- a/earwigbot/wiki/copyvios/parsers.py
+++ b/earwigbot/wiki/copyvios/parsers.py
@@ -136,6 +136,10 @@ class HTMLTextParser(BaseTextParser):
         except ValueError:
             soup = bs4.BeautifulSoup(self.text).body
 
+        if not soup:
+            # No tag present in HTML ->
+            # no scrapable content (possibly JS or magic):
+            return ""
         is_comment = lambda text: isinstance(text, bs4.element.Comment)
         for comment in soup.find_all(text=is_comment):
             comment.extract()