From b88181bb24ffb93931612c7ac702a7df5320d456 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 25 Aug 2013 22:17:45 -0400 Subject: [PATCH] Make exclusion check a bit smarter. --- earwigbot/wiki/copyvios/exclusions.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/earwigbot/wiki/copyvios/exclusions.py b/earwigbot/wiki/copyvios/exclusions.py index dbbe0c9..7b36d2a 100644 --- a/earwigbot/wiki/copyvios/exclusions.py +++ b/earwigbot/wiki/copyvios/exclusions.py @@ -161,16 +161,15 @@ class ExclusionsDB(object): Return ``True`` if the URL is in the database, or ``False`` otherwise. """ - normalized = re.sub("https?://", "", url.lower()) + normalized = re.sub(r"https?://(www\.)?", "", url.lower()) query = """SELECT exclusion_url FROM exclusions WHERE exclusion_sitename = ? OR exclusion_sitename = ?""" with sqlite.connect(self._dbfile) as conn, self._db_access_lock: for (excl,) in conn.execute(query, (sitename, "all")): if excl.startswith("*."): - netloc = urlparse(url.lower()).netloc - matches = True if excl[2:] in netloc else False + matches = excl[2:] in urlparse(url.lower()).netloc else: - matches = True if normalized.startswith(excl) else False + matches = normalized.startswith(excl) if matches: log = u"Exclusion detected in {0} for {1}" self._logger.debug(log.format(sitename, url))