From bb819c93065b77467e94c2da83cbb43ce92bcb6c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 29 Sep 2015 02:26:32 -0500 Subject: [PATCH] Explicitly include excluded URLs in the result set; mark as excluded. --- earwigbot/wiki/copyvios/result.py | 10 ++++++++-- earwigbot/wiki/copyvios/workers.py | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/earwigbot/wiki/copyvios/result.py b/earwigbot/wiki/copyvios/result.py index 85b5cc4..f044c03 100644 --- a/earwigbot/wiki/copyvios/result.py +++ b/earwigbot/wiki/copyvios/result.py @@ -40,6 +40,7 @@ class CopyvioSource(object): - :py:attr:`confidence`: the confidence of a violation, between 0 and 1 - :py:attr:`chains`: a 2-tuple of the source chain and the delta chain - :py:attr:`skipped`: whether this URL was skipped during the check + - :py:attr:`excluded`: whether this URL was in the exclusions list """ def __init__(self, workspace, url, headers=None, timeout=5): @@ -50,6 +51,7 @@ class CopyvioSource(object): self.confidence = 0.0 self.chains = (EMPTY, EMPTY_INTERSECTION) self.skipped = False + self.excluded = False self._event1 = Event() self._event2 = Event() @@ -57,11 +59,15 @@ class CopyvioSource(object): def __repr__(self): """Return the canonical string representation of the source.""" - res = "CopyvioSource(url={0!r}, confidence={1!r}, skipped={2!r})" - return res.format(self.url, self.confidence, self.skipped) + res = ("CopyvioSource(url={0!r}, confidence={1!r}, skipped={2!r}, " + "excluded={3!r})") + return res.format( + self.url, self.confidence, self.skipped, self.excluded) def __str__(self): """Return a nice string representation of the source.""" + if self.excluded: + return "".format(self.url) if self.skipped: return "".format(self.url) res = "" diff --git a/earwigbot/wiki/copyvios/workers.py b/earwigbot/wiki/copyvios/workers.py index e471651..5230a44 100644 --- a/earwigbot/wiki/copyvios/workers.py +++ b/earwigbot/wiki/copyvios/workers.py @@ -311,11 +311,15 @@ class CopyvioWorkspace(object): if url in self._handled_urls: continue self._handled_urls.add(url) - if exclude_check and exclude_check(url): - continue source = CopyvioSource(url=url, **self._source_args) self.sources.append(source) + + if exclude_check and exclude_check(url): + self._logger.debug(u"enqueue(): exclude {0}".format(url)) + source.excluded = True + source.skip() + continue if self._short_circuit and self.finished: self._logger.debug(u"enqueue(): auto-skip {0}".format(url)) source.skip() @@ -371,6 +375,8 @@ class CopyvioWorkspace(object): def cmpfunc(s1, s2): if s2.confidence != s1.confidence: return 1 if s2.confidence > s1.confidence else -1 + if s2.excluded != s1.excluded: + return 1 if s1.excluded else -1 return int(s1.skipped) - int(s2.skipped) self.sources.sort(cmpfunc)