Browse Source

Explicitly include excluded URLs in the result set; mark as excluded.

tags/v0.2
Ben Kurtovic 8 years ago
parent
commit
bb819c9306
2 changed files with 16 additions and 4 deletions
  1. +8
    -2
      earwigbot/wiki/copyvios/result.py
  2. +8
    -2
      earwigbot/wiki/copyvios/workers.py

+ 8
- 2
earwigbot/wiki/copyvios/result.py View File

@@ -40,6 +40,7 @@ class CopyvioSource(object):
- :py:attr:`confidence`: the confidence of a violation, between 0 and 1
- :py:attr:`chains`: a 2-tuple of the source chain and the delta chain
- :py:attr:`skipped`: whether this URL was skipped during the check
- :py:attr:`excluded`: whether this URL was in the exclusions list
"""

def __init__(self, workspace, url, headers=None, timeout=5):
@@ -50,6 +51,7 @@ class CopyvioSource(object):
self.confidence = 0.0
self.chains = (EMPTY, EMPTY_INTERSECTION)
self.skipped = False
self.excluded = False

self._event1 = Event()
self._event2 = Event()
@@ -57,11 +59,15 @@ class CopyvioSource(object):

def __repr__(self):
    """Return the canonical string representation of the source.

    The result lists the source's ``url``, ``confidence``, ``skipped`` and
    ``excluded`` attributes, each rendered with ``repr()``.
    """
    # A single format/return pair: an earlier three-field return placed
    # ahead of this one would leave the ``excluded`` flag out of the output.
    res = ("CopyvioSource(url={0!r}, confidence={1!r}, skipped={2!r}, "
           "excluded={3!r})")
    return res.format(
        self.url, self.confidence, self.skipped, self.excluded)

def __str__(self):
"""Return a nice string representation of the source."""
if self.excluded:
return "<CopyvioSource ({0}, excluded)>".format(self.url)
if self.skipped:
return "<CopyvioSource ({0}, skipped)>".format(self.url)
res = "<CopyvioSource ({0} with {1} conf)>"


+ 8
- 2
earwigbot/wiki/copyvios/workers.py View File

@@ -311,11 +311,15 @@ class CopyvioWorkspace(object):
if url in self._handled_urls:
continue
self._handled_urls.add(url)
if exclude_check and exclude_check(url):
continue

source = CopyvioSource(url=url, **self._source_args)
self.sources.append(source)

if exclude_check and exclude_check(url):
self._logger.debug(u"enqueue(): exclude {0}".format(url))
source.excluded = True
source.skip()
continue
if self._short_circuit and self.finished:
self._logger.debug(u"enqueue(): auto-skip {0}".format(url))
source.skip()
@@ -371,6 +375,8 @@ class CopyvioWorkspace(object):
def cmpfunc(s1, s2):
    """Compare two sources, cmp-style, for ordering the source list.

    Returns a negative number when *s1* should sort before *s2*, a
    positive number when it should sort after, and zero on a full tie.
    Keys, in order of precedence:

    1. higher ``confidence`` first;
    2. non-excluded before excluded;
    3. non-skipped before skipped.
    """
    if s2.confidence != s1.confidence:
        if s2.confidence > s1.confidence:
            return 1
        return -1

    if s2.excluded != s1.excluded:
        # Exactly one of the two is excluded; that one goes last.
        if s1.excluded:
            return 1
        return -1

    left_skipped = int(s1.skipped)
    right_skipped = int(s2.skipped)
    return left_skipped - right_skipped

self.sources.sort(cmpfunc)


Loading…
Cancel
Save