Browse Source

Add some docs; better sorting function.

tags/v0.2
Ben Kurtovic 10 years ago
parent
commit
9fd145da5c
3 changed files with 20 additions and 4 deletions
  1. +1
    -0
      earwigbot/wiki/copyvios/__init__.py
  2. +13
    -3
      earwigbot/wiki/copyvios/result.py
  3. +6
    -1
      earwigbot/wiki/copyvios/workers.py

+ 1
- 0
earwigbot/wiki/copyvios/__init__.py View File

@@ -137,6 +137,7 @@ class CopyvioMixIn(object):
workspace.enqueue(searcher.search(chunk), exclude) workspace.enqueue(searcher.search(chunk), exclude)
num_queries += 1 num_queries += 1
sleep(1) sleep(1)

workspace.wait() workspace.wait()
result = workspace.get_result(num_queries) result = workspace.get_result(num_queries)
self._logger.info(result.get_log_message(self.title)) self._logger.info(result.get_log_message(self.title))


+ 13
- 3
earwigbot/wiki/copyvios/result.py View File

@@ -33,6 +33,13 @@ class CopyvioSource(object):


A class that represents a single possible source of a copyright violation, A class that represents a single possible source of a copyright violation,
i.e., a URL. i.e., a URL.

*Attributes:*

- :py:attr:`url`: the URL of the source
- :py:attr:`confidence`: the confidence of a violation, between 0 and 1
- :py:attr:`chains`: a 2-tuple of the source chain and the delta chain
- :py:attr:`skipped`: whether this URL was skipped during the check
""" """


def __init__(self, workspace, url, key, headers=None, timeout=5): def __init__(self, workspace, url, key, headers=None, timeout=5):
@@ -101,6 +108,9 @@ class CopyvioCheckResult(object):


- :py:attr:`violation`: ``True`` if this is a violation, else ``False`` - :py:attr:`violation`: ``True`` if this is a violation, else ``False``
- :py:attr:`sources`: a list of CopyvioSources, sorted by confidence - :py:attr:`sources`: a list of CopyvioSources, sorted by confidence
- :py:attr:`best`: the best matching CopyvioSource, or ``None``
- :py:attr:`confidence`: the best matching source's confidence, or 0
- :py:attr:`url`: the best matching source's URL, or ``None``
- :py:attr:`queries`: the number of queries used to reach a result - :py:attr:`queries`: the number of queries used to reach a result
- :py:attr:`time`: the amount of time the check took to complete - :py:attr:`time`: the amount of time the check took to complete
- :py:attr:`article_chain`: the MarkovChain of the article text - :py:attr:`article_chain`: the MarkovChain of the article text
@@ -136,7 +146,7 @@ class CopyvioCheckResult(object):


@property @property
def url(self): def url(self):
"""The url of the best source, or None if no sources exist."""
"""The URL of the best source, or None if no sources exist."""
return self.best.url if self.best else None return self.best.url if self.best else None


def get_log_message(self, title): def get_log_message(self, title):
@@ -144,7 +154,7 @@ class CopyvioCheckResult(object):
if not self.sources: if not self.sources:
log = u"No violation for [[{0}]] (no sources; {1} queries; {2} seconds)" log = u"No violation for [[{0}]] (no sources; {1} queries; {2} seconds)"
return log.format(title, self.queries, self.time) return log.format(title, self.queries, self.time)
log = u"{0} for [[{1}]] (best: {2} ({3} confidence); {4} queries; {5} seconds)"
log = u"{0} for [[{1}]] (best: {2} ({3} confidence); {4} sources; {5} queries; {6} seconds)"
is_vio = "Violation detected" if self.violation else "No violation" is_vio = "Violation detected" if self.violation else "No violation"
return log.format(is_vio, title, self.url, self.confidence, return log.format(is_vio, title, self.url, self.confidence,
self.queries, self.time)
len(self.sources), self.queries, self.time)

+ 6
- 1
earwigbot/wiki/copyvios/workers.py View File

@@ -358,6 +358,11 @@ class CopyvioWorkspace(object):


def get_result(self, num_queries=0): def get_result(self, num_queries=0):
"""Return a CopyvioCheckResult containing the results of this check.""" """Return a CopyvioCheckResult containing the results of this check."""
self.sources.sort(key=lambda source: source.confidence, reverse=True)
def cmpfunc(s1, s2):
if s2.confidence != s1.confidence:
return 1 if s2.confidence > s1.confidence else -1
return int(s1.skipped) - int(s2.skipped)

self.sources.sort(cmpfunc)
return CopyvioCheckResult(self.finished, self.sources, num_queries, return CopyvioCheckResult(self.finished, self.sources, num_queries,
time() - self._start_time, self._article) time() - self._start_time, self._article)

Loading…
Cancel
Save