ソースを参照

Add some docs; better sorting function.

tags/v0.2
Ben Kurtovic 10年前
コミット
9fd145da5c
3個のファイルの変更20行の追加4行の削除
  1. +1
    -0
      earwigbot/wiki/copyvios/__init__.py
  2. +13
    -3
      earwigbot/wiki/copyvios/result.py
  3. +6
    -1
      earwigbot/wiki/copyvios/workers.py

+ 1
- 0
earwigbot/wiki/copyvios/__init__.py ファイルの表示

@@ -137,6 +137,7 @@ class CopyvioMixIn(object):
workspace.enqueue(searcher.search(chunk), exclude)
num_queries += 1
sleep(1)

workspace.wait()
result = workspace.get_result(num_queries)
self._logger.info(result.get_log_message(self.title))


+ 13
- 3
earwigbot/wiki/copyvios/result.py ファイルの表示

@@ -33,6 +33,13 @@ class CopyvioSource(object):

A class that represents a single possible source of a copyright violation,
i.e., a URL.

*Attributes:*

- :py:attr:`url`: the URL of the source
- :py:attr:`confidence`: the confidence of a violation, between 0 and 1
- :py:attr:`chains`: a 2-tuple of the source chain and the delta chain
- :py:attr:`skipped`: whether this URL was skipped during the check
"""

def __init__(self, workspace, url, key, headers=None, timeout=5):
@@ -101,6 +108,9 @@ class CopyvioCheckResult(object):

- :py:attr:`violation`: ``True`` if this is a violation, else ``False``
- :py:attr:`sources`: a list of CopyvioSources, sorted by confidence
- :py:attr:`best`: the best matching CopyvioSource, or ``None``
- :py:attr:`confidence`: the best matching source's confidence, or 0
- :py:attr:`url`: the best matching source's URL, or ``None``
- :py:attr:`queries`: the number of queries used to reach a result
- :py:attr:`time`: the amount of time the check took to complete
- :py:attr:`article_chain`: the MarkovChain of the article text
@@ -136,7 +146,7 @@ class CopyvioCheckResult(object):

@property
def url(self):
"""The url of the best source, or None if no sources exist."""
"""The URL of the best source, or None if no sources exist."""
return self.best.url if self.best else None

def get_log_message(self, title):
@@ -144,7 +154,7 @@ class CopyvioCheckResult(object):
if not self.sources:
log = u"No violation for [[{0}]] (no sources; {1} queries; {2} seconds)"
return log.format(title, self.queries, self.time)
log = u"{0} for [[{1}]] (best: {2} ({3} confidence); {4} queries; {5} seconds)"
log = u"{0} for [[{1}]] (best: {2} ({3} confidence); {4} sources; {5} queries; {6} seconds)"
is_vio = "Violation detected" if self.violation else "No violation"
return log.format(is_vio, title, self.url, self.confidence,
self.queries, self.time)
len(self.sources), self.queries, self.time)

+ 6
- 1
earwigbot/wiki/copyvios/workers.py ファイルの表示

@@ -358,6 +358,11 @@ class CopyvioWorkspace(object):

def get_result(self, num_queries=0):
"""Return a CopyvioCheckResult containing the results of this check."""
self.sources.sort(key=lambda source: source.confidence, reverse=True)
def cmpfunc(s1, s2):
if s2.confidence != s1.confidence:
return 1 if s2.confidence > s1.confidence else -1
return int(s1.skipped) - int(s2.skipped)

self.sources.sort(cmpfunc)
return CopyvioCheckResult(self.finished, self.sources, num_queries,
time() - self._start_time, self._article)

読み込み中…
キャンセル
保存