浏览代码

Add some docs; better sorting function.

tags/v0.2
Ben Kurtovic 10 年前
父节点
当前提交
9fd145da5c
共有 3 个文件被更改,包括 20 次插入和 4 次删除
  1. +1
    -0
      earwigbot/wiki/copyvios/__init__.py
  2. +13
    -3
      earwigbot/wiki/copyvios/result.py
  3. +6
    -1
      earwigbot/wiki/copyvios/workers.py

+ 1
- 0
earwigbot/wiki/copyvios/__init__.py 查看文件

@@ -137,6 +137,7 @@ class CopyvioMixIn(object):
workspace.enqueue(searcher.search(chunk), exclude)
num_queries += 1
sleep(1)

workspace.wait()
result = workspace.get_result(num_queries)
self._logger.info(result.get_log_message(self.title))


+ 13
- 3
earwigbot/wiki/copyvios/result.py 查看文件

@@ -33,6 +33,13 @@ class CopyvioSource(object):

A class that represents a single possible source of a copyright violation,
i.e., a URL.

*Attributes:*

- :py:attr:`url`: the URL of the source
- :py:attr:`confidence`: the confidence of a violation, between 0 and 1
- :py:attr:`chains`: a 2-tuple of the source chain and the delta chain
- :py:attr:`skipped`: whether this URL was skipped during the check
"""

def __init__(self, workspace, url, key, headers=None, timeout=5):
@@ -101,6 +108,9 @@ class CopyvioCheckResult(object):

- :py:attr:`violation`: ``True`` if this is a violation, else ``False``
- :py:attr:`sources`: a list of CopyvioSources, sorted by confidence
- :py:attr:`best`: the best matching CopyvioSource, or ``None``
- :py:attr:`confidence`: the best matching source's confidence, or 0
- :py:attr:`url`: the best matching source's URL, or ``None``
- :py:attr:`queries`: the number of queries used to reach a result
- :py:attr:`time`: the amount of time the check took to complete
- :py:attr:`article_chain`: the MarkovChain of the article text
@@ -136,7 +146,7 @@ class CopyvioCheckResult(object):

@property
def url(self):
    """The URL of the best source, or ``None`` if there is no best source."""
    # self.best is falsy when no source is available, so guard the
    # attribute access instead of raising AttributeError on None.
    return self.best.url if self.best else None

def get_log_message(self, title):
@@ -144,7 +154,7 @@ class CopyvioCheckResult(object):
if not self.sources:
log = u"No violation for [[{0}]] (no sources; {1} queries; {2} seconds)"
return log.format(title, self.queries, self.time)
log = u"{0} for [[{1}]] (best: {2} ({3} confidence); {4} queries; {5} seconds)"
log = u"{0} for [[{1}]] (best: {2} ({3} confidence); {4} sources; {5} queries; {6} seconds)"
is_vio = "Violation detected" if self.violation else "No violation"
return log.format(is_vio, title, self.url, self.confidence,
self.queries, self.time)
len(self.sources), self.queries, self.time)

+ 6
- 1
earwigbot/wiki/copyvios/workers.py 查看文件

@@ -358,6 +358,11 @@ class CopyvioWorkspace(object):

def get_result(self, num_queries=0):
    """Return a CopyvioCheckResult containing the results of this check.

    ``self.sources`` is sorted in place first: highest confidence first,
    and among sources of equal confidence, non-skipped ones before skipped
    ones.

    :param num_queries: number of queries, passed through to the result
        (defaults to 0).
    :returns: a CopyvioCheckResult built from ``self.finished``, the sorted
        sources, *num_queries*, the time elapsed since
        ``self._start_time``, and ``self._article``.
    """
    # A key function replaces the former comparison function: passing a
    # cmp callable positionally to list.sort() only works on Python 2.
    # Negating the confidence yields descending order, and int(skipped)
    # places non-skipped (0) ahead of skipped (1) among ties. The sort is
    # stable, so fully tied sources keep their existing relative order.
    self.sources.sort(
        key=lambda source: (-source.confidence, int(source.skipped)))
    return CopyvioCheckResult(self.finished, self.sources, num_queries,
                              time() - self._start_time, self._article)

正在加载...
取消
保存