소스 검색

Add some docs; better sorting function.

tags/v0.2
Ben Kurtovic 10 년 전
부모
커밋
9fd145da5c
3개의 파일 변경됨, 20개의 추가 그리고 4개의 삭제
  1. +1
    -0
      earwigbot/wiki/copyvios/__init__.py
  2. +13
    -3
      earwigbot/wiki/copyvios/result.py
  3. +6
    -1
      earwigbot/wiki/copyvios/workers.py

+ 1
- 0
earwigbot/wiki/copyvios/__init__.py 파일 보기

@@ -137,6 +137,7 @@ class CopyvioMixIn(object):
workspace.enqueue(searcher.search(chunk), exclude)
num_queries += 1
sleep(1)

workspace.wait()
result = workspace.get_result(num_queries)
self._logger.info(result.get_log_message(self.title))


+ 13
- 3
earwigbot/wiki/copyvios/result.py 파일 보기

@@ -33,6 +33,13 @@ class CopyvioSource(object):

A class that represents a single possible source of a copyright violation,
i.e., a URL.

*Attributes:*

- :py:attr:`url`: the URL of the source
- :py:attr:`confidence`: the confidence of a violation, between 0 and 1
- :py:attr:`chains`: a 2-tuple of the source chain and the delta chain
- :py:attr:`skipped`: whether this URL was skipped during the check
"""

def __init__(self, workspace, url, key, headers=None, timeout=5):
@@ -101,6 +108,9 @@ class CopyvioCheckResult(object):

- :py:attr:`violation`: ``True`` if this is a violation, else ``False``
- :py:attr:`sources`: a list of CopyvioSources, sorted by confidence
- :py:attr:`best`: the best matching CopyvioSource, or ``None``
- :py:attr:`confidence`: the best matching source's confidence, or 0
- :py:attr:`url`: the best matching source's URL, or ``None``
- :py:attr:`queries`: the number of queries used to reach a result
- :py:attr:`time`: the amount of time the check took to complete
- :py:attr:`article_chain`: the MarkovChain of the article text
@@ -136,7 +146,7 @@ class CopyvioCheckResult(object):

@property
def url(self):
- """The url of the best source, or None if no sources exist."""
+ """The URL of the best source, or None if no sources exist."""
return self.best.url if self.best else None

def get_log_message(self, title):
@@ -144,7 +154,7 @@ class CopyvioCheckResult(object):
if not self.sources:
log = u"No violation for [[{0}]] (no sources; {1} queries; {2} seconds)"
return log.format(title, self.queries, self.time)
- log = u"{0} for [[{1}]] (best: {2} ({3} confidence); {4} queries; {5} seconds)"
+ log = u"{0} for [[{1}]] (best: {2} ({3} confidence); {4} sources; {5} queries; {6} seconds)"
is_vio = "Violation detected" if self.violation else "No violation"
return log.format(is_vio, title, self.url, self.confidence,
- self.queries, self.time)
+ len(self.sources), self.queries, self.time)

+ 6
- 1
earwigbot/wiki/copyvios/workers.py 파일 보기

@@ -358,6 +358,11 @@ class CopyvioWorkspace(object):

def get_result(self, num_queries=0):
"""Return a CopyvioCheckResult containing the results of this check."""
- self.sources.sort(key=lambda source: source.confidence, reverse=True)
+ def cmpfunc(s1, s2):
+ if s2.confidence != s1.confidence:
+ return 1 if s2.confidence > s1.confidence else -1
+ return int(s1.skipped) - int(s2.skipped)
+
+ self.sources.sort(cmpfunc)
return CopyvioCheckResult(self.finished, self.sources, num_queries,
time() - self._start_time, self._article)

불러오는 중...
취소
저장