Kaynağa Gözat

Add some docs; better sorting function.

tags/v0.2
Ben Kurtovic 9 yıl önce
ebeveyn
işleme
9fd145da5c
3 değiştirilmiş dosya ile 20 ekleme ve 4 silme
  1. +1
    -0
      earwigbot/wiki/copyvios/__init__.py
  2. +13
    -3
      earwigbot/wiki/copyvios/result.py
  3. +6
    -1
      earwigbot/wiki/copyvios/workers.py

+ 1
- 0
earwigbot/wiki/copyvios/__init__.py Dosyayı Görüntüle

@@ -137,6 +137,7 @@ class CopyvioMixIn(object):
workspace.enqueue(searcher.search(chunk), exclude)
num_queries += 1
sleep(1)

workspace.wait()
result = workspace.get_result(num_queries)
self._logger.info(result.get_log_message(self.title))


+ 13
- 3
earwigbot/wiki/copyvios/result.py Dosyayı Görüntüle

@@ -33,6 +33,13 @@ class CopyvioSource(object):

A class that represents a single possible source of a copyright violation,
i.e., a URL.

*Attributes:*

- :py:attr:`url`: the URL of the source
- :py:attr:`confidence`: the confidence of a violation, between 0 and 1
- :py:attr:`chains`: a 2-tuple of the source chain and the delta chain
- :py:attr:`skipped`: whether this URL was skipped during the check
"""

def __init__(self, workspace, url, key, headers=None, timeout=5):
@@ -101,6 +108,9 @@ class CopyvioCheckResult(object):

- :py:attr:`violation`: ``True`` if this is a violation, else ``False``
- :py:attr:`sources`: a list of CopyvioSources, sorted by confidence
- :py:attr:`best`: the best matching CopyvioSource, or ``None``
- :py:attr:`confidence`: the best matching source's confidence, or 0
- :py:attr:`url`: the best matching source's URL, or ``None``
- :py:attr:`queries`: the number of queries used to reach a result
- :py:attr:`time`: the amount of time the check took to complete
- :py:attr:`article_chain`: the MarkovChain of the article text
@@ -136,7 +146,7 @@ class CopyvioCheckResult(object):

@property
def url(self):
"""The url of the best source, or None if no sources exist."""
"""The URL of the best source, or None if no sources exist."""
return self.best.url if self.best else None

def get_log_message(self, title):
@@ -144,7 +154,7 @@ class CopyvioCheckResult(object):
if not self.sources:
log = u"No violation for [[{0}]] (no sources; {1} queries; {2} seconds)"
return log.format(title, self.queries, self.time)
log = u"{0} for [[{1}]] (best: {2} ({3} confidence); {4} queries; {5} seconds)"
log = u"{0} for [[{1}]] (best: {2} ({3} confidence); {4} sources; {5} queries; {6} seconds)"
is_vio = "Violation detected" if self.violation else "No violation"
return log.format(is_vio, title, self.url, self.confidence,
self.queries, self.time)
len(self.sources), self.queries, self.time)

+ 6
- 1
earwigbot/wiki/copyvios/workers.py Dosyayı Görüntüle

@@ -358,6 +358,11 @@ class CopyvioWorkspace(object):

def get_result(self, num_queries=0):
"""Return a CopyvioCheckResult containing the results of this check."""
self.sources.sort(key=lambda source: source.confidence, reverse=True)
def cmpfunc(s1, s2):
if s2.confidence != s1.confidence:
return 1 if s2.confidence > s1.confidence else -1
return int(s1.skipped) - int(s2.skipped)

self.sources.sort(cmpfunc)
return CopyvioCheckResult(self.finished, self.sources, num_queries,
time() - self._start_time, self._article)

Yükleniyor…
İptal
Kaydet