Bläddra i källkod

Update logging.

tags/v0.2
Ben Kurtovic 10 år sedan
förälder
incheckning
0e28f89466
1 ändrade filer med 9 tillägg och 6 borttagningar
  1. +9
    -6
      earwigbot/wiki/copyvios/__init__.py

+ 9
- 6
earwigbot/wiki/copyvios/__init__.py Visa fil

@@ -45,13 +45,15 @@ _WorkingResult = namedtuple("_WorkingResult", ["url", "confidence", "chains"])
class _CopyvioWorkspace(object):
"""Manages a single copyvio check distributed across threads."""

def __init__(self, article, min_confidence, until, headers, url_timeout=5):
def __init__(self, article, min_confidence, until, logger, headers,
url_timeout=5):
self.best = _WorkingResult(None, 0.0, (EMPTY, EMPTY_INTERSECTION))
self.until = until

self._article = article
self._handled_urls = []
self._headers = headers
self._logger = logger.getChild("copyvios")
self._min_confidence = min_confidence
self._queue = Queue()
self._result_lock = Lock()
@@ -115,6 +117,7 @@ class _CopyvioWorkspace(object):
"""Compare a source to the article, and update the working result."""
delta = MarkovChainIntersection(self._article, source)
confidence = self._calculate_confidence(delta)
self._logger.debug("{0} -> {1}".format(url, confidence))
with self._result_lock:
if confidence > self.best.confidence:
self.best = _WorkingResult(url, confidence, (source, delta))
@@ -266,7 +269,7 @@ class CopyvioMixIn(object):
parser = ArticleTextParser(self.get())
article = MarkovChain(parser.strip())
workspace = _CopyvioWorkspace(article, min_confidence,
until, self._addheaders)
until, self._logger, self._addheaders)
if self._exclusions_db:
self._exclusions_db.sync(self.site.name)
exclude = lambda u: self._exclusions_db.check(self.site.name, u)
@@ -276,7 +279,7 @@ class CopyvioMixIn(object):
if article.size() < 20: # Auto-fail very small articles
result = CopyvioCheckResult(False, 0.0, None, 0, 0, article,
workspace.best.chains)
self._logger.debug(result.get_log_message(self.title))
self._logger.info(result.get_log_message(self.title))
return result

workspace.spawn(worker_threads)
@@ -296,7 +299,7 @@ class CopyvioMixIn(object):
workspace.best.confidence >= min_confidence,
workspace.best.confidence, workspace.best.url, len(chunks),
time() - start_time, article, workspace.best.chains)
self._logger.debug(result.get_log_message(self.title))
self._logger.info(result.get_log_message(self.title))
return result

def copyvio_compare(self, url, min_confidence=0.5, max_time=30):
@@ -324,12 +327,12 @@ class CopyvioMixIn(object):
until = (start_time + max_time) if max_time > 0 else None
article = MarkovChain(ArticleTextParser(self.get()).strip())
workspace = _CopyvioWorkspace(article, min_confidence,
until, self._addheaders)
until, self._logger, self._addheaders)
workspace.enqueue([url])
workspace.spawn(1)
workspace.wait()
url, conf, chains = workspace.best
result = CopyvioCheckResult(conf >= min_confidence, conf, url, 0,
time() - start_time, article, chains)
self._logger.debug(result.get_log_message(self.title))
self._logger.info(result.get_log_message(self.title))
return result

Laddar…
Avbryt
Spara