diff --git a/earwigbot/wiki/copyvios/__init__.py b/earwigbot/wiki/copyvios/__init__.py index b12cf09..d3a3b9e 100644 --- a/earwigbot/wiki/copyvios/__init__.py +++ b/earwigbot/wiki/copyvios/__init__.py @@ -45,13 +45,15 @@ _WorkingResult = namedtuple("_WorkingResult", ["url", "confidence", "chains"]) class _CopyvioWorkspace(object): """Manages a single copyvio check distributed across threads.""" - def __init__(self, article, min_confidence, until, headers, url_timeout=5): + def __init__(self, article, min_confidence, until, logger, headers, + url_timeout=5): self.best = _WorkingResult(None, 0.0, (EMPTY, EMPTY_INTERSECTION)) self.until = until self._article = article self._handled_urls = [] self._headers = headers + self._logger = logger.getChild("copyvios") self._min_confidence = min_confidence self._queue = Queue() self._result_lock = Lock() @@ -115,6 +117,7 @@ class _CopyvioWorkspace(object): """Compare a source to the article, and update the working result.""" delta = MarkovChainIntersection(self._article, source) confidence = self._calculate_confidence(delta) + self._logger.debug("{0} -> {1}".format(url, confidence)) with self._result_lock: if confidence > self.best.confidence: self.best = _WorkingResult(url, confidence, (source, delta)) @@ -266,7 +269,7 @@ class CopyvioMixIn(object): parser = ArticleTextParser(self.get()) article = MarkovChain(parser.strip()) workspace = _CopyvioWorkspace(article, min_confidence, - until, self._addheaders) + until, self._logger, self._addheaders) if self._exclusions_db: self._exclusions_db.sync(self.site.name) exclude = lambda u: self._exclusions_db.check(self.site.name, u) @@ -276,7 +279,7 @@ class CopyvioMixIn(object): if article.size() < 20: # Auto-fail very small articles result = CopyvioCheckResult(False, 0.0, None, 0, 0, article, workspace.best.chains) - self._logger.debug(result.get_log_message(self.title)) + self._logger.info(result.get_log_message(self.title)) return result workspace.spawn(worker_threads) @@ -296,7 +299,7 @@ class CopyvioMixIn(object): workspace.best.confidence >= min_confidence, workspace.best.confidence, workspace.best.url, len(chunks), time() - start_time, article, workspace.best.chains) - self._logger.debug(result.get_log_message(self.title)) + self._logger.info(result.get_log_message(self.title)) return result def copyvio_compare(self, url, min_confidence=0.5, max_time=30): @@ -324,12 +327,12 @@ class CopyvioMixIn(object): until = (start_time + max_time) if max_time > 0 else None article = MarkovChain(ArticleTextParser(self.get()).strip()) workspace = _CopyvioWorkspace(article, min_confidence, - until, self._addheaders) + until, self._logger, self._addheaders) workspace.enqueue([url]) workspace.spawn(1) workspace.wait() url, conf, chains = workspace.best result = CopyvioCheckResult(conf >= min_confidence, conf, url, 0, time() - start_time, article, chains) - self._logger.debug(result.get_log_message(self.title)) + self._logger.info(result.get_log_message(self.title)) return result