
Fully implement logging; fix non-unicode log messages.

tags/v0.1^2
Ben Kurtovic committed 12 years ago
commit 439b855254

4 changed files with 34 additions and 17 deletions
1. earwigbot/tasks/afc_copyvios.py (+7, -7)
2. earwigbot/wiki/copyvios/__init__.py (+21, -6)
3. earwigbot/wiki/copyvios/exclusions.py (+4, -4)
4. earwigbot/wiki/copyvios/search.py (+2, -0)
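
A note on the u"" prefixes that appear throughout the diffs below: under Python 2, formatting a unicode value (such as a page title containing non-ASCII characters) into a byte-string template forces an implicit ASCII encode of the argument, which typically fails, so making the log templates unicode avoids the error. A minimal illustrative sketch, not part of the commit (the title is a made-up example):

    # -*- coding: utf-8 -*-
    # Python 2 sketch of the gotcha the u"" prefixes sidestep.
    title = u"Dvo\u0159\u00e1k"   # hypothetical non-ASCII page title

    print u"Checking [[{0}]]".format(title)   # unicode template: works, stays unicode
    try:
        "Checking [[{0}]]".format(title)      # byte template: implicit ASCII encode
    except UnicodeEncodeError as exc:         # of the unicode argument fails
        print "byte-string template failed:", exc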

earwigbot/tasks/afc_copyvios.py (+7, -7)

@@ -70,17 +70,17 @@ class AFCCopyvios(Task):
         """Detect copyvios in 'page' and add a note if any are found."""
         title = page.title
         if title in self.ignore_list:
-            msg = "Skipping page in ignore list: [[{0}]]"
+            msg = u"Skipping page in ignore list: [[{0}]]"
             self.logger.info(msg.format(title))
             return
 
         pageid = page.pageid
         if self.has_been_processed(pageid):
-            msg = "Skipping check on already processed page [[{0}]]"
+            msg = u"Skipping check on already processed page [[{0}]]"
             self.logger.info(msg.format(title))
             return
 
-        self.logger.info("Checking [[{0}]]".format(title))
+        self.logger.info(u"Checking [[{0}]]".format(title))
         result = page.copyvio_check(self.min_confidence, self.max_queries)
         url = result.url
         confidence = "{0}%".format(round(result.confidence * 100, 2))
@@ -94,11 +94,11 @@ class AFCCopyvios(Task):
                 page.edit(newtext, self.summary.format(url=url))
             else:
                 page.edit(newtext, self.summary)
-            msg = "Found violation: [[{0}]] -> {1} ({2} confidence)"
-            self.logger.warn(msg.format(title, url, confidence))
+            msg = u"Found violation: [[{0}]] -> {1} ({2} confidence)"
+            self.logger.info(msg.format(title, url, confidence))
         else:
-            msg = "No violations detected (best: {1} at {2} confidence)"
-            self.logger.debug(msg.format(url, confidence))
+            msg = u"No violations detected in [[{0}]] (best: {1} at {2} confidence)"
+            self.logger.info(msg.format(title, url, confidence))
 
         self.log_processed(pageid)
         if self.cache_results:


earwigbot/wiki/copyvios/__init__.py (+21, -6)

@@ -155,7 +155,10 @@ class CopyvioMixIn(object):
 
         while (chunks and best_confidence < min_confidence and
                (max_queries < 0 or num_queries < max_queries)):
-            urls = searcher.search(chunks.pop(0))
+            chunk = chunks.pop(0)
+            log = u"[[{0}]] -> querying {1} for {2!r}"
+            self._logger.debug(log.format(self.title, searcher.name, chunk))
+            urls = searcher.search(chunk)
             urls = [url for url in urls if url not in handled_urls]
             for url in urls:
                 handled_urls.append(url)
@@ -172,12 +175,19 @@ class CopyvioMixIn(object):
                     sleep(interquery_sleep - diff)
                 last_query = time()
 
-        if best_confidence >= min_confidence:  # violation?
-            v = True
+        if best_confidence >= min_confidence:
+            is_violation = True
+            log = u"Violation detected for [[{0}]] (confidence: {1}; URL: {2}; using {3} queries)"
+            self._logger.debug(log.format(self.title, best_confidence,
+                                          best_match, num_queries))
         else:
-            v = False
-        return CopyvioCheckResult(v, best_confidence, best_match, num_queries,
-                                  article_chain, best_chains)
+            is_violation = False
+            log = u"No violation for [[{0}]] (confidence: {1}; using {2} queries)"
+            self._logger.debug(log.format(self.title, best_confidence,
+                                          num_queries))
+
+        return CopyvioCheckResult(is_violation, best_confidence, best_match,
+                                  num_queries, article_chain, best_chains)
 
     def copyvio_compare(self, url, min_confidence=0.5):
         """Check the page like :py:meth:`copyvio_check` against a specific URL.
@@ -208,7 +218,12 @@ class CopyvioMixIn(object):
 
         if confidence >= min_confidence:
             is_violation = True
+            log = u"Violation detected for [[{0}]] (confidence: {1}; URL: {2})"
+            self._logger.debug(log.format(self.title, confidence, url))
         else:
             is_violation = False
+            log = u"No violation for [[{0}]] (confidence: {1}; URL: {2})"
+            self._logger.debug(log.format(self.title, confidence, url))
+
         return CopyvioCheckResult(is_violation, confidence, url, 0,
                                   article_chain, chains)
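
Most of the new messages in this file are emitted at DEBUG level through the page's _logger, so they only surface when a handler's threshold allows it. A generic sketch using the standard logging module (earwigbot's own logger wiring is configured elsewhere and may differ):

    import logging

    # Illustrative only: route DEBUG-level records (like the new
    # "querying ..." and "Violation detected ..." messages) to stderr.
    logging.basicConfig(level=logging.DEBUG,
                        format="%(asctime)s %(name)s %(levelname)s: %(message)s")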

earwigbot/wiki/copyvios/exclusions.py (+4, -4)

@@ -138,11 +138,11 @@ class ExclusionsDB(object):
         max_staleness = 60 * 60 * 24 * 30
         time_since_update = int(time() - self._get_last_update())
         if time_since_update > max_staleness:
-            log = "Updating stale database: {0} (last updated {1} seconds ago)"
+            log = u"Updating stale database: {0} (last updated {1} seconds ago)"
             self._logger.info(log.format(sitename, time_since_update))
             self._update(sitename)
         else:
-            log = "Database for {0} is still fresh (last updated {1} seconds ago)"
+            log = u"Database for {0} is still fresh (last updated {1} seconds ago)"
             self._logger.debug(log.format(sitename, time_since_update))
 
     def check(self, sitename, url):
@@ -155,10 +155,10 @@ class ExclusionsDB(object):
         with sqlite.connect(self._dbfile) as conn, self._db_access_lock:
             for row in conn.execute(query, (sitename,)):
                 if normalized.startswith(row[0]):
-                    log = "Exclusion detected in {0} for {1}"
+                    log = u"Exclusion detected in {0} for {1}"
                     self._logger.debug(log.format(sitename, url))
                     return True
 
-        log = "No exclusions in {0} for {1}".format(sitename, url)
+        log = u"No exclusions in {0} for {1}".format(sitename, url)
         self._logger.debug(log)
         return False

earwigbot/wiki/copyvios/search.py (+2, -0)

@@ -34,6 +34,7 @@ __all__ = ["BaseSearchEngine", "YahooBOSSSearchEngine"]
 
 class BaseSearchEngine(object):
     """Base class for a simple search engine interface."""
+    name = "Base"
 
     def __init__(self, cred):
         """Store credentials *cred* for searching later on."""
@@ -57,6 +58,7 @@ class BaseSearchEngine(object):
 
 class YahooBOSSSearchEngine(BaseSearchEngine):
     """A search engine interface with Yahoo! BOSS."""
+    name = "Yahoo! BOSS"
 
     def search(self, query):
         """Do a Yahoo! BOSS web search for *query*.

