Browse Source

Fully implement logging; fix non-unicode log messages.

tags/v0.1^2
Ben Kurtovic 12 years ago
parent
commit
439b855254
4 changed files with 34 additions and 17 deletions
  1. earwigbot/tasks/afc_copyvios.py (+7, -7)
  2. earwigbot/wiki/copyvios/__init__.py (+21, -6)
  3. earwigbot/wiki/copyvios/exclusions.py (+4, -4)
  4. earwigbot/wiki/copyvios/search.py (+2, -0)

+ 7
- 7
earwigbot/tasks/afc_copyvios.py View File

@@ -70,17 +70,17 @@ class AFCCopyvios(Task):
"""Detect copyvios in 'page' and add a note if any are found.""" """Detect copyvios in 'page' and add a note if any are found."""
title = page.title title = page.title
if title in self.ignore_list: if title in self.ignore_list:
msg = "Skipping page in ignore list: [[{0}]]"
msg = u"Skipping page in ignore list: [[{0}]]"
self.logger.info(msg.format(title)) self.logger.info(msg.format(title))
return return


pageid = page.pageid pageid = page.pageid
if self.has_been_processed(pageid): if self.has_been_processed(pageid):
msg = "Skipping check on already processed page [[{0}]]"
msg = u"Skipping check on already processed page [[{0}]]"
self.logger.info(msg.format(title)) self.logger.info(msg.format(title))
return return


self.logger.info("Checking [[{0}]]".format(title))
self.logger.info(u"Checking [[{0}]]".format(title))
result = page.copyvio_check(self.min_confidence, self.max_queries) result = page.copyvio_check(self.min_confidence, self.max_queries)
url = result.url url = result.url
confidence = "{0}%".format(round(result.confidence * 100, 2)) confidence = "{0}%".format(round(result.confidence * 100, 2))
@@ -94,11 +94,11 @@ class AFCCopyvios(Task):
page.edit(newtext, self.summary.format(url=url)) page.edit(newtext, self.summary.format(url=url))
else: else:
page.edit(newtext, self.summary) page.edit(newtext, self.summary)
msg = "Found violation: [[{0}]] -> {1} ({2} confidence)"
self.logger.warn(msg.format(title, url, confidence))
msg = u"Found violation: [[{0}]] -> {1} ({2} confidence)"
self.logger.info(msg.format(title, url, confidence))
else: else:
msg = "No violations detected (best: {1} at {2} confidence)"
self.logger.debug(msg.format(url, confidence))
msg = u"No violations detected in [[{0}]] (best: {1} at {2} confidence)"
self.logger.info(msg.format(title, url, confidence))


self.log_processed(pageid) self.log_processed(pageid)
if self.cache_results: if self.cache_results:


+ 21
- 6
earwigbot/wiki/copyvios/__init__.py View File

@@ -155,7 +155,10 @@ class CopyvioMixIn(object):


while (chunks and best_confidence < min_confidence and while (chunks and best_confidence < min_confidence and
(max_queries < 0 or num_queries < max_queries)): (max_queries < 0 or num_queries < max_queries)):
urls = searcher.search(chunks.pop(0))
chunk = chunks.pop(0)
log = u"[[{0}]] -> querying {1} for {2!r}"
self._logger.debug(log.format(self.title, searcher.name, chunk))
urls = searcher.search(chunk)
urls = [url for url in urls if url not in handled_urls] urls = [url for url in urls if url not in handled_urls]
for url in urls: for url in urls:
handled_urls.append(url) handled_urls.append(url)
@@ -172,12 +175,19 @@ class CopyvioMixIn(object):
sleep(interquery_sleep - diff) sleep(interquery_sleep - diff)
last_query = time() last_query = time()


if best_confidence >= min_confidence: # violation?
v = True
if best_confidence >= min_confidence:
is_violation = True
log = u"Violation detected for [[{0}]] (confidence: {1}; URL: {2}; using {3} queries)"
self._logger.debug(log.format(self.title, best_confidence,
best_match, num_queries))
else: else:
v = False
return CopyvioCheckResult(v, best_confidence, best_match, num_queries,
article_chain, best_chains)
is_violation = False
log = u"No violation for [[{0}]] (confidence: {1}; using {2} queries)"
self._logger.debug(log.format(self.title, best_confidence,
num_queries))

return CopyvioCheckResult(is_violation, best_confidence, best_match,
num_queries, article_chain, best_chains)


def copyvio_compare(self, url, min_confidence=0.5): def copyvio_compare(self, url, min_confidence=0.5):
"""Check the page like :py:meth:`copyvio_check` against a specific URL. """Check the page like :py:meth:`copyvio_check` against a specific URL.
@@ -208,7 +218,12 @@ class CopyvioMixIn(object):


if confidence >= min_confidence: if confidence >= min_confidence:
is_violation = True is_violation = True
log = u"Violation detected for [[{0}]] (confidence: {1}; URL: {2})"
self._logger.debug(log.format(self.title, confidence, url))
else: else:
is_violation = False is_violation = False
log = u"No violation for [[{0}]] (confidence: {1}; URL: {2})"
self._logger.debug(log.format(self.title, confidence, url))

return CopyvioCheckResult(is_violation, confidence, url, 0, return CopyvioCheckResult(is_violation, confidence, url, 0,
article_chain, chains) article_chain, chains)

+ 4
- 4
earwigbot/wiki/copyvios/exclusions.py View File

@@ -138,11 +138,11 @@ class ExclusionsDB(object):
max_staleness = 60 * 60 * 24 * 30 max_staleness = 60 * 60 * 24 * 30
time_since_update = int(time() - self._get_last_update()) time_since_update = int(time() - self._get_last_update())
if time_since_update > max_staleness: if time_since_update > max_staleness:
log = "Updating stale database: {0} (last updated {1} seconds ago)"
log = u"Updating stale database: {0} (last updated {1} seconds ago)"
self._logger.info(log.format(sitename, time_since_update)) self._logger.info(log.format(sitename, time_since_update))
self._update(sitename) self._update(sitename)
else: else:
log = "Database for {0} is still fresh (last updated {1} seconds ago)"
log = u"Database for {0} is still fresh (last updated {1} seconds ago)"
self._logger.debug(log.format(sitename, time_since_update)) self._logger.debug(log.format(sitename, time_since_update))


def check(self, sitename, url): def check(self, sitename, url):
@@ -155,10 +155,10 @@ class ExclusionsDB(object):
with sqlite.connect(self._dbfile) as conn, self._db_access_lock: with sqlite.connect(self._dbfile) as conn, self._db_access_lock:
for row in conn.execute(query, (sitename,)): for row in conn.execute(query, (sitename,)):
if normalized.startswith(row[0]): if normalized.startswith(row[0]):
log = "Exclusion detected in {0} for {1}"
log = u"Exclusion detected in {0} for {1}"
self._logger.debug(log.format(sitename, url)) self._logger.debug(log.format(sitename, url))
return True return True


log = "No exclusions in {0} for {1}".format(sitename, url)
log = u"No exclusions in {0} for {1}".format(sitename, url)
self._logger.debug(log) self._logger.debug(log)
return False return False

+ 2
- 0
earwigbot/wiki/copyvios/search.py View File

@@ -34,6 +34,7 @@ __all__ = ["BaseSearchEngine", "YahooBOSSSearchEngine"]


class BaseSearchEngine(object): class BaseSearchEngine(object):
"""Base class for a simple search engine interface.""" """Base class for a simple search engine interface."""
name = "Base"


def __init__(self, cred): def __init__(self, cred):
"""Store credentials *cred* for searching later on.""" """Store credentials *cred* for searching later on."""
@@ -57,6 +58,7 @@ class BaseSearchEngine(object):


class YahooBOSSSearchEngine(BaseSearchEngine): class YahooBOSSSearchEngine(BaseSearchEngine):
"""A search engine interface with Yahoo! BOSS.""" """A search engine interface with Yahoo! BOSS."""
name = "Yahoo! BOSS"


def search(self, query): def search(self, query):
"""Do a Yahoo! BOSS web search for *query*. """Do a Yahoo! BOSS web search for *query*.


Loading…
Cancel
Save