Browse Source

Cache 'possible_miss' result value.

pull/24/head
Ben Kurtovic 10 years ago
parent
commit
10e75c7b7a
1 changed file with 7 additions and 4 deletions
  1. +7
    -4
      copyvios/checker.py

+ 7
- 4
copyvios/checker.py View File

@@ -123,7 +123,8 @@ def _get_page_by_revid(site, revid):
 def _get_cached_results(page, conn, mode, noskip):
 query1 = """DELETE FROM cache
 WHERE cache_time < DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 3 DAY)"""
-query2 = """SELECT cache_time, cache_queries, cache_process_time
+query2 = """SELECT cache_time, cache_queries, cache_process_time,
+cache_possible_miss
 FROM cache
 WHERE cache_id = ?"""
 query3 = """SELECT cdata_url, cdata_confidence, cdata_skipped
@@ -137,13 +138,14 @@ def _get_cached_results(page, conn, mode, noskip):
 results = cursor.fetchall()
 if not results:
 return None
-cache_time, queries, check_time = results[0]
+cache_time, queries, check_time, possible_miss = results[0]
 cursor.execute(query3, (cache_id,))
 data = cursor.fetchall()

 if not data: # TODO: do something less hacky for this edge case
-artchain = MarkovChain(ArticleTextParser(page.get()).strip())
-result = CopyvioCheckResult(False, [], queries, check_time, artchain)
+article_chain = MarkovChain(ArticleTextParser(page.get()).strip())
+result = CopyvioCheckResult(False, [], queries, check_time,
+article_chain, possible_miss)
 result.cached = True
 result.cache_time = cache_time.strftime("%b %d, %Y %H:%M:%S UTC")
 result.cache_age = _format_date(cache_time)
@@ -165,6 +167,7 @@ def _get_cached_results(page, conn, mode, noskip):
 result.sources.append(source)
 result.queries = queries
 result.time = check_time
+result.possible_miss = possible_miss
 result.cached = True
 result.cache_time = cache_time.strftime("%b %d, %Y %H:%M:%S UTC")
 result.cache_age = _format_date(cache_time)


Loading…
Cancel
Save