Browse Source

Support new 'excluded' column in database.

pull/24/head
Ben Kurtovic 9 years ago
parent
commit
321e6e3352
1 changed file with 7 additions and 5 deletions
  1. +7
    -5
      copyvios/checker.py

+ 7
- 5
copyvios/checker.py View File

@@ -127,7 +127,7 @@ def _get_cached_results(page, conn, mode, noskip):
cache_possible_miss cache_possible_miss
FROM cache FROM cache
WHERE cache_id = ?""" WHERE cache_id = ?"""
query3 = """SELECT cdata_url, cdata_confidence, cdata_skipped
query3 = """SELECT cdata_url, cdata_confidence, cdata_skipped, cdata_excluded
FROM cache_data FROM cache_data
WHERE cdata_cache_id = ?""" WHERE cdata_cache_id = ?"""
cache_id = buffer(sha256(mode + page.get().encode("utf8")).digest()) cache_id = buffer(sha256(mode + page.get().encode("utf8")).digest())
@@ -153,19 +153,20 @@ def _get_cached_results(page, conn, mode, noskip):
result.cache_age = _format_date(cache_time) result.cache_age = _format_date(cache_time)
return result return result


url, confidence, skipped = data.pop(0)
url, confidence, skipped, excluded = data.pop(0)
if skipped: # Should be impossible: data must be bad; run a new check if skipped: # Should be impossible: data must be bad; run a new check
return None return None
result = page.copyvio_compare(url, min_confidence=T_SUSPECT, max_time=30) result = page.copyvio_compare(url, min_confidence=T_SUSPECT, max_time=30)
if abs(result.confidence - confidence) >= 0.0001: if abs(result.confidence - confidence) >= 0.0001:
return None return None


for url, confidence, skipped in data:
for url, confidence, skipped, excluded in data:
if noskip and skipped: if noskip and skipped:
return None return None
source = CopyvioSource(None, url) source = CopyvioSource(None, url)
source.confidence = confidence source.confidence = confidence
source.skipped = bool(skipped) source.skipped = bool(skipped)
source.excluded = bool(excluded)
result.sources.append(source) result.sources.append(source)
result.queries = queries result.queries = queries
result.time = check_time result.time = check_time
@@ -187,9 +188,10 @@ def _format_date(cache_time):
def _cache_result(page, result, conn, mode): def _cache_result(page, result, conn, mode):
query1 = "DELETE FROM cache WHERE cache_id = ?" query1 = "DELETE FROM cache WHERE cache_id = ?"
query2 = "INSERT INTO cache VALUES (?, DEFAULT, ?, ?, ?)" query2 = "INSERT INTO cache VALUES (?, DEFAULT, ?, ?, ?)"
query3 = "INSERT INTO cache_data VALUES (DEFAULT, ?, ?, ?, ?)"
query3 = "INSERT INTO cache_data VALUES (DEFAULT, ?, ?, ?, ?, ?)"
cache_id = buffer(sha256(mode + page.get().encode("utf8")).digest()) cache_id = buffer(sha256(mode + page.get().encode("utf8")).digest())
data = [(cache_id, source.url[:1024], source.confidence, source.skipped)
data = [(cache_id, source.url[:1024], source.confidence, source.skipped,
source.excluded)
for source in result.sources] for source in result.sources]
with conn.cursor() as cursor: with conn.cursor() as cursor:
cursor.execute("START TRANSACTION") cursor.execute("START TRANSACTION")


Loading…
Cancel
Save