From b7bafb29cffb6c2445db5ebc6bec7739088997d9 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 5 Sep 2014 14:30:50 -0500 Subject: [PATCH] Implement noskip. --- copyvios/checker.py | 9 ++++++--- templates/index.mako | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/copyvios/checker.py b/copyvios/checker.py index 3924d06..1487ffd 100644 --- a/copyvios/checker.py +++ b/copyvios/checker.py @@ -65,11 +65,12 @@ def _get_results(query, follow=True): return mode = "{0}:{1}:".format(use_engine, use_links) if not query.nocache: - query.result = _get_cached_results(page, conn, mode) + query.result = _get_cached_results(page, conn, mode, query.noskip) if not query.result: query.result = page.copyvio_check( min_confidence=T_SUSPECT, max_queries=10, max_time=45, - no_searches=not use_engine, no_links=not use_links) + no_searches=not use_engine, no_links=not use_links, + short_circuit=not query.noskip) query.result.cached = False _cache_result(page, query.result, conn, mode) elif query.action == "compare": @@ -110,7 +111,7 @@ def _get_page_by_revid(site, revid): page._load_content(res) return page -def _get_cached_results(page, conn, mode): +def _get_cached_results(page, conn, mode, noskip): query1 = """DELETE FROM cache WHERE cache_time < DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 3 DAY)""" query2 = """SELECT cache_time, cache_queries, cache_process_time @@ -147,6 +148,8 @@ def _get_cached_results(page, conn, mode): return None for url, confidence, skipped in data: + if noskip and skipped: + return None source = CopyvioSource(None, url) source.confidence = confidence source.skipped = bool(skipped) diff --git a/templates/index.mako b/templates/index.mako index 40a32e5..2af1828 100644 --- a/templates/index.mako +++ b/templates/index.mako @@ -193,7 +193,7 @@
  • Redirected from ${query.redirected_from.title | h}. Check the original page.
  • % endif % if skips: -
  • Since a suspected source was found with a high confidence value, some URLs were skipped. Check all URLs.
  • +
  • Since a suspected source was found with a high confidence value, some URLs were skipped. Check all URLs.
  • % endif % if result.cached:
  • Results are cached [To save time (and money), this tool will retain the results of checks for up to 72 hours. This includes the URLs of the checked sources, but neither their content nor the content of the article. Future checks on the same page (assuming it remains unchanged) will not involve additional search queries, but a fresh comparison against the source URL will be made. If the page is modified, a new check will be run.] from ${result.cache_age} ago. Originally generated in ${round(result.time, 3)} seconds using ${result.queries} queries. Bypass the cache.