Browse Source

Implement noskip.

pull/24/head
Ben Kurtovic 10 years ago
parent
commit
b7bafb29cf
2 changed files with 7 additions and 4 deletions
  1. copyvios/checker.py (+6, -3)
  2. templates/index.mako (+1, -1)

+ 6
- 3
copyvios/checker.py View File

@@ -65,11 +65,12 @@ def _get_results(query, follow=True):
  return
  mode = "{0}:{1}:".format(use_engine, use_links)
  if not query.nocache:
- query.result = _get_cached_results(page, conn, mode)
+ query.result = _get_cached_results(page, conn, mode, query.noskip)
  if not query.result:
  query.result = page.copyvio_check(
  min_confidence=T_SUSPECT, max_queries=10, max_time=45,
- no_searches=not use_engine, no_links=not use_links)
+ no_searches=not use_engine, no_links=not use_links,
+ short_circuit=not query.noskip)
  query.result.cached = False
  _cache_result(page, query.result, conn, mode)
  elif query.action == "compare":
@@ -110,7 +111,7 @@ def _get_page_by_revid(site, revid):
  page._load_content(res)
  return page

- def _get_cached_results(page, conn, mode):
+ def _get_cached_results(page, conn, mode, noskip):
  query1 = """DELETE FROM cache
  WHERE cache_time < DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 3 DAY)"""
  query2 = """SELECT cache_time, cache_queries, cache_process_time
@@ -147,6 +148,8 @@ def _get_cached_results(page, conn, mode):
  return None

  for url, confidence, skipped in data:
+ if noskip and skipped:
+ return None
  source = CopyvioSource(None, url)
  source.confidence = confidence
  source.skipped = bool(skipped)


+ 1
- 1
templates/index.mako View File

@@ -193,7 +193,7 @@
  <li>Redirected from <a href="${query.redirected_from.url}">${query.redirected_from.title | h}</a>. <a href="${request.url | httpsfix, h}&amp;noredirect=1">Check the original page.</a></li>
  % endif
  % if skips:
- <li>Since a suspected source was found with a high confidence value, some URLs were skipped. <a href="javascript:alert('Not implemented yet!');">Check all URLs.</a></li>
+ <li>Since a suspected source was found with a high confidence value, some URLs were skipped. <a href="${request.url | httpsfix, h}&amp;noskip=1">Check all URLs.</a></li>
  % endif
  % if result.cached:
  <li>Results are <a id="cv-cached" href="#">cached<span>To save time (and money), this tool will retain the results of checks for up to 72 hours. This includes the URLs of the checked sources, but neither their content nor the content of the article. Future checks on the same page (assuming it remains unchanged) will not involve additional search queries, but a fresh comparison against the source URL will be made. If the page is modified, a new check will be run.</span></a> from <abbr title="${result.cache_time}">${result.cache_age} ago</abbr>. Originally generated in <span class="mono">${round(result.time, 3)}</span> seconds using <span class="mono">${result.queries}</span> queries. <a href="${request.url | httpsfix, h}&amp;nocache=1">Bypass the cache.</a></li>


Loading…
Cancel
Save