Browse Source

Processing time has been moved into EarwigBot main.

pull/24/head
Ben Kurtovic 11 years ago
parent
commit
958ad39739
2 changed files with 7 additions and 13 deletions
  1. pages/copyvios.mako (+3, -3)
  2. toolserver/copyvios/checker.py (+4, -10)

+ 3
- 3
pages/copyvios.mako View File

@@ -91,7 +91,7 @@
<span>To save time (and money), this tool will retain the results of checks for up to 72 hours. This includes the URL of the "violated" source, but neither its content nor the content of the article. Future checks on the same page (assuming it remains unchanged) will not involve additional search queries, but a fresh comparison against the source URL will be made. If the page is modified, a new check will be run.</span> <span>To save time (and money), this tool will retain the results of checks for up to 72 hours. This includes the URL of the "violated" source, but neither its content nor the content of the article. Future checks on the same page (assuming it remains unchanged) will not involve additional search queries, but a fresh comparison against the source URL will be made. If the page is modified, a new check will be run.</span>
</a> from ${result.cache_time} (${result.cache_age} ago). <a href="${environ['REQUEST_URI'] | h}&amp;nocache=1">Bypass the cache.</a></li> </a> from ${result.cache_time} (${result.cache_age} ago). <a href="${environ['REQUEST_URI'] | h}&amp;nocache=1">Bypass the cache.</a></li>
% else: % else:
<li>Results generated in <tt>${round(result.tdiff, 3)}</tt> seconds using <tt>${result.queries}</tt> queries.</li>
<li>Results generated in <tt>${round(result.time, 3)}</tt> seconds using <tt>${result.queries}</tt> queries.</li>
% endif % endif
% if "EarwigCVShowDetails" in cookies and cookies["EarwigCVShowDetails"].value == "True": % if "EarwigCVShowDetails" in cookies and cookies["EarwigCVShowDetails"].value == "True":
<li><a id="cv-result-detail-link" href="#cv-result-detail" onclick="copyvio_toggle_details()">Hide details:</a></li> <li><a id="cv-result-detail-link" href="#cv-result-detail" onclick="copyvio_toggle_details()">Hide details:</a></li>
@@ -108,9 +108,9 @@
<li>Trigrams: <i>Article:</i> <tt>${result.article_chain.size()}</tt> / <i>Source:</i> <tt>${result.source_chain.size()}</tt> / <i>Delta:</i> <tt>${result.delta_chain.size()}</tt></li> <li>Trigrams: <i>Article:</i> <tt>${result.article_chain.size()}</tt> / <i>Source:</i> <tt>${result.source_chain.size()}</tt> / <i>Delta:</i> <tt>${result.delta_chain.size()}</tt></li>
% if result.cached: % if result.cached:
% if result.queries: % if result.queries:
<li>Retrieved from cache in <tt>${round(result.tdiff, 3)}</tt> seconds (originally generated in <tt>${round(result.original_tdiff, 3)}</tt>s using <tt>${result.queries}</tt> queries; <tt>${round(result.original_tdiff - result.tdiff, 3)}</tt>s saved).</li>
<li>Retrieved from cache in <tt>${round(result.time, 3)}</tt> seconds (originally generated in <tt>${round(result.original_time, 3)}</tt>s using <tt>${result.queries}</tt> queries; <tt>${round(result.original_time - result.time, 3)}</tt>s saved).</li>
% else: % else:
<li>Retrieved from cache in <tt>${round(result.tdiff, 3)}</tt> seconds (originally generated in <tt>${round(result.original_tdiff, 3)}</tt>s; <tt>${round(result.original_tdiff - result.tdiff, 3)}</tt>s saved).</li>
<li>Retrieved from cache in <tt>${round(result.time, 3)}</tt> seconds (originally generated in <tt>${round(result.original_time, 3)}</tt>s; <tt>${round(result.original_time - result.time, 3)}</tt>s saved).</li>
% endif % endif
% endif % endif
% if result.queries: % if result.queries:


+ 4
- 10
toolserver/copyvios/checker.py View File

@@ -30,15 +30,13 @@ def get_results(bot, site, query):
mci = MarkovChainIntersection(mc1, mc2) mci = MarkovChainIntersection(mc1, mc2)
result = CopyvioCheckResult(True, 0.67123, "http://example.com/", 7, mc1, (mc2, mci)) result = CopyvioCheckResult(True, 0.67123, "http://example.com/", 7, mc1, (mc2, mci))
result.cached = False result.cached = False
result.tdiff = time() - tstart
result.time = time() - tstart
# END TEST BLOCK # END TEST BLOCK
return page, result return page, result


def _get_url_specific_results(page, url): def _get_url_specific_results(page, url):
t_start = time()
result = page.copyvio_compare(url) result = page.copyvio_compare(url)
result.cached = False result.cached = False
result.tdiff = time() - t_start
return result return result


def _get_cached_results(page, conn): def _get_cached_results(page, conn):
@@ -46,7 +44,6 @@ def _get_cached_results(page, conn):
query2 = "SELECT cache_url, cache_time, cache_queries, cache_process_time FROM cache WHERE cache_id = ? AND cache_hash = ?" query2 = "SELECT cache_url, cache_time, cache_queries, cache_process_time FROM cache WHERE cache_id = ? AND cache_hash = ?"
pageid = page.pageid() pageid = page.pageid()
hash = sha256(page.get()).hexdigest() hash = sha256(page.get()).hexdigest()
t_start = time()


with conn.cursor() as cursor: with conn.cursor() as cursor:
cursor.execute(query1) cursor.execute(query1)
@@ -55,12 +52,11 @@ def _get_cached_results(page, conn):
if not results: if not results:
return None return None


url, cache_time, num_queries, original_tdiff = results[0]
url, cache_time, num_queries, original_time = results[0]
result = page.copyvio_compare(url) result = page.copyvio_compare(url)
result.cached = True result.cached = True
result.queries = num_queries result.queries = num_queries
result.tdiff = time() - t_start
result.original_tdiff = original_tdiff
result.original_time = original_time
result.cache_time = cache_time.strftime("%b %d, %Y %H:%M:%S UTC") result.cache_time = cache_time.strftime("%b %d, %Y %H:%M:%S UTC")
result.cache_age = _format_date(cache_time) result.cache_age = _format_date(cache_time)
return result return result
@@ -74,10 +70,8 @@ def _format_date(cache_time):
return "{0} seconds".format(diff.seconds) return "{0} seconds".format(diff.seconds)


def _get_fresh_results(page, conn): def _get_fresh_results(page, conn):
t_start = time()
result = page.copyvio_check(max_queries=10, max_time=45) result = page.copyvio_check(max_queries=10, max_time=45)
result.cached = False result.cached = False
result.tdiff = time() - t_start
_cache_result(page, result, conn) _cache_result(page, result, conn)
return result return result


@@ -92,4 +86,4 @@ def _cache_result(page, result, conn):
if cursor.fetchall(): if cursor.fetchall():
cursor.execute(query2, (pageid,)) cursor.execute(query2, (pageid,))
cursor.execute(query3, (pageid, hash, result.url, result.queries, cursor.execute(query3, (pageid, hash, result.url, result.queries,
result.tdiff))
result.time))

Loading…
Cancel
Save