Ver código fonte

Processing time has been moved into EarwigBot main.

pull/24/head
Ben Kurtovic 12 anos atrás
pai
commit
958ad39739
2 arquivos alterados com 7 adições e 13 exclusões
  1. +3
    -3
      pages/copyvios.mako
  2. +4
    -10
      toolserver/copyvios/checker.py

+ 3
- 3
pages/copyvios.mako Ver arquivo

@@ -91,7 +91,7 @@
<span>To save time (and money), this tool will retain the results of checks for up to 72 hours. This includes the URL of the "violated" source, but neither its content nor the content of the article. Future checks on the same page (assuming it remains unchanged) will not involve additional search queries, but a fresh comparison against the source URL will be made. If the page is modified, a new check will be run.</span>
</a> from ${result.cache_time} (${result.cache_age} ago). <a href="${environ['REQUEST_URI'] | h}&amp;nocache=1">Bypass the cache.</a></li>
% else:
- <li>Results generated in <tt>${round(result.tdiff, 3)}</tt> seconds using <tt>${result.queries}</tt> queries.</li>
+ <li>Results generated in <tt>${round(result.time, 3)}</tt> seconds using <tt>${result.queries}</tt> queries.</li>
% endif
% if "EarwigCVShowDetails" in cookies and cookies["EarwigCVShowDetails"].value == "True":
<li><a id="cv-result-detail-link" href="#cv-result-detail" onclick="copyvio_toggle_details()">Hide details:</a></li>
@@ -108,9 +108,9 @@
<li>Trigrams: <i>Article:</i> <tt>${result.article_chain.size()}</tt> / <i>Source:</i> <tt>${result.source_chain.size()}</tt> / <i>Delta:</i> <tt>${result.delta_chain.size()}</tt></li>
% if result.cached:
% if result.queries:
- <li>Retrieved from cache in <tt>${round(result.tdiff, 3)}</tt> seconds (originally generated in <tt>${round(result.original_tdiff, 3)}</tt>s using <tt>${result.queries}</tt> queries; <tt>${round(result.original_tdiff - result.tdiff, 3)}</tt>s saved).</li>
+ <li>Retrieved from cache in <tt>${round(result.time, 3)}</tt> seconds (originally generated in <tt>${round(result.original_time, 3)}</tt>s using <tt>${result.queries}</tt> queries; <tt>${round(result.original_time - result.time, 3)}</tt>s saved).</li>
% else:
- <li>Retrieved from cache in <tt>${round(result.tdiff, 3)}</tt> seconds (originally generated in <tt>${round(result.original_tdiff, 3)}</tt>s; <tt>${round(result.original_tdiff - result.tdiff, 3)}</tt>s saved).</li>
+ <li>Retrieved from cache in <tt>${round(result.time, 3)}</tt> seconds (originally generated in <tt>${round(result.original_time, 3)}</tt>s; <tt>${round(result.original_time - result.time, 3)}</tt>s saved).</li>
% endif
% endif
% if result.queries:


+ 4
- 10
toolserver/copyvios/checker.py Ver arquivo

@@ -30,15 +30,13 @@ def get_results(bot, site, query):
mci = MarkovChainIntersection(mc1, mc2)
result = CopyvioCheckResult(True, 0.67123, "http://example.com/", 7, mc1, (mc2, mci))
result.cached = False
- result.tdiff = time() - tstart
+ result.time = time() - tstart
# END TEST BLOCK
return page, result

def _get_url_specific_results(page, url):
- t_start = time()
result = page.copyvio_compare(url)
result.cached = False
- result.tdiff = time() - t_start
return result

def _get_cached_results(page, conn):
@@ -46,7 +44,6 @@ def _get_cached_results(page, conn):
query2 = "SELECT cache_url, cache_time, cache_queries, cache_process_time FROM cache WHERE cache_id = ? AND cache_hash = ?"
pageid = page.pageid()
hash = sha256(page.get()).hexdigest()
- t_start = time()

with conn.cursor() as cursor:
cursor.execute(query1)
@@ -55,12 +52,11 @@ def _get_cached_results(page, conn):
if not results:
return None

- url, cache_time, num_queries, original_tdiff = results[0]
+ url, cache_time, num_queries, original_time = results[0]
result = page.copyvio_compare(url)
result.cached = True
result.queries = num_queries
- result.tdiff = time() - t_start
- result.original_tdiff = original_tdiff
+ result.original_time = original_time
result.cache_time = cache_time.strftime("%b %d, %Y %H:%M:%S UTC")
result.cache_age = _format_date(cache_time)
return result
@@ -74,10 +70,8 @@ def _format_date(cache_time):
return "{0} seconds".format(diff.seconds)

def _get_fresh_results(page, conn):
- t_start = time()
result = page.copyvio_check(max_queries=10, max_time=45)
result.cached = False
- result.tdiff = time() - t_start
_cache_result(page, result, conn)
return result

@@ -92,4 +86,4 @@ def _cache_result(page, result, conn):
if cursor.fetchall():
cursor.execute(query2, (pageid,))
cursor.execute(query3, (pageid, hash, result.url, result.queries,
- result.tdiff))
+ result.time))

Carregando…
Cancelar
Salvar