Ver a proveniência

Support following redirects (closes #6).

pull/24/head
Ben Kurtovic há 10 anos
ascendente
cometimento
7d426a4eb0
2 ficheiros alterados com 13 adições e 2 eliminações
  1. +10
    -2
      copyvios/checker.py
  2. +3
    -0
      templates/index.mako

+ 10
- 2
copyvios/checker.py Ver ficheiro

@@ -24,10 +24,10 @@ def do_check():
if query.project and query.lang and (query.title or query.oldid):
query.site = get_site(query)
if query.site:
- _get_results(query)
+ _get_results(query, follow=query.noredirect is None)
return query

- def _get_results(query):
+ def _get_results(query, follow=True):
if query.oldid:
page = query.page = _get_page_by_revid(query.site, query.oldid)
if not page:
@@ -38,6 +38,14 @@ def _get_results(query):
page.get() # Make sure that the page exists before we check it!
except (exceptions.PageNotFoundError, exceptions.InvalidPageError):
return
+ if page.is_redirect and follow:
+ try:
+ query.title = page.get_redirect_target()
+ except exceptions.RedirectError:
+ pass # Something's wrong. Continue checking the original page.
+ else:
+ query.redirected_from = page
+ return _get_results(query, follow=False)

if query.url:
if urlparse(query.url).scheme not in ["http", "https"]:


+ 3
- 0
templates/index.mako Ver ficheiro

@@ -130,6 +130,9 @@
% endif
% endif
<li><b><span class="mono">${round(result.confidence * 100, 1)}%</span></b> confidence of a violation.</li>
+ % if query.redirected_from:
+ <li>Redirected from <a href="${query.redirected_from.url}">${query.redirected_from.title | h}</a>. <a href="${request.url | httpsfix, h}&amp;noredirect=1">Check the original page.</a></li>
+ % endif
% if result.cached:
<li>
Results are <a id="cv-cached" href="#">cached<span>To save time (and money), this tool will retain the results of checks for up to 72 hours. This includes the URL of the "violated" source, but neither its content nor the content of the article. Future checks on the same page (assuming it remains unchanged) will not involve additional search queries, but a fresh comparison against the source URL will be made. If the page is modified, a new check will be run.</span></a> from <abbr title="${result.cache_time}">${result.cache_age} ago</abbr>. Retrieved in <span class="mono">${round(result.time, 3)}</span> seconds (originally generated in


Carregando…
Cancelar
Guardar