From 7d426a4eb00a31de8931e38c6ab44e552c4427bb Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 16 Jul 2014 03:03:17 -0400 Subject: [PATCH] Support following redirects (closes #6). --- copyvios/checker.py | 12 ++++++++++-- templates/index.mako | 3 +++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/copyvios/checker.py b/copyvios/checker.py index 925703b..93c4a61 100644 --- a/copyvios/checker.py +++ b/copyvios/checker.py @@ -24,10 +24,10 @@ def do_check(): if query.project and query.lang and (query.title or query.oldid): query.site = get_site(query) if query.site: - _get_results(query) + _get_results(query, follow=query.noredirect is None) return query -def _get_results(query): +def _get_results(query, follow=True): if query.oldid: page = query.page = _get_page_by_revid(query.site, query.oldid) if not page: @@ -38,6 +38,14 @@ def _get_results(query): page.get() # Make sure that the page exists before we check it! except (exceptions.PageNotFoundError, exceptions.InvalidPageError): return + if page.is_redirect and follow: + try: + query.title = page.get_redirect_target() + except exceptions.RedirectError: + pass # Something's wrong. Continue checking the original page. + else: + query.redirected_from = page + return _get_results(query, follow=False) if query.url: if urlparse(query.url).scheme not in ["http", "https"]: diff --git a/templates/index.mako b/templates/index.mako index 0713d3f..2ed95a5 100644 --- a/templates/index.mako +++ b/templates/index.mako @@ -130,6 +130,9 @@ % endif % endif
  • ${round(result.confidence * 100, 1)}% confidence of a violation.
  • + % if query.redirected_from: +
  • Redirected from ${query.redirected_from.title | h}. Check the original page.
  • + % endif % if result.cached:
  • Results are cached (To save time (and money), this tool will retain the results of checks for up to 72 hours. This includes the URL of the "violated" source, but neither its content nor the content of the article. Future checks on the same page (assuming it remains unchanged) will not involve additional search queries, but a fresh comparison against the source URL will be made. If the page is modified, a new check will be run.) from ${result.cache_age} ago. Retrieved in ${round(result.time, 3)} seconds (originally generated in