From 710447a6bf4e22a86d4672a59bd1172944e1dec1 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 1 Sep 2014 23:24:05 -0500 Subject: [PATCH] Slightly modified interface; give more options (closes #17). --- copyvios/checker.py | 40 ++++++++----- static/script.js | 13 +++++ static/style.css | 48 +++++++++++----- templates/index.mako | 131 +++++++++++++++++++++++++----------------- templates/settings.mako | 2 +- templates/support/header.mako | 1 + 6 files changed, 153 insertions(+), 82 deletions(-) diff --git a/copyvios/checker.py b/copyvios/checker.py index 63589e4..0ff2525 100644 --- a/copyvios/checker.py +++ b/copyvios/checker.py @@ -25,7 +25,8 @@ def do_check(): query.project = query.project.lower() query.all_langs, query.all_projects = get_sites() - if query.project and query.lang and (query.title or query.oldid): + query.submitted = query.project and query.lang and (query.title or query.oldid) + if query.submitted: query.site = get_site(query) if query.site: _get_results(query, follow=query.noredirect is None) @@ -51,7 +52,25 @@ def _get_results(query, follow=True): query.redirected_from = page return _get_results(query, follow=False) - if query.url: + if not query.action: + query.action = "compare" if query.url else "search" + if query.action == "search": + conn = get_cache_db() + use_engine = 1 if query.use_engine else 0 + use_links = 1 if query.use_links else 0 + mode = "{0}:{1}:".format(use_engine, use_links) + if not query.nocache: + query.result = _get_cached_results(page, conn, query, mode) + if not query.result: + query.result = page.copyvio_check( + min_confidence=T_SUSPECT, max_queries=10, max_time=45, + no_searches=not use_engine, no_links=not use_links) + query.result.cached = False + _cache_result(page, query.result, conn, mode) + elif query.action == "compare": + if not query.url: + query.error = "no URL" + return scheme = urlparse(query.url).scheme if not scheme and query.url[0] not in ":/": query.url = "http://" + query.url @@ -63,14 +82,7 @@ 
def _get_results(query, follow=True): query.result = result query.result.cached = False else: - conn = get_cache_db() - if not query.nocache: - query.result = _get_cached_results(page, conn, query) - if not query.result: - query.result = page.copyvio_check( - min_confidence=T_SUSPECT, max_queries=10, max_time=45) - query.result.cached = False - _cache_result(page, query.result, conn) + query.error = "bad action" def _get_page_by_revid(site, revid): res = site.api_query(action="query", prop="info|revisions", revids=revid, @@ -90,13 +102,13 @@ def _get_page_by_revid(site, revid): page._load_content(res) return page -def _get_cached_results(page, conn, query): +def _get_cached_results(page, conn, query, mode): query1 = """DELETE FROM cache WHERE cache_time < DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 3 DAY)""" query2 = """SELECT cache_url, cache_time, cache_queries, cache_process_time FROM cache WHERE cache_id = ? AND cache_hash = ?""" - shahash = sha256(page.get().encode("utf8")).hexdigest() + shahash = sha256(mode + page.get().encode("utf8")).hexdigest() with conn.cursor() as cursor: cursor.execute(query1) @@ -129,13 +141,13 @@ def _format_date(cache_time): return "{0} minutes".format(diff.seconds / 60) return "{0} seconds".format(diff.seconds) -def _cache_result(page, result, conn): +def _cache_result(page, result, conn, mode): query = """INSERT INTO cache VALUES (?, ?, ?, CURRENT_TIMESTAMP, ?, ?) 
ON DUPLICATE KEY UPDATE cache_url = ?, cache_time = CURRENT_TIMESTAMP, cache_queries = ?, cache_process_time = ?""" - shahash = sha256(page.get().encode("utf8")).hexdigest() + shahash = sha256(mode + page.get().encode("utf8")).hexdigest() args = (page.pageid, shahash, result.url, result.queries, result.time, result.url, result.queries, result.time) with conn.cursor() as cursor: diff --git a/static/script.js b/static/script.js index 783ec35..120b9a9 100644 --- a/static/script.js +++ b/static/script.js @@ -87,3 +87,16 @@ function set_cookie(name, value, days) { function delete_cookie(name) { set_cookie(name, "", -1); } + +$(document).ready(function() { + $("#action-search").change(function() { + $("#cv-cb-engine").prop("disabled", false); + $("#cv-cb-links").prop("disabled", false); + $("#url-box").prop("disabled", true); + }).change(); + $("#action-compare").change(function() { + $("#cv-cb-engine").prop("disabled", true); + $("#cv-cb-links").prop("disabled", true); + $("#url-box").prop("disabled", false); + }); +}); diff --git a/static/style.css b/static/style.css index e2b0ab1..cb16f66 100644 --- a/static/style.css +++ b/static/style.css @@ -18,15 +18,15 @@ h2 { div#header { font-size: 2.5em; font-weight: bold; - margin: 30px 60px 30px 60px; - padding: 10px 15px 10px 15px; + margin: 20px 60px; + padding: 10px 15px; border: 1px solid #777; background-color: #FFF; } div#container { line-height: 1.25; - margin: 0 60px 75px 60px; + margin: 0 60px 65px 60px; padding: 5px 15px 15px 15px; border: 1px solid #777; background-color: #FFF; @@ -50,8 +50,8 @@ div#info-box { } div#cv-result { - padding: 5px 10px 0 10px; - margin: 0 5px 10px 5px; + padding: 10px; + margin: 15px 5px 10px 5px; } table#heading { @@ -62,6 +62,11 @@ table#cv-form { width: 750px; } +table#cv-form-inner { + width: 100%; + border-spacing: 0; +} + table#cv-chain-table { width: 100%; margin-bottom: 10px; @@ -89,13 +94,24 @@ td#cv-col4 { width: 15%; } +td#cv-inner-col1 { + width: 4%; +} + +td#cv-inner-col2 
{ + width: 22%; +} + +td#cv-inner-col3 { + width: 76%; +} + h2#cv-result-header { - margin-top: 0.2em; - margin-bottom: 0; + margin: 0; } ul#cv-result-list { - margin-top: 0.5em; + margin: 0.5em 0; } a#cv-cached { @@ -115,12 +131,6 @@ a#cv-cached span { color: black; } -div.divider { - border-bottom: 1px solid #AAA; - margin-top: 15px; - margin-bottom: 15px; -} - div.green-box { background-color: #EFE; border: 1px solid #7F7; @@ -155,6 +165,16 @@ input.cv-text { width: 100%; } +input#cv-cb-engine { + margin-left: 0; + margin-right: 5px; +} + +input#cv-cb-links { + margin-left: 20px; + margin-right: 5px; +} + span.cv-hl { background: #FAA; } diff --git a/templates/index.mako b/templates/index.mako index 8423593..289a6ec 100644 --- a/templates/index.mako +++ b/templates/index.mako @@ -5,8 +5,16 @@ <%include file="/support/header.mako" args="title='Earwig\'s Copyvio Detector'"/> <%namespace module="copyvios.highlighter" import="highlight_delta"/>\ <%namespace module="copyvios.misc" import="httpsfix, urlstrip"/>\ -% if query.project and query.lang and (query.title or query.oldid): - % if query.error == "bad URI": +% if query.submitted: + % if query.error == "bad action": +
+

Unknown action: ${query.action | h}.

+
+ % elif query.error == "no URL": +
+

URL comparison mode requires a URL to be entered. Enter one in the text box below, or choose copyvio search mode to look for content similar to the article elsewhere on the web.

+
+ % elif query.error == "bad URI":

Unsupported URI scheme: ${query.url | h}.

@@ -32,7 +40,7 @@ % endif %endif -

This tool attempts to detect copyright violations in articles. Simply give the title of the page or ID of the revision you want to check and hit Submit. The tool will search for similar content elsewhere on the web using Yahoo! BOSS and then display a report if a match is found. If you give a URL, it will skip the search engine step and directly display a report comparing the article to that particular webpage, like the Duplication Detector.

+

This tool attempts to detect copyright violations in articles. Simply give the title of the page or ID of the revision you want to check and hit Submit. The tool will search for similar content elsewhere on the web using Yahoo! BOSS and then display a report if a match is found. If you give a specific URL, it will skip the search engine step and directly display a report comparing the article to that particular webpage, like the Duplication Detector.

Running a full check can take up to 45 seconds if other websites are slow. Please be patient. If you get a timeout, wait a moment and refresh the page.

Specific websites can be skipped (for example, if their content is in the public domain) by being added to the excluded URL list.

@@ -40,7 +48,7 @@ Site: - http:// + https:// - % else: - - % endif + + + + + + + + + + + +
+ + + + + + +
+ + + +
% if query.nocache or (result and result.cached): - Bypass cache: + - % if query.nocache: - - % else: - - % endif + % endif @@ -114,7 +140,6 @@
% if result: <% hide_comparison = "CopyviosHideComparison" in g.cookies and g.cookies["CopyviosHideComparison"].value == "True" %> -

% if result.confidence >= T_POSSIBLE: @@ -131,44 +156,44 @@ % endif % endif

-
+ - - - - - -
Article:

${highlight_delta(result.article_chain, result.delta_chain)}

Source:

${highlight_delta(result.source_chain, result.delta_chain)}

- + % endif + % if result.url: +
  • ${round(result.confidence * 100, 1)}% confidence of a violation.
  • + % endif + % if query.redirected_from: +
  • Redirected from ${query.redirected_from.title | h}. Check the original page.
  • + % endif + % if result.cached: +
  • + Results are <a id="cv-cached" href="#">cached<span>To save time (and money), this tool will retain the results of checks for up to 72 hours. This includes the URL of the "violated" source, but neither its content nor the content of the article. Future checks on the same page (assuming it remains unchanged) will not involve additional search queries, but a fresh comparison against the source URL will be made. If the page is modified, a new check will be run.</span></a> from ${result.cache_age} ago. Retrieved in ${round(result.time, 3)} seconds (originally generated in + % if result.queries: + ${round(result.original_time, 3)}s using ${result.queries} queries). + % else: + ${round(result.original_time, 3)}s). + % endif + Bypass the cache. +
  • + % else: +
  • Results generated in ${round(result.time, 3)} seconds using ${result.queries} queries.
  • + % endif + % if result.queries: +
  • Fun fact: The Wikimedia Foundation paid Yahoo! Inc. $${result.queries * 0.0008} USD for these results.
  • + % endif +
  • ${"Show" if hide_comparison else "Hide"} comparison:
  • + + + + + + +
    Article:

    ${highlight_delta(result.article_chain, result.delta_chain)}

    Source:

    ${highlight_delta(result.source_chain, result.delta_chain)}

    % endif <%include file="/support/footer.mako"/> diff --git a/templates/settings.mako b/templates/settings.mako index 7869b8d..fb0babb 100644 --- a/templates/settings.mako +++ b/templates/settings.mako @@ -14,7 +14,7 @@ Default site: - http:// + https://