diff --git a/copyvios/checker.py b/copyvios/checker.py index 63589e4..0ff2525 100644 --- a/copyvios/checker.py +++ b/copyvios/checker.py @@ -25,7 +25,8 @@ def do_check(): query.project = query.project.lower() query.all_langs, query.all_projects = get_sites() - if query.project and query.lang and (query.title or query.oldid): + query.submitted = query.project and query.lang and (query.title or query.oldid) + if query.submitted: query.site = get_site(query) if query.site: _get_results(query, follow=query.noredirect is None) @@ -51,7 +52,25 @@ def _get_results(query, follow=True): query.redirected_from = page return _get_results(query, follow=False) - if query.url: + if not query.action: + query.action = "compare" if query.url else "search" + if query.action == "search": + conn = get_cache_db() + use_engine = 1 if query.use_engine else 0 + use_links = 1 if query.use_links else 0 + mode = "{0}:{1}:".format(use_engine, use_links) + if not query.nocache: + query.result = _get_cached_results(page, conn, query, mode) + if not query.result: + query.result = page.copyvio_check( + min_confidence=T_SUSPECT, max_queries=10, max_time=45, + no_searches=not use_engine, no_links=not use_links) + query.result.cached = False + _cache_result(page, query.result, conn, mode) + elif query.action == "compare": + if not query.url: + query.error = "no URL" + return scheme = urlparse(query.url).scheme if not scheme and query.url[0] not in ":/": query.url = "http://" + query.url @@ -63,14 +82,7 @@ def _get_results(query, follow=True): query.result = result query.result.cached = False else: - conn = get_cache_db() - if not query.nocache: - query.result = _get_cached_results(page, conn, query) - if not query.result: - query.result = page.copyvio_check( - min_confidence=T_SUSPECT, max_queries=10, max_time=45) - query.result.cached = False - _cache_result(page, query.result, conn) + query.error = "bad action" def _get_page_by_revid(site, revid): res = site.api_query(action="query", prop="info|revisions", revids=revid, @@ -90,13 +102,13 @@ def _get_page_by_revid(site, revid): page._load_content(res) return page -def _get_cached_results(page, conn, query): +def _get_cached_results(page, conn, query, mode): query1 = """DELETE FROM cache WHERE cache_time < DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 3 DAY)""" query2 = """SELECT cache_url, cache_time, cache_queries, cache_process_time FROM cache WHERE cache_id = ? AND cache_hash = ?""" - shahash = sha256(page.get().encode("utf8")).hexdigest() + shahash = sha256(mode + page.get().encode("utf8")).hexdigest() with conn.cursor() as cursor: cursor.execute(query1) @@ -129,13 +141,13 @@ def _format_date(cache_time): return "{0} minutes".format(diff.seconds / 60) return "{0} seconds".format(diff.seconds) -def _cache_result(page, result, conn): +def _cache_result(page, result, conn, mode): query = """INSERT INTO cache VALUES (?, ?, ?, CURRENT_TIMESTAMP, ?, ?) ON DUPLICATE KEY UPDATE cache_url = ?, cache_time = CURRENT_TIMESTAMP, cache_queries = ?, cache_process_time = ?""" - shahash = sha256(page.get().encode("utf8")).hexdigest() + shahash = sha256(mode + page.get().encode("utf8")).hexdigest() args = (page.pageid, shahash, result.url, result.queries, result.time, result.url, result.queries, result.time) with conn.cursor() as cursor: diff --git a/static/script.js b/static/script.js index 783ec35..120b9a9 100644 --- a/static/script.js +++ b/static/script.js @@ -87,3 +87,16 @@ function set_cookie(name, value, days) { function delete_cookie(name) { set_cookie(name, "", -1); } + +$(document).ready(function() { + $("#action-search").change(function() { + $("#cv-cb-engine").prop("disabled", false); + $("#cv-cb-links").prop("disabled", false); + $("#url-box").prop("disabled", true); + }).change(); + $("#action-compare").change(function() { + $("#cv-cb-engine").prop("disabled", true); + $("#cv-cb-links").prop("disabled", true); + $("#url-box").prop("disabled", false); + }); +}); diff --git a/static/style.css b/static/style.css index e2b0ab1..cb16f66 100644 --- a/static/style.css +++ b/static/style.css @@ -18,15 +18,15 @@ h2 { div#header { font-size: 2.5em; font-weight: bold; - margin: 30px 60px 30px 60px; - padding: 10px 15px 10px 15px; + margin: 20px 60px; + padding: 10px 15px; border: 1px solid #777; background-color: #FFF; } div#container { line-height: 1.25; - margin: 0 60px 75px 60px; + margin: 0 60px 65px 60px; padding: 5px 15px 15px 15px; border: 1px solid #777; background-color: #FFF; @@ -50,8 +50,8 @@ div#info-box { } div#cv-result { - padding: 5px 10px 0 10px; - margin: 0 5px 10px 5px; + padding: 10px; + margin: 15px 5px 10px 5px; } table#heading { @@ -62,6 +62,11 @@ table#cv-form { width: 750px; } +table#cv-form-inner { + width: 100%; + border-spacing: 0; +} + table#cv-chain-table { width: 100%; margin-bottom: 10px; @@ -89,13 +94,24 @@ td#cv-col4 { width: 15%; } +td#cv-inner-col1 { + width: 4%; +} + +td#cv-inner-col2 { + width: 22%; +} + +td#cv-inner-col3 { + width: 76%; +} + h2#cv-result-header { - margin-top: 0.2em; - margin-bottom: 0; + margin: 0; } ul#cv-result-list { - margin-top: 0.5em; + margin: 0.5em 0; } a#cv-cached { @@ -115,12 +131,6 @@ a#cv-cached span { color: black; } -div.divider { - border-bottom: 1px solid #AAA; - margin-top: 15px; - margin-bottom: 15px; -} - div.green-box { background-color: #EFE; border: 1px solid #7F7; @@ -155,6 +165,16 @@ input.cv-text { width: 100%; } +input#cv-cb-engine { + margin-left: 0; + margin-right: 5px; +} + +input#cv-cb-links { + margin-left: 20px; + margin-right: 5px; +} + span.cv-hl { background: #FAA; } diff --git a/templates/index.mako b/templates/index.mako index 8423593..289a6ec 100644 --- a/templates/index.mako +++ b/templates/index.mako @@ -5,8 +5,16 @@ <%include file="/support/header.mako" args="title='Earwig\'s Copyvio Detector'"/> <%namespace module="copyvios.highlighter" import="highlight_delta"/>\ <%namespace module="copyvios.misc" import="httpsfix, urlstrip"/>\ -% if query.project and query.lang and (query.title or query.oldid): - % if query.error == "bad URI": +% if query.submitted: + % if query.error == "bad action": +
Unknown action: ${query.action | h}.
+URL comparison mode requires a URL to be entered. Enter one in the text box below, or choose copyvio search mode to look for content similar to the article elsewhere on the web.
+Unsupported URI scheme: ${query.url | h}.
This tool attempts to detect copyright violations in articles. Simply give the title of the page or ID of the revision you want to check and hit Submit. The tool will search for similar content elsewhere on the web using Yahoo! BOSS and then display a report if a match is found. If you give a URL, it will skip the search engine step and directly display a report comparing the article to that particular webpage, like the Duplication Detector.
+This tool attempts to detect copyright violations in articles. Simply give the title of the page or ID of the revision you want to check and hit Submit. The tool will search for similar content elsewhere on the web using Yahoo! BOSS and then display a report if a match is found. If you give a specific URL, it will skip the search engine step and directly display a report comparing the article to that particular webpage, like the Duplication Detector.
Running a full check can take up to 45 seconds if other websites are slow. Please be patient. If you get a timeout, wait a moment and refresh the page.
Specific websites can be skipped (for example, if their content is in the public domain) by being added to the excluded URL list.
% if result: <% hide_comparison = "CopyviosHideComparison" in g.cookies and g.cookies["CopyviosHideComparison"].value == "True" %> -Article: ${highlight_delta(result.article_chain, result.delta_chain)} |
- Source: ${highlight_delta(result.source_chain, result.delta_chain)} |
-
Article: ${highlight_delta(result.article_chain, result.delta_chain)} |
+ Source: ${highlight_delta(result.source_chain, result.delta_chain)} |
+