Browse Source

Slightly modified interface; give more options (closes #17).

pull/24/head
Ben Kurtovic 10 years ago
parent
commit
710447a6bf
6 changed files with 153 additions and 82 deletions
  1. +26
    -14
      copyvios/checker.py
  2. +13
    -0
      static/script.js
  3. +34
    -14
      static/style.css
  4. +78
    -53
      templates/index.mako
  5. +1
    -1
      templates/settings.mako
  6. +1
    -0
      templates/support/header.mako

+ 26
- 14
copyvios/checker.py View File

@@ -25,7 +25,8 @@ def do_check():
query.project = query.project.lower()

query.all_langs, query.all_projects = get_sites()
if query.project and query.lang and (query.title or query.oldid):
query.submitted = query.project and query.lang and (query.title or query.oldid)
if query.submitted:
query.site = get_site(query)
if query.site:
_get_results(query, follow=query.noredirect is None)
@@ -51,7 +52,25 @@ def _get_results(query, follow=True):
query.redirected_from = page
return _get_results(query, follow=False)

if query.url:
if not query.action:
query.action = "compare" if query.url else "search"
if query.action == "search":
conn = get_cache_db()
use_engine = 1 if query.use_engine else 0
use_links = 1 if query.use_links else 0
mode = "{0}:{1}:".format(use_engine, use_links)
if not query.nocache:
query.result = _get_cached_results(page, conn, query, mode)
if not query.result:
query.result = page.copyvio_check(
min_confidence=T_SUSPECT, max_queries=10, max_time=45,
no_searches=not use_engine, no_links=not use_links)
query.result.cached = False
_cache_result(page, query.result, conn, mode)
elif query.action == "compare":
if not query.url:
query.error = "no URL"
return
scheme = urlparse(query.url).scheme
if not scheme and query.url[0] not in ":/":
query.url = "http://" + query.url
@@ -63,14 +82,7 @@ def _get_results(query, follow=True):
query.result = result
query.result.cached = False
else:
conn = get_cache_db()
if not query.nocache:
query.result = _get_cached_results(page, conn, query)
if not query.result:
query.result = page.copyvio_check(
min_confidence=T_SUSPECT, max_queries=10, max_time=45)
query.result.cached = False
_cache_result(page, query.result, conn)
query.error = "bad action"

def _get_page_by_revid(site, revid):
res = site.api_query(action="query", prop="info|revisions", revids=revid,
@@ -90,13 +102,13 @@ def _get_page_by_revid(site, revid):
page._load_content(res)
return page

def _get_cached_results(page, conn, query):
def _get_cached_results(page, conn, query, mode):
query1 = """DELETE FROM cache
WHERE cache_time < DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 3 DAY)"""
query2 = """SELECT cache_url, cache_time, cache_queries, cache_process_time
FROM cache
WHERE cache_id = ? AND cache_hash = ?"""
shahash = sha256(page.get().encode("utf8")).hexdigest()
shahash = sha256(mode + page.get().encode("utf8")).hexdigest()

with conn.cursor() as cursor:
cursor.execute(query1)
@@ -129,13 +141,13 @@ def _format_date(cache_time):
return "{0} minutes".format(diff.seconds / 60)
return "{0} seconds".format(diff.seconds)

def _cache_result(page, result, conn):
def _cache_result(page, result, conn, mode):
query = """INSERT INTO cache
VALUES (?, ?, ?, CURRENT_TIMESTAMP, ?, ?)
ON DUPLICATE KEY UPDATE
cache_url = ?, cache_time = CURRENT_TIMESTAMP,
cache_queries = ?, cache_process_time = ?"""
shahash = sha256(page.get().encode("utf8")).hexdigest()
shahash = sha256(mode + page.get().encode("utf8")).hexdigest()
args = (page.pageid, shahash, result.url, result.queries, result.time,
result.url, result.queries, result.time)
with conn.cursor() as cursor:


+ 13
- 0
static/script.js View File

@@ -87,3 +87,16 @@ function set_cookie(name, value, days) {
function delete_cookie(name) {
set_cookie(name, "", -1);
}

// Keep the form controls in sync with the selected action: the search
// options are only usable in "search" mode and the URL box only in
// "compare" mode.
$(document).ready(function() {
    var searchOptions = $("#cv-cb-engine, #cv-cb-links");
    var urlBox = $("#url-box");

    $("#action-search").change(function() {
        searchOptions.prop("disabled", false);
        urlBox.prop("disabled", true);
    });
    $("#action-compare").change(function() {
        searchOptions.prop("disabled", true);
        urlBox.prop("disabled", false);
    });

    // Apply the initial state for whichever action is checked on load.
    // Firing the search handler unconditionally would disable the URL box
    // even when the page was rendered with "compare" pre-selected. Fall
    // back to the search state in every other case, as before.
    var initial = $("#action-compare").is(":checked")
        ? "#action-compare" : "#action-search";
    $(initial).change();
});

+ 34
- 14
static/style.css View File

@@ -18,15 +18,15 @@ h2 {
div#header {
font-size: 2.5em;
font-weight: bold;
margin: 30px 60px 30px 60px;
padding: 10px 15px 10px 15px;
margin: 20px 60px;
padding: 10px 15px;
border: 1px solid #777;
background-color: #FFF;
}

div#container {
line-height: 1.25;
margin: 0 60px 75px 60px;
margin: 0 60px 65px 60px;
padding: 5px 15px 15px 15px;
border: 1px solid #777;
background-color: #FFF;
@@ -50,8 +50,8 @@ div#info-box {
}

div#cv-result {
padding: 5px 10px 0 10px;
margin: 0 5px 10px 5px;
padding: 10px;
margin: 15px 5px 10px 5px;
}

table#heading {
@@ -62,6 +62,11 @@ table#cv-form {
width: 750px;
}

/* Nested table inside the check form: stretches to the full width of its
   parent cell and removes the default gaps between its own cells. */
table#cv-form-inner {
width: 100%;
border-spacing: 0;
}

table#cv-chain-table {
width: 100%;
margin-bottom: 10px;
@@ -89,13 +94,24 @@ td#cv-col4 {
width: 15%;
}

/* Column widths for the inner form table (radio button, label, options).
   The three percentages add up to exactly 100%; the previous 4/22/76 split
   over-constrained the row at 102%. */
td#cv-inner-col1 {
    width: 4%;
}

td#cv-inner-col2 {
    width: 22%;
}

td#cv-inner-col3 {
    width: 74%;
}

h2#cv-result-header {
margin-top: 0.2em;
margin-bottom: 0;
margin: 0;
}

ul#cv-result-list {
margin-top: 0.5em;
margin: 0.5em 0;
}

a#cv-cached {
@@ -115,12 +131,6 @@ a#cv-cached span {
color: black;
}

div.divider {
border-bottom: 1px solid #AAA;
margin-top: 15px;
margin-bottom: 15px;
}

div.green-box {
background-color: #EFE;
border: 1px solid #7F7;
@@ -155,6 +165,16 @@ input.cv-text {
width: 100%;
}

/* "Use search engine" checkbox: flush with the left edge of its cell, with
   a small gap before its label. */
input#cv-cb-engine {
margin-left: 0;
margin-right: 5px;
}

/* "Use links in page" checkbox: the larger left margin separates it from
   the preceding option's label. */
input#cv-cb-links {
margin-left: 20px;
margin-right: 5px;
}

span.cv-hl {
background: #FAA;
}


+ 78
- 53
templates/index.mako View File

@@ -5,8 +5,16 @@
<%include file="/support/header.mako" args="title='Earwig\'s Copyvio Detector'"/>
<%namespace module="copyvios.highlighter" import="highlight_delta"/>\
<%namespace module="copyvios.misc" import="httpsfix, urlstrip"/>\
% if query.project and query.lang and (query.title or query.oldid):
% if query.error == "bad URI":
% if query.submitted:
% if query.error == "bad action":
<div id="info-box" class="red-box">
<p>Unknown action: <b><span class="mono">${query.action | h}</span></b>.</p>
</div>
% elif query.error == "no URL":
<div id="info-box" class="red-box">
<p>URL comparison mode requires a URL to be entered. Enter one in the text box below, or choose copyvio search mode to look for content similar to the article elsewhere on the web.</p>
</div>
% elif query.error == "bad URI":
<div id="info-box" class="red-box">
<p>Unsupported URI scheme: <a href="${query.url | h}">${query.url | h}</a>.</p>
</div>
@@ -32,7 +40,7 @@
</div>
% endif
%endif
<p>This tool attempts to detect <a href="//en.wikipedia.org/wiki/WP:COPYVIO">copyright violations</a> in articles. Simply give the title of the page or ID of the revision you want to check and hit Submit. The tool will search for similar content elsewhere on the web using <a href="//info.yahoo.com/legal/us/yahoo/boss/pricing/">Yahoo! BOSS</a> and then display a report if a match is found. If you give a URL, it will skip the search engine step and directly display a report comparing the article to that particular webpage, like the <a href="//toolserver.org/~dcoetzee/duplicationdetector/">Duplication Detector</a>.</p>
<p>This tool attempts to detect <a href="//en.wikipedia.org/wiki/WP:COPYVIO">copyright violations</a> in articles. Simply give the title of the page or ID of the revision you want to check and hit Submit. The tool will search for similar content elsewhere on the web using <a href="//developer.yahoo.com/boss/search/">Yahoo! BOSS</a> and then display a report if a match is found. If you give a specific URL, it will skip the search engine step and directly display a report comparing the article to that particular webpage, like the <a href="//tools.wmflabs.org/dupdet/">Duplication Detector</a>.</p>
<p>Running a full check can take up to 45 seconds if other websites are slow. Please be patient. If you get a timeout, wait a moment and refresh the page.</p>
<p>Specific websites can be skipped (for example, if their content is in the public domain) by being added to the <a href="//en.wikipedia.org/wiki/User:EarwigBot/Copyvios/Exclusions">excluded URL list</a>.</p>
<form action="${request.script_root}" method="get">
@@ -40,7 +48,7 @@
<tr>
<td>Site:</td>
<td colspan="3">
<span class="mono">http://</span>
<span class="mono">https://</span>
<select name="lang">
<% selected_lang = query.orig_lang if query.orig_lang else g.cookies["CopyviosDefaultLang"].value if "CopyviosDefaultLang" in g.cookies else g.bot.wiki.get_site().lang %>\
% for code, name in query.all_langs:
@@ -84,24 +92,42 @@
</td>
</tr>
<tr>
<td>URL&nbsp;(optional):</td>
<td>Action:</td>
<td colspan="3">
% if query.url:
<input class="cv-text" type="text" name="url" value="${query.url | h}" />
% else:
<input class="cv-text" type="text" name="url" />
% endif
## Action selector: one row per mode. "search" carries the two source
## checkboxes, "compare" carries the URL box. The checkboxes default to
## checked until the form has been submitted, after which they reflect the
## submitted values.
<table id="cv-form-inner">
    <tr>
        <td id="cv-inner-col1">
            <input id="action-search" type="radio" name="action" value="search" ${'checked="checked"' if (query.action == "search" or not query.action) else ""} />
        </td>
        <td id="cv-inner-col2"><label for="action-search">Copyvio&nbsp;search:</label></td>
        <td id="cv-inner-col3">
            <input id="cv-cb-engine" type="checkbox" name="use_engine" value="1" ${'checked="checked"' if (query.use_engine or not query.submitted) else ""} />
            <label for="cv-cb-engine">Use&nbsp;search&nbsp;engine</label>
            <input id="cv-cb-links" type="checkbox" name="use_links" value="1" ${'checked="checked"' if (query.use_links or not query.submitted) else ""} />
            <label for="cv-cb-links">Use&nbsp;links&nbsp;in&nbsp;page</label>
        </td>
    </tr>
    <tr>
        <td>
            <input id="action-compare" type="radio" name="action" value="compare" ${'checked="checked"' if query.action == "compare" else ""} />
        </td>
        <td><label for="action-compare">URL&nbsp;comparison:</label></td>
        <td>
            <input id="url-box" class="cv-text" type="text" name="url"
            % if query.url:
                value="${query.url | h}"
            % endif
            />
        </td>
    </tr>
</table>
</td>
</tr>
% if query.nocache or (result and result.cached):
<tr>
<td>Bypass&nbsp;cache:</td>
<td><label for="cb-nocache">Bypass&nbsp;cache:</label></td>
<td colspan="3">
% if query.nocache:
<input type="checkbox" name="nocache" value="1" checked="checked" />
% else:
<input type="checkbox" name="nocache" value="1" />
% endif
<input id="cb-nocache" type="checkbox" name="nocache" value="1" ${'checked="checked"' if query.nocache else ""} />
</td>
</tr>
% endif
@@ -114,7 +140,6 @@
</form>
% if result:
<% hide_comparison = "CopyviosHideComparison" in g.cookies and g.cookies["CopyviosHideComparison"].value == "True" %>
<div class="divider"></div>
<div id="cv-result" class="${'red' if result.confidence >= T_SUSPECT else 'yellow' if result.confidence >= T_POSSIBLE else 'green'}-box">
<h2 id="cv-result-header">
% if result.confidence >= T_POSSIBLE:
@@ -131,44 +156,44 @@
% endif
% endif
</h2>
<ul id="cv-result-list">
% if result.confidence < T_POSSIBLE and not query.url:
% if result.url:
<li>Best match: <a href="${result.url | h}">${result.url | urlstrip, h}</a>.</li>
% else:
<li>No matches found.</li>
% endif
% endif
</div>
<ul id="cv-result-list">
% if result.confidence < T_POSSIBLE and query.action == "search":
% if result.url:
<li><b><span class="mono">${round(result.confidence * 100, 1)}%</span></b> confidence of a violation.</li>
% endif
% if query.redirected_from:
<li>Redirected from <a href="${query.redirected_from.url}">${query.redirected_from.title | h}</a>. <a href="${request.url | httpsfix, h}&amp;noredirect=1">Check the original page.</a></li>
% endif
% if result.cached:
<li>
Results are <a id="cv-cached" href="#">cached<span>To save time (and money), this tool will retain the results of checks for up to 72 hours. This includes the URL of the "violated" source, but neither its content nor the content of the article. Future checks on the same page (assuming it remains unchanged) will not involve additional search queries, but a fresh comparison against the source URL will be made. If the page is modified, a new check will be run.</span></a> from <abbr title="${result.cache_time}">${result.cache_age} ago</abbr>. Retrieved in <span class="mono">${round(result.time, 3)}</span> seconds (originally generated in
% if result.queries:
<span class="mono">${round(result.original_time, 3)}</span>s using <span class="mono">${result.queries}</span> queries).
% else:
<span class="mono">${round(result.original_time, 3)}</span>s).
% endif
<a href="${request.url | httpsfix, h}&amp;nocache=1">Bypass the cache.</a>
</li>
<li>Best match: <a href="${result.url | h}">${result.url | urlstrip, h}</a>.</li>
% else:
<li>Results generated in <span class="mono">${round(result.time, 3)}</span> seconds using <span class="mono">${result.queries}</span> queries.</li>
% endif
% if result.queries:
<li><i>Fun fact:</i> The Wikimedia Foundation paid Yahoo! Inc. <a href="http://info.yahoo.com/legal/us/yahoo/search/bosspricing/details.html">$${result.queries * 0.0008} USD</a> for these results.</li>
<li>No matches found.</li>
% endif
<li><a id="cv-chain-link" href="#cv-chain-table" onclick="copyvio_toggle_details()">${"Show" if hide_comparison else "Hide"} comparison:</a></li>
</ul>
<table id="cv-chain-table" style="display: ${'none' if hide_comparison else 'table'};">
<tr>
<td class="cv-chain-cell">Article: <div class="cv-chain-detail"><p>${highlight_delta(result.article_chain, result.delta_chain)}</p></div></td>
<td class="cv-chain-cell">Source: <div class="cv-chain-detail"><p>${highlight_delta(result.source_chain, result.delta_chain)}</p></div></td>
</tr>
</table>
</div>
% endif
% if result.url:
<li><b><span class="mono">${round(result.confidence * 100, 1)}%</span></b> confidence of a violation.</li>
% endif
% if query.redirected_from:
<li>Redirected from <a href="${query.redirected_from.url}">${query.redirected_from.title | h}</a>. <a href="${request.url | httpsfix, h}&amp;noredirect=1">Check the original page.</a></li>
% endif
% if result.cached:
<li>
Results are <a id="cv-cached" href="#">cached<span>To save time (and money), this tool will retain the results of checks for up to 72 hours. This includes the URL of the "violated" source, but neither its content nor the content of the article. Future checks on the same page (assuming it remains unchanged) will not involve additional search queries, but a fresh comparison against the source URL will be made. If the page is modified, a new check will be run.</span></a> from <abbr title="${result.cache_time}">${result.cache_age} ago</abbr>. Retrieved in <span class="mono">${round(result.time, 3)}</span> seconds (originally generated in
% if result.queries:
<span class="mono">${round(result.original_time, 3)}</span>s using <span class="mono">${result.queries}</span> queries).
% else:
<span class="mono">${round(result.original_time, 3)}</span>s).
% endif
<a href="${request.url | httpsfix, h}&amp;nocache=1">Bypass the cache.</a>
</li>
% else:
<li>Results generated in <span class="mono">${round(result.time, 3)}</span> seconds using <span class="mono">${result.queries}</span> queries.</li>
% endif
% if result.queries:
<li><i>Fun fact:</i> The Wikimedia Foundation paid Yahoo! Inc. <a href="http://info.yahoo.com/legal/us/yahoo/search/bosspricing/details.html">$${result.queries * 0.0008} USD</a> for these results.</li>
% endif
<li><a id="cv-chain-link" href="#cv-chain-table" onclick="copyvio_toggle_details()">${"Show" if hide_comparison else "Hide"} comparison:</a></li>
</ul>
<table id="cv-chain-table" style="display: ${'none' if hide_comparison else 'table'};">
<tr>
<td class="cv-chain-cell">Article: <div class="cv-chain-detail"><p>${highlight_delta(result.article_chain, result.delta_chain)}</p></div></td>
<td class="cv-chain-cell">Source: <div class="cv-chain-detail"><p>${highlight_delta(result.source_chain, result.delta_chain)}</p></div></td>
</tr>
</table>
% endif
<%include file="/support/footer.mako"/>

+ 1
- 1
templates/settings.mako View File

@@ -14,7 +14,7 @@
<tr>
<td>Default site:</td>
<td>
<span class="mono">http://</span>
<span class="mono">https://</span>
<select name="lang">
<% selected_lang = g.cookies["CopyviosDefaultLang"].value if "CopyviosDefaultLang" in g.cookies else default_lang %>\
% for code, name in langs:


+ 1
- 0
templates/support/header.mako View File

@@ -9,6 +9,7 @@
<meta charset="utf-8">
<title>${title}</title>
<link rel="stylesheet" href="${request.script_root}/static/style.min.css" type="text/css" />
<script src="//code.jquery.com/jquery-1.11.1.min.js" type="text/javascript"></script>
<script src="${request.script_root}/static/script.min.js" type="text/javascript"></script>
</head>
<% selected = g.cookies["CopyviosBackground"].value if "CopyviosBackground" in g.cookies else "list" %>\


Loading…
Cancel
Save