@@ -1,21 +1,21 @@ | |||||
This is a [copyright violation](https://en.wikipedia.org/wiki/WP:COPYVIO) | This is a [copyright violation](https://en.wikipedia.org/wiki/WP:COPYVIO) | ||||
detector running on [Wikimedia Labs](https://tools.wmflabs.org/copyvios). | |||||
detector running on [Wikimedia Cloud Services](https://copyvios.toolforge.org/). | |||||
It can search the web for content similar to a given article, and graphically | It can search the web for content similar to a given article, and graphically | ||||
compare an article to a specific URL. Some technical details are expanded upon | compare an article to a specific URL. Some technical details are expanded upon | ||||
[in a blog post](http://benkurtovic.com/2014/08/20/copyvio-detector.html). | |||||
[in a blog post](https://benkurtovic.com/2014/08/20/copyvio-detector.html). | |||||
Dependencies | Dependencies | ||||
============ | ============ | ||||
* [earwigbot](https://github.com/earwig/earwigbot) >= 0.1 | * [earwigbot](https://github.com/earwig/earwigbot) >= 0.1 | ||||
* [flask](http://flask.pocoo.org/) >= 0.10.1 | |||||
* [flask](https://flask.palletsprojects.com/) >= 0.10.1 | |||||
* [flask-mako](https://pythonhosted.org/Flask-Mako/) >= 0.3 | * [flask-mako](https://pythonhosted.org/Flask-Mako/) >= 0.3 | ||||
* [mako](http://www.makotemplates.org/) >= 0.7.2 | |||||
* [mako](https://www.makotemplates.org/) >= 0.7.2 | |||||
* [mwparserfromhell](https://github.com/earwig/mwparserfromhell) >= 0.3 | * [mwparserfromhell](https://github.com/earwig/mwparserfromhell) >= 0.3 | ||||
* [oursql](http://packages.python.org/oursql/) >= 0.9.3.1 | |||||
* [requests](http://python-requests.org/) >= 2.9.1 | |||||
* [SQLAlchemy](http://sqlalchemy.org/) >= 0.9.6 | |||||
* [oursql](https://pythonhosted.org/oursql/) >= 0.9.3.1 | |||||
* [requests](https://requests.readthedocs.io/) >= 2.9.1 | |||||
* [SQLAlchemy](https://www.sqlalchemy.org/) >= 0.9.6 | |||||
* [apsw](https://github.com/rogerbinns/apsw) >= 3.26.0 | * [apsw](https://github.com/rogerbinns/apsw) >= 3.26.0 | ||||
* [uglifycss](https://github.com/fmarcia/UglifyCSS/) | * [uglifycss](https://github.com/fmarcia/UglifyCSS/) | ||||
* [uglifyjs](https://github.com/mishoo/UglifyJS/) >= 1.3.3 | * [uglifyjs](https://github.com/mishoo/UglifyJS/) >= 1.3.3 | ||||
@@ -25,7 +25,7 @@ Running | |||||
- If using Toolforge, you should clone the repository to `~/www/python/src`, or | - If using Toolforge, you should clone the repository to `~/www/python/src`, or | ||||
otherwise symlink it to that directory. A | otherwise symlink it to that directory. A | ||||
[virtualenv](http://virtualenv.readthedocs.org/) should be created at | |||||
[virtualenv](https://virtualenv.pypa.io/) should be created at | |||||
`~/www/python/venv`. | `~/www/python/venv`. | ||||
- Install all dependencies listed above. | - Install all dependencies listed above. | ||||
@@ -58,7 +58,7 @@ def setup_app(): | |||||
def prepare_request(): | def prepare_request(): | ||||
g._db = None | g._db = None | ||||
g.cookies = parse_cookies( | g.cookies = parse_cookies( | ||||
request.script_root, request.environ.get("HTTP_COOKIE")) | |||||
request.script_root or "/", request.environ.get("HTTP_COOKIE")) | |||||
g.new_cookies = [] | g.new_cookies = [] | ||||
@app.after_request | @app.after_request | ||||
@@ -8,7 +8,7 @@ from .misc import parse_wiki_timestamp | |||||
__all__ = ['search_turnitin', 'TURNITIN_API_ENDPOINT'] | __all__ = ['search_turnitin', 'TURNITIN_API_ENDPOINT'] | ||||
TURNITIN_API_ENDPOINT = 'http://tools.wmflabs.org/eranbot/plagiabot/api.py' | |||||
TURNITIN_API_ENDPOINT = 'https://eranbot.toolforge.org/plagiabot/api.py' | |||||
def search_turnitin(page_title, lang): | def search_turnitin(page_title, lang): | ||||
""" Search the Plagiabot database for Turnitin reports for a page. | """ Search the Plagiabot database for Turnitin reports for a page. | ||||
@@ -2,7 +2,7 @@ | |||||
"name" : "copyvios", | "name" : "copyvios", | ||||
"title" : "Copyvios", | "title" : "Copyvios", | ||||
"description" : "Detects copyright violations in pages by searching for their contents online. Can also compare a page and a specific URL.", | "description" : "Detects copyright violations in pages by searching for their contents online. Can also compare a page and a specific URL.", | ||||
"url" : "https://tools.wmflabs.org/copyvios", | |||||
"url" : "https://copyvios.toolforge.org/", | |||||
"keywords" : "copyvios, copyright violations", | "keywords" : "copyvios, copyright violations", | ||||
"author" : "The Earwig", | "author" : "The Earwig", | ||||
"repository" : "https://github.com/earwig/copyvios" | "repository" : "https://github.com/earwig/copyvios" | ||||
@@ -40,9 +40,9 @@ | |||||
% if help: | % if help: | ||||
<div id="help"> | <div id="help"> | ||||
<h1>Copyvio Detector API</h1> | <h1>Copyvio Detector API</h1> | ||||
<p>This is the first version of the <a href="//en.wikipedia.org/wiki/Application_programming_interface">API</a> for <a href="${request.script_root}">Earwig's Copyvio Detector</a>. It works, but some bugs might still need to be ironed out, so please <a href="https://github.com/earwig/copyvios/issues">report any</a> if you see them.</p> | |||||
<p>This is the first version of the <a href="https://en.wikipedia.org/wiki/Application_programming_interface">API</a> for <a href="${request.script_root}/">Earwig's Copyvio Detector</a>. It works, but some bugs might still need to be ironed out, so please <a href="https://github.com/earwig/copyvios/issues">report any</a> if you see them.</p> | |||||
<h2>Requests</h2> | <h2>Requests</h2> | ||||
<p>The API responds to GET requests made to <span class="code">https://tools.wmflabs.org/copyvios/api.json</span>. Parameters are described in the tables below:</p> | |||||
<p>The API responds to GET requests made to <span class="code">https://copyvios.toolforge.org/api.json</span>. Parameters are described in the tables below:</p> | |||||
<table class="parameters"> | <table class="parameters"> | ||||
<tr> | <tr> | ||||
<th colspan="4">Always</th> | <th colspan="4">Always</th> | ||||
@@ -63,7 +63,7 @@ | |||||
<td>format</td> | <td>format</td> | ||||
<td><span class="code">json</span>, <span class="code">jsonfm</span></td> | <td><span class="code">json</span>, <span class="code">jsonfm</span></td> | ||||
<td>No (default: <span class="code">json</span>)</td> | <td>No (default: <span class="code">json</span>)</td> | ||||
<td>The default output format is <a href="http://json.org/">JSON</a>. <span class="code">jsonfm</span> mode produces the same output, but renders it as a formatted HTML document for debugging.</td> | |||||
<td>The default output format is <a href="https://www.json.org/">JSON</a>. <span class="code">jsonfm</span> mode produces the same output, but renders it as a formatted HTML document for debugging.</td> | |||||
</tr> | </tr> | ||||
<tr> | <tr> | ||||
<td>version</td> | <td>version</td> | ||||
@@ -254,7 +254,7 @@ | |||||
<h2>Etiquette</h2> | <h2>Etiquette</h2> | ||||
The tool uses the same workers to handle all requests, so making concurrent API calls is only going to slow you down. Most operations are not rate-limited, but full searches with <span class="code">use_engine=True</span> are globally limited to around a thousand per day. Be respectful! | The tool uses the same workers to handle all requests, so making concurrent API calls is only going to slow you down. Most operations are not rate-limited, but full searches with <span class="code">use_engine=True</span> are globally limited to around a thousand per day. Be respectful! | ||||
<h2>Example</h2> | <h2>Example</h2> | ||||
<p><a class="no-color" href="https://tools.wmflabs.org/copyvios/api.json?version=1&action=search&project=wikipedia&lang=en&title=User:EarwigBot/Copyvios/Tests/2"><span class="code">https://tools.wmflabs.org/copyvios/api.json?<span class="param-key">version</span>=<span class="param-val">1</span>&<span class="param-key">action</span>=<span class="param-val">search</span>&<span class="param-key">project</span>=<span class="param-val">wikipedia</span>&<span class="param-key">lang</span>=<span class="param-val">en</span>&<span class="param-key">title</span>=<span class="param-val">User:EarwigBot/Copyvios/Tests/2</span></span></a></p> | |||||
<p><a class="no-color" href="https://copyvios.toolforge.org/api.json?version=1&action=search&project=wikipedia&lang=en&title=User:EarwigBot/Copyvios/Tests/2"><span class="code">https://copyvios.toolforge.org/api.json?<span class="param-key">version</span>=<span class="param-val">1</span>&<span class="param-key">action</span>=<span class="param-val">search</span>&<span class="param-key">project</span>=<span class="param-val">wikipedia</span>&<span class="param-key">lang</span>=<span class="param-val">en</span>&<span class="param-key">title</span>=<span class="param-val">User:EarwigBot/Copyvios/Tests/2</span></span></a></p> | |||||
<pre>{ | <pre>{ | ||||
"status": "ok", | "status": "ok", | ||||
"meta": { | "meta": { | ||||
@@ -35,11 +35,11 @@ | |||||
</p></div> | </p></div> | ||||
% elif not query.site: | % elif not query.site: | ||||
<div id="info-box" class="red-box"> | <div id="info-box" class="red-box"> | ||||
<p>The given site (project=<b><span class="mono">${query.project | h}</span></b>, language=<b><span class="mono">${query.lang | h}</span></b>) doesn't seem to exist. It may also be closed or private. <a href="//${query.lang | h}.${query.project | h}.org/">Confirm its URL.</a></p> | |||||
<p>The given site (project=<b><span class="mono">${query.project | h}</span></b>, language=<b><span class="mono">${query.lang | h}</span></b>) doesn't seem to exist. It may also be closed or private. <a href="https://${query.lang | h}.${query.project | h}.org/">Confirm its URL.</a></p> | |||||
</div> | </div> | ||||
% elif query.oldid and not result: | % elif query.oldid and not result: | ||||
<div id="info-box" class="red-box"> | <div id="info-box" class="red-box"> | ||||
<p>The given revision ID doesn't seem to exist: <a href="//${query.site.domain | h}/w/index.php?oldid=${query.oldid | h}">${query.oldid | h}</a>.</p> | |||||
<p>The given revision ID doesn't seem to exist: <a href="https://${query.site.domain | h}/w/index.php?oldid=${query.oldid | h}">${query.oldid | h}</a>.</p> | |||||
</div> | </div> | ||||
% elif query.title and not result: | % elif query.title and not result: | ||||
<div id="info-box" class="red-box"> | <div id="info-box" class="red-box"> | ||||
@@ -47,10 +47,10 @@ | |||||
</div> | </div> | ||||
% endif | % endif | ||||
%endif | %endif | ||||
<p>This tool attempts to detect <a href="//en.wikipedia.org/wiki/WP:COPYVIO">copyright violations</a> in articles. In <i>search mode</i>, it will check for similar content elsewhere on the web using <a href="https://developers.google.com/custom-search/">Google</a>, external links present in the text of the page, or <a href="//en.wikipedia.org/wiki/Wikipedia:Turnitin">Turnitin</a> (provided by <a href="//en.wikipedia.org/wiki/User:EranBot">EranBot</a>), depending on which options are selected. In <i>comparison mode</i>, the tool will compare the article to a specific webpage without making additional searches, like the <a href="//tools.wmflabs.org/dupdet/">Duplication Detector</a>.</p> | |||||
<p>This tool attempts to detect <a href="https://en.wikipedia.org/wiki/WP:COPYVIO">copyright violations</a> in articles. In <i>search mode</i>, it will check for similar content elsewhere on the web using <a href="https://developers.google.com/custom-search/">Google</a>, external links present in the text of the page, or <a href="https://en.wikipedia.org/wiki/Wikipedia:Turnitin">Turnitin</a> (provided by <a href="https://en.wikipedia.org/wiki/User:EranBot">EranBot</a>), depending on which options are selected. In <i>comparison mode</i>, the tool will compare the article to a specific webpage without making additional searches, like the <a href="https://dupdet.toolforge.org/">Duplication Detector</a>.</p> | |||||
<p>Running a full check can take up to a minute if other websites are slow or if the tool is under heavy use. Please be patient. If you get a timeout, wait a moment and refresh the page.</p> | <p>Running a full check can take up to a minute if other websites are slow or if the tool is under heavy use. Please be patient. If you get a timeout, wait a moment and refresh the page.</p> | ||||
<p>Be aware that other websites can copy from Wikipedia, so check the results carefully, especially for older or well-developed articles. Specific websites can be skipped by being added to the <a href="//en.wikipedia.org/wiki/User:EarwigBot/Copyvios/Exclusions">excluded URL list</a>.</p> | |||||
<form id="cv-form" action="${request.script_root}" method="get"> | |||||
<p>Be aware that other websites can copy from Wikipedia, so check the results carefully, especially for older or well-developed articles. Specific websites can be skipped by being added to the <a href="https://en.wikipedia.org/wiki/User:EarwigBot/Copyvios/Exclusions">excluded URL list</a>.</p> | |||||
<form id="cv-form" action="${request.script_root}/" method="get"> | |||||
<table id="cv-form-outer"> | <table id="cv-form-outer"> | ||||
<tr> | <tr> | ||||
<td>Site:</td> | <td>Site:</td> | ||||
@@ -165,7 +165,7 @@ | |||||
% else: | % else: | ||||
seconds. | seconds. | ||||
% endif | % endif | ||||
<a href="${request.script_root | h}?lang=${query.lang | h}&project=${query.project | h}&oldid=${query.oldid or query.page.lastrevid | h}&action=${query.action | h}&${"use_engine={0}&use_links={1}".format(int(query.use_engine not in ("0", "false")), int(query.use_links not in ("0", "false"))) if query.action == "search" else "" | h}${"url=" if query.action == "compare" else ""}${query.url if query.action == "compare" else "" | u}">Permalink.</a> | |||||
<a href="${request.script_root | h}/?lang=${query.lang | h}&project=${query.project | h}&oldid=${query.oldid or query.page.lastrevid | h}&action=${query.action | h}&${"use_engine={0}&use_links={1}".format(int(query.use_engine not in ("0", "false")), int(query.use_links not in ("0", "false"))) if query.action == "search" else "" | h}${"url=" if query.action == "compare" else ""}${query.url if query.action == "compare" else "" | u}">Permalink.</a> | |||||
</div> | </div> | ||||
<div id="cv-result" class="${'red' if result.confidence >= T_SUSPECT else 'yellow' if result.confidence >= T_POSSIBLE else 'green'}-box"> | <div id="cv-result" class="${'red' if result.confidence >= T_SUSPECT else 'yellow' if result.confidence >= T_POSSIBLE else 'green'}-box"> | ||||
@@ -179,11 +179,11 @@ | |||||
<td> | <td> | ||||
<a href="${query.page.url}">${query.page.title | h}</a> | <a href="${query.page.url}">${query.page.title | h}</a> | ||||
% if query.oldid: | % if query.oldid: | ||||
@<a href="//${query.site.domain | h}/w/index.php?oldid=${query.oldid | h}">${query.oldid | h}</a> | |||||
@<a href="https://${query.site.domain | h}/w/index.php?oldid=${query.oldid | h}">${query.oldid | h}</a> | |||||
% endif | % endif | ||||
% if query.redirected_from: | % if query.redirected_from: | ||||
<br /> | <br /> | ||||
<span id="redirected-from">Redirected from <a href="//${query.site.domain | h}/w/index.php?title=${query.redirected_from.title | u}&redirect=no">${query.redirected_from.title | h}</a>. <a href="${request.url | httpsfix, h}&noredirect=1">Check original.</a></span> | |||||
<span id="redirected-from">Redirected from <a href="https://${query.site.domain | h}/w/index.php?title=${query.redirected_from.title | u}&redirect=no">${query.redirected_from.title | h}</a>. <a href="${request.url | httpsfix, h}&noredirect=1">Check original.</a></span> | |||||
% endif | % endif | ||||
</td> | </td> | ||||
<td> | <td> | ||||
@@ -225,7 +225,7 @@ | |||||
% if query.turnitin_result.reports: | % if query.turnitin_result.reports: | ||||
<table id="turnitin-table"><tbody> | <table id="turnitin-table"><tbody> | ||||
% for report in turnitin_result.reports: | % for report in turnitin_result.reports: | ||||
<tr><td class="turnitin-table-cell"><a href="https://tools.wmflabs.org/eranbot/ithenticate.py?rid=${report.reportid}">Report ${report.reportid}</a> for text added at <a href="https://${query.lang}.wikipedia.org/w/index.php?title=${query.title}&diff=${report.diffid}"> ${report.time_posted.strftime("%H:%M, %d %B %Y (UTC)")}</a>: | |||||
<tr><td class="turnitin-table-cell"><a href="https://eranbot.toolforge.org/ithenticate.py?rid=${report.reportid}">Report ${report.reportid}</a> for text added at <a href="https://${query.lang}.wikipedia.org/w/index.php?title=${query.title}&diff=${report.diffid}"> ${report.time_posted.strftime("%H:%M, %d %B %Y (UTC)")}</a>: | |||||
<ul> | <ul> | ||||
% for source in report.sources: | % for source in report.sources: | ||||
<li>${source['percent']}% of revision text (${source['words']} words) found at <a href="${source['url'] | h}">${source['url'] | h}</a></li> | <li>${source['percent']}% of revision text (${source['words']} words) found at <a href="${source['url'] | h}">${source['url'] | h}</a></li> | ||||
@@ -269,7 +269,7 @@ | |||||
% endif | % endif | ||||
</td> | </td> | ||||
<td> | <td> | ||||
<a href="${request.script_root | h}?lang=${query.lang | h}&project=${query.project | h}&oldid=${query.oldid or query.page.lastrevid | h}&action=compare&url=${source.url | u}">Compare</a> | |||||
<a href="${request.script_root | h}/?lang=${query.lang | h}&project=${query.project | h}&oldid=${query.oldid or query.page.lastrevid | h}&action=compare&url=${source.url | u}">Compare</a> | |||||
</td> | </td> | ||||
</tr> | </tr> | ||||
% endfor | % endfor | ||||
@@ -42,7 +42,7 @@ | |||||
</tr> | </tr> | ||||
<% | <% | ||||
background_options = [ | background_options = [ | ||||
("list", 'Randomly select from <a href="http://commons.wikimedia.org/wiki/User:The_Earwig/POTD">a subset</a> of previous <a href="//commons.wikimedia.org/">Wikimedia Commons</a> <a href="//commons.wikimedia.org/wiki/Commons:Picture_of_the_day">Pictures of the Day</a> that work well as widescreen backgrounds, refreshed daily (default).'), | |||||
("list", 'Randomly select from <a href="https://commons.wikimedia.org/wiki/User:The_Earwig/POTD">a subset</a> of previous <a href="https://commons.wikimedia.org/">Wikimedia Commons</a> <a href="https://commons.wikimedia.org/wiki/Commons:Picture_of_the_day">Pictures of the Day</a> that work well as widescreen backgrounds, refreshed daily (default).'), | |||||
("potd", 'Use the current Commons Picture of the Day, unfiltered. Certain POTDs may be unsuitable as backgrounds due to their aspect ratio or subject matter.'), | ("potd", 'Use the current Commons Picture of the Day, unfiltered. Certain POTDs may be unsuitable as backgrounds due to their aspect ratio or subject matter.'), | ||||
("plain", "Use a plain background."), | ("plain", "Use a plain background."), | ||||
] | ] | ||||
@@ -4,13 +4,13 @@ | |||||
%>\ | %>\ | ||||
</div> | </div> | ||||
<div id="footer"> | <div id="footer"> | ||||
<p>Copyright © 2009–${datetime.now().year} <a href="//en.wikipedia.org/wiki/User:The_Earwig">Ben Kurtovic</a> • \ | |||||
<p>Copyright © 2009–${datetime.now().year} <a href="https://en.wikipedia.org/wiki/User:The_Earwig">Ben Kurtovic</a> • \ | |||||
<a href="${request.script_root}/api">API</a> • \ | <a href="${request.script_root}/api">API</a> • \ | ||||
<a href="https://github.com/earwig/copyvios">Source Code</a> • \ | <a href="https://github.com/earwig/copyvios">Source Code</a> • \ | ||||
% if ("CopyviosBackground" in g.cookies and g.cookies["CopyviosBackground"].value in ["potd", "list"]) or "CopyviosBackground" not in g.cookies: | % if ("CopyviosBackground" in g.cookies and g.cookies["CopyviosBackground"].value in ["potd", "list"]) or "CopyviosBackground" not in g.cookies: | ||||
<a href="${g.descurl | h}">Background</a> • \ | <a href="${g.descurl | h}">Background</a> • \ | ||||
% endif | % endif | ||||
<a href="http://validator.w3.org/check?uri=referer">Valid HTML5</a> | |||||
<a href="https://validator.w3.org/check?uri=referer">Valid HTML5</a> | |||||
</p> | </p> | ||||
</div> | </div> | ||||
</body> | </body> | ||||
@@ -21,7 +21,7 @@ | |||||
<div id="header"> | <div id="header"> | ||||
<table id="heading"> | <table id="heading"> | ||||
<tr> | <tr> | ||||
<td id="head-home"><a id="a-home" href="${request.script_root}">Earwig's Copyvio Detector</a></td> | |||||
<td id="head-home"><a id="a-home" href="${request.script_root}/">Earwig's Copyvio Detector</a></td> | |||||
<td id="head-settings"><a id="a-settings" href="${request.script_root}/settings">Settings</a></td> | <td id="head-settings"><a id="a-settings" href="${request.script_root}/settings">Settings</a></td> | ||||
</tr> | </tr> | ||||
</table> | </table> | ||||