@@ -0,0 +1,22 @@ | |||
[package] | |||
name = "copyvios" | |||
version = "0.1.0" | |||
authors = ["Ben Kurtovic <ben@benkurtovic.com>"] | |||
license = "MIT" | |||
edition = "2021" | |||
[dependencies] | |||
askama = { version = "0.12", features = ["with-axum"] } | |||
askama_axum = "0.3" | |||
axum = "0.6" | |||
mwapi = "0.6.0" | |||
mwapi_responses = "0.4.2" | |||
mwbot = "0.6.1" | |||
parsoid = "0.8.0" | |||
rand = "0.8.5" | |||
thiserror = "1.0.56" | |||
tokio = { version = "1.0", features = ["full"] } | |||
tower = { version = "0.4", features = ["util"] } | |||
tower-http = { version = "0.4", features = ["fs", "trace"] } | |||
tracing = "0.1" | |||
tracing-subscriber = { version = "0.3", features = ["env-filter"] } |
@@ -0,0 +1,70 @@ | |||
use crate::{site::Site, Error, Result}; | |||
use mwapi_responses::prelude::*; | |||
use mwbot::{parsoid::WikinodeIterator, Bot, Page}; | |||
use rand::seq::SliceRandom; | |||
/// A background image to show behind the page, plus where it came from.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Background {
    /// URL of the image file itself; the templates use it as the CSS
    /// `background-image` of `<body>`.
    pub image_url: String,
    /// URL the templates link to from the footer's "Background image" entry.
    pub source_url: String,
}
async fn get_potd_images(bot: &Bot) -> Result<Vec<Page>> { | |||
let page = bot.page("User:The Earwig/POTD")?; | |||
let html = page.html().await?.into_mutable(); | |||
let mut images = Vec::new(); | |||
for link in html.filter_links() { | |||
let target = bot.page(&link.target())?; | |||
if !target.is_file() { | |||
continue; | |||
} | |||
images.push(target); | |||
} | |||
Ok(images) | |||
} | |||
// Response type for the `imageinfo` query used to look up a background image.
// The attribute below names the query parameters (`prop=imageinfo` with the
// `url`, `size` and `canonicaltitle` image-info properties).
// NOTE(review): the struct body is empty here; its fields are presumably
// generated by the `query` attribute macro — confirm against mwapi_responses.
#[query(prop = "imageinfo", iiprop = "url|size|canonicaltitle")]
pub(crate) struct InfoResponse {}
async fn get_background_from_page(bot: &Bot, image: &Page) -> Result<Background> { | |||
let mut resp: InfoResponse = | |||
mwapi_responses::query_api(&bot.api(), [("titles", image.title())]).await?; | |||
let info = resp | |||
.query | |||
.pages | |||
.pop() | |||
.ok_or(Error::NoBackgroundError(format!( | |||
"Background image not found: {}", | |||
{ image.title() } | |||
))); | |||
tracing::info!("info: {:?}", info); | |||
// data = site.api_query( | |||
// action="query", prop="imageinfo", iiprop="url|size|canonicaltitle", | |||
// titles="File:" + filename) | |||
// res = data["query"]["pages"].values()[0]["imageinfo"][0] | |||
// name = res["canonicaltitle"][len("File:"):].replace(" ", "_") | |||
// return name, res["url"], res["descriptionurl"], res["width"], res["height"] | |||
let image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/9/92/Crepuscular_rays_at_Sunset_near_Waterberg_Plateau.jpg/2560px-Crepuscular_rays_at_Sunset_near_Waterberg_Plateau.jpg"; | |||
let source_url = "https://commons.wikimedia.org/wiki/File:Crepuscular_rays_at_Sunset_near_Waterberg_Plateau.jpg"; | |||
Ok(Background { | |||
image_url: String::from(image_url), | |||
source_url: String::from(source_url), | |||
}) | |||
} | |||
pub async fn get_background() -> Result<Background> { | |||
let site = Site::new("wikimedia", "commons"); | |||
let bot = site.bot().await.unwrap(); | |||
let images = get_potd_images(&bot).await?; | |||
let image = images.choose(&mut rand::thread_rng()); | |||
let image = | |||
image.ok_or_else(|| Error::BackgroundError(String::from("no POTD images found")))?; | |||
tracing::info!("Background image: {:?}", image.title()); | |||
get_background_from_page(&bot, image).await | |||
} |
@@ -0,0 +1,17 @@ | |||
use thiserror::Error as ThisError; | |||
/// Crate-wide result alias; the error type defaults to [`Error`] but can be
/// overridden by naming a second type argument.
pub type Result<T, E = Error> = std::result::Result<T, E>;
/// Errors produced by this crate.
///
/// The wrapping variants are created automatically by `?` through their
/// `#[from]` conversions.
#[non_exhaustive]
#[derive(ThisError, Debug)]
pub enum Error {
    /// Wraps an [`mwapi::Error`].
    #[error("API error: {0}")]
    ApiError(#[from] mwapi::Error),
    /// Wraps an [`mwbot::Error`].
    #[error("Bot error: {0}")]
    BotError(#[from] mwbot::Error),
    /// Wraps an [`mwbot::ConfigError`].
    #[error("Config error: {0}")]
    ConfigError(#[from] mwbot::ConfigError),
    /// No background image could be determined; the string gives the reason.
    #[error("Unable to find background image: {0}")]
    BackgroundError(String),
    // TODO: add more specific variants, e.g. "no backgrounds" and
    // "background unavailable".
}
@@ -0,0 +1,61 @@ | |||
mod background; | |||
mod error; | |||
mod site; | |||
pub use error::{Error, Result}; | |||
use crate::background::Background; | |||
use askama::Template; | |||
use axum::{routing::get, Router}; | |||
use std::net::SocketAddr; | |||
use tower_http::services::ServeDir; | |||
use tracing_subscriber::filter::LevelFilter; | |||
use tracing_subscriber::EnvFilter; | |||
#[tokio::main] | |||
async fn main() { | |||
let filter = EnvFilter::builder() | |||
.with_default_directive(LevelFilter::INFO.into()) | |||
.from_env() | |||
.expect("invalid RUST_LOG value"); | |||
tracing_subscriber::fmt().with_env_filter(filter).init(); | |||
let app = Router::new() | |||
.route("/", get(index)) | |||
.route("/api", get(api)) | |||
.route("/language", get(language)) | |||
.route("/settings", get(settings)) | |||
.nest_service("/static", ServeDir::new("static")); | |||
let addr = SocketAddr::from(([127, 0, 0, 1], 8080)); | |||
tracing::info!("listening on {}", addr); | |||
axum::Server::bind(&addr) | |||
.serve(app.into_make_service()) | |||
.await | |||
.unwrap(); | |||
} | |||
/// Template context for `index.html`.
#[derive(Template)]
#[template(path = "index.html")]
struct IndexTemplate {
    /// Background image whose URLs the template embeds.
    background: Background,
}
async fn index() -> IndexTemplate { | |||
let background = background::get_background() | |||
.await | |||
.expect("failed to get background image"); // TODO: handle gracefully | |||
IndexTemplate { background } | |||
} | |||
/// Handler for `/api`. Not implemented yet: responds with the literal
/// text `TODO`.
async fn api() -> &'static str {
    "TODO"
}
/// Handler for `/language`. Not implemented yet: responds with the literal
/// text `TODO`.
async fn language() -> &'static str {
    "TODO"
}
/// Handler for `/settings`. Not implemented yet: responds with the literal
/// text `TODO`.
async fn settings() -> &'static str {
    "TODO"
}
@@ -0,0 +1,37 @@ | |||
use crate::Result; | |||
use mwbot::Bot; | |||
/// This crate's version, taken from `CARGO_PKG_VERSION` at compile time.
const PKG_VERSION: &str = env!("CARGO_PKG_VERSION");
fn user_agent() -> String { | |||
format!("EarwigCVDetector/{}", PKG_VERSION) | |||
} | |||
pub struct Site { | |||
project: String, | |||
lang: String, | |||
} | |||
impl Site { | |||
pub fn new<S: ToString>(project: S, lang: S) -> Self { | |||
Site { | |||
project: project.to_string(), | |||
lang: lang.to_string(), | |||
} | |||
} | |||
fn domain(&self) -> String { | |||
format!("{}.{}.org", self.lang, self.project) | |||
} | |||
pub async fn bot(&self) -> Result<Bot, mwbot::ConfigError> { | |||
let domain = self.domain(); | |||
Bot::builder( | |||
format!("https://{}/w/api.php", domain), | |||
format!("https://{}/api/rest_v1", domain), | |||
) | |||
.set_user_agent(user_agent()) | |||
.build() | |||
.await | |||
} | |||
} |
@@ -0,0 +1,222 @@ | |||
/* Page skeleton: the document fills the viewport and <body> is a vertical
   flex column. */
html, body {
    height: 100%;
    margin: 0;
}

/* The background image itself is set inline by the templates (style
   attribute on <body>); the rules here only control how it is displayed
   and the solid colour behind it. */
body {
    line-height: 1.4;
    display: flex;
    flex-direction: column;
    background-color: #eaecf0;
    background-attachment: fixed;
    background-size: cover;
    font-family: -apple-system,'BlinkMacSystemFont','Segoe UI','Roboto','Lato','Helvetica','Arial',sans-serif;
    font-size: calc(1em * 0.875);
    color: #000;
}
/* White content card with a border and soft shadow; the large bottom margin
   leaves space beneath the card. */
#container-inner {
    margin: 1em 1em 4em;
    max-width: 100em;
    border: 1px solid #c8ccd1;
    background-color: #fff;
    padding: 1em;
    filter: drop-shadow(0 0 10px rgba(0, 0, 0, 0.25));
}
/* Icon shown before the "Settings" header link: an inline SVG (titled
   "settings") embedded as a data URI, sized through padding and
   background-size. */
#a-settings::before {
    background-image: linear-gradient(transparent,transparent), url("data:image/svg+xml,%3Csvg xmlns=%22http://www.w3.org/2000/svg%22 xmlns:xlink=%22http://www.w3.org/1999/xlink%22 width=%2220%22 height=%2220%22 viewBox=%220 0 20 20%22%3E%3Ctitle%3Esettings%3C/title%3E%3Cg transform=%22translate%2810 10%29%22%3E%3Cpath id=%22a%22 d=%22M1.5-10h-3l-1 6.5h5m0 7h-5l1 6.5h3%22/%3E%3Cuse transform=%22rotate%2845%29%22 xlink:href=%22%23a%22/%3E%3Cuse transform=%22rotate%2890%29%22 xlink:href=%22%23a%22/%3E%3Cuse transform=%22rotate%28135%29%22 xlink:href=%22%23a%22/%3E%3C/g%3E%3Cpath d=%22M10 2.5a7.5 7.5 0 000 15 7.5 7.5 0 000-15v4a3.5 3.5 0 010 7 3.5 3.5 0 010-7%22/%3E%3C/svg%3E");
    background-repeat: no-repeat;
    padding-left: 1.67em;
    content: ' ';
    color: black;
    font-size: 0.85em;
    opacity: 0.67;
    background-size: contain;
}
/* Header bar: its direct children sit inline and are vertically centred
   against each other. */
header {
    padding: 0.2em 1em;
    background-color: #eaecf0;
}
header > * {
    vertical-align: middle;
    display: inline-block;
}
/* Icon shown before the "Language" header link. Uses the `::before`
   pseudo-element notation, matching the #a-settings::before rule.
   NOTE(review): the icon is loaded from commons.wikimedia.org at page-view
   time; consider bundling it under /static. */
#a-language::before {
    content: '';
    background-image: url(https://commons.wikimedia.org/w/extensions/UniversalLanguageSelector/lib/jquery.uls/images/language.svg?80b0b);
    background-repeat: no-repeat;
    padding-left: 1.5em;
    opacity: 0.67;
    background-size: contain;
}
#a-language {
    /* color: #54595d; */
    margin-right: 1em;
}
/* Search-option checkboxes: each .cv-tmp2 group is an inline block whose
   children are vertically centred, with a gap between groups.
   (The "tmp" names are used by the index template.) */
#cv-tmp1 > * {
    vertical-align: middle;
}
.cv-tmp2 {
    display: inline-block;
}
.cv-tmp2 > * {
    vertical-align: middle;
}
.cv-tmp2:not(:last-child) {
    margin-right: 1em;
}

/* General text elements. */
ol, ul {
    line-height:1.5;
}
h2{
    margin-bottom:.2em;
}
#cv-page-title {
    max-width: 40em;
}
/* #header {
font-size:2.5em;
} */
/* #container {} */

/* Outer wrapper around the content card. */
#container {
    min-height: 40em;
    margin: auto;
}
/* Full-width footer; its list items are laid out inline and separated by a
   middle dot (U+00B7). Uses the `::after` pseudo-element notation, matching
   the `::before` rule for the settings icon. */
footer{
    width: 100%;
    font-size: .90em;
    text-align: center;
    line-height: 1.5;
    border-top: 1px solid #c8ccd1;
    background: #fff;
}
footer li {
    display:inline;
}
footer li:not(:last-child)::after{
    content:" \00b7";
}
/* Notice, info and result containers: spacing and box styling. */
#notice-box{
    padding:10px;
    margin:10px 5px;
}
#notice-box p{
    margin:.25em 0;
}
#notice-box ul{padding-left:1.5em;margin:0}
#info-box{padding:0 10px;margin:10px 5px}
#cv-result{padding:5px;margin:10px 5px}
#attribution-warning{padding:1em;margin:15px 5px 10px}
#turnitin-container{padding:5px;margin:15px 5px 10px}
#sources-container{padding:5px 10px;margin:15px 5px 10px;background-color:#eee;border:1px solid #bbb}
#sources-title,#turnitin-title{margin-bottom:-5px;text-align:center;font-weight:700}
#turnitin-summary{padding-bottom:5px;font-style:italic}
/* Hidden by default. */
#cv-additional{display:none}
#generation-time{margin-right:5px;text-align:right;font-style:italic}
#cv-chain-container{margin:0 4px}
/* Table layout: the form tables, the sources table and the result-header
   table all use fixed layout with explicit column widths. */
#heading{width:100%}
#cv-form-outer{table-layout: fixed;width: 100%;max-width: 60em;}
.cv-form-inner{table-layout: fixed;width:100%;border-spacing:0;}
/* Sources table: 80% / 10% / 10% columns, every second row shaded. */
#cv-result-sources{width:100%;border-spacing:0 4px;table-layout:fixed}
#cv-result-sources col:first-child{width:80%}
#cv-result-sources col:nth-child(2),#cv-result-sources col:nth-child(3){width:10%}
#cv-result-sources th{text-align:left}
#cv-result-sources tr:nth-child(2n){background-color:#e0e0e0}
#cv-result-sources td:first-child{overflow:hidden;word-wrap:break-word}
/* Result header: two wide outer columns around a narrow middle column whose
   second line is rendered large. */
#cv-result-head-table{width:100%;text-align:center;table-layout:fixed;border-spacing:0}
#cv-result-head-table col:nth-child(odd){width:42.5%}
#cv-result-head-table col:nth-child(2){width:15%}
#cv-result-head-table td:nth-child(odd){font-size:1.25em;font-weight:700;overflow:hidden;word-wrap:break-word}
#cv-result-head-table td:nth-child(2) div:first-child{font-weight:700}
#cv-result-head-table td:nth-child(2) div:nth-child(2){font-size:2.5em;font-weight:700;line-height:1}
#cv-result-head-table td:nth-child(2) div:nth-child(3){font-size:.8em}
#cv-chain-table{margin-bottom:10px}
#cv-chain-table,#turnitin-table{width:100%;border-spacing:0;table-layout:fixed}
#turnitin-table{word-wrap:break-word}
/* Miscellaneous element-specific rules. */
/* !important lets the selected row override the alternating row shading. */
#source-row-selected{background-color:#cfcfcf!important}
#head-settings{text-align:right}
/* Column widths for the form tables. */
#cv-col1{width:15%}
#cv-col2{width:55%}
#cv-col3{text-align:center}
#cv-col3,#cv-col4{width:15%}
/* NOTE(review): these three widths add up to 102% — confirm intended. */
#cv-inner-col1{width:4%}
#cv-inner-col2{width:22%}
#cv-inner-col3{width:76%}
#cv-result-header{margin:0}
#redirected-from{font-size:.75em;font-weight:400}
#redirected-from,#result-head-no-sources{font-style:italic}
#source-selected{font-weight:700}
/* The <span> inside #cv-cached is an absolutely positioned pop-up box that
   is hidden here and revealed by the #cv-cached:hover rule. */
#cv-cached{position:relative}
#cv-cached span{display:none;position:absolute;top:20px;left:-50px;width:500px;padding:5px;z-index:1;background:#f3f3f3;border:1px solid #aaa;color:#000;font-style:normal;text-align:left}
/* Coloured message boxes. */
.green-box{background-color:#efe;border:1px solid #7f7}
.yellow-box{background-color:#ffd;border:1px solid #ee5}
.red-box{background-color:#fee;border:1px solid #f77}
.gray-box{background-color:#eee;border:1px solid #aaa}
.indentable{white-space:pre-wrap}
/* Source comparison ("chain") and Turnitin detail cells. */
.cv-source-footer{padding-bottom:5px;font-style:italic}
.cv-source-footer a{font-style:normal}
.cv-chain-detail{padding:0 10px;background-color:#fff;border:1px solid #bbb}
.cv-chain-cell{vertical-align:top;word-wrap:break-word}
.cv-chain-cell:first-child{padding-right:5px}
.cv-chain-cell:last-child{padding-left:5px}
.turnitin-table-cell{padding:.5em 0 .3em}
.turnitin-table-cell ul{margin:.2em 0 0;line-height:1.4}
/* Form inputs. */
.cv-text{width:100%;}
.cv-search{margin-left:0;margin-right:5px}
.cv-search~.cv-search{margin-left:20px}
/* Highlights: solid, or fading via a gradient (the plain colour listed
   first is the fallback when gradients are unsupported). */
.cv-hl{background:#faa}
.cv-hl-in{background:#fcc;background:linear-gradient(270deg,#faa,#fff)}
.cv-hl-out{background:#fcc;background:linear-gradient(90deg,#faa,#fff)}
/* Text helpers and source-status colours. */
.mono{font-family:monospace}
.light{color:#ccc}
.medium{color:#aaa}
.source-similarity{font-weight:700}
.source-suspect{color:#900}
.source-possible{color:#990}
.source-novio{color:#090}
.source-excluded,.source-skipped{font-style:italic}
/* Link colours: links are underlined only on hover/active. */
a:link,a:visited{color: #002bb8;text-decoration:none;}
/* a:hover{ color:#040 } */
a:active,a:hover{text-decoration:underline}
a:active{color:#404}
/* Header links. */
#a-home:link,#a-home:visited{margin-right: 1em;font-size: 2em;/* font-weight: bold; */color: #54595d;}
#a-home:hover{color:#555}
#a-home:active{color:#333}
/* #a-settings:link,#a-settings:visited{color: #54595d;} */
/* #a-settings:hover{} */
#a-settings:active{color:#666}
/* Hovering #cv-cached reveals its hidden <span> pop-up. */
#cv-cached:active{color:#040}
#cv-cached:active,#cv-cached:hover{text-decoration:none}
#cv-cached:hover span{display:block}
/* Links to source URLs. */
.source-url:link,.source-url:visited{color:#357}
.source-url:hover{color:#035}
.source-url:active{color:#404}
@@ -0,0 +1,37 @@ | |||
<!DOCTYPE html>
{# Base layout. Child templates fill the "content" block. Reads
   `background.image_url` and `background.source_url` from the context.
   NOTE(review): `title` is also read below — presumably optional; confirm
   how it is declared on the template struct. #}
<html lang="en">
  <head>
    <meta charset="utf-8">
    <title>
      {% if title %}
      {{ title }} -
      {% endif %}
      Earwig's Copyvio Detector
    </title>
    <link rel="stylesheet" href="/static/style.css"><!-- TODO: add hash -->
    <style>
      /* TODO: add dynamic background style */
    </style>
  </head>
  <body style="background-image: url('{{ background.image_url }}');">
    <div id="container">
      <div id="container-inner">
        <header>
          <a id="a-home" href="/">
            Earwig's <strong>Copyvio Detector</strong></a>
          <a id="a-language" href="/language">Language</a>
          <a id="a-settings" href="/settings">Settings</a>
        </header>
        {% block content %}
        {% endblock %}
      </div>
    </div>
    <footer>
      <ul>
        <li>Maintained by <a href="https://en.wikipedia.org/wiki/User:The_Earwig">Ben Kurtovic</a></li>
        <li><a href="/api">API</a></li>
        <li><a href="https://github.com/earwig/copyvios">Source code</a></li>
        <li><a href="{{ background.source_url }}">Background image</a></li>
      </ul>
    </footer>
  </body>
</html>
@@ -0,0 +1,114 @@ | |||
<!DOCTYPE html>
{# Index page: tool description plus the search/compare form. Reads
   `background.image_url` and `background.source_url` from the context. #}
<html lang="en">
  <head>
    <meta charset="utf-8">
    <title>Earwig's Copyvio Detector</title>
    <link rel="stylesheet" href="/static/style.css"><!-- TODO: add hash -->
    <style>
      /* TODO: add dynamic background style */
    </style>
  </head>
  <body style="background-image: url('{{ background.image_url }}');">
    <div id="container">
      <div id="container-inner">
        <header>
          <a id="a-home" href="/">
            Earwig's <strong>Copyvio Detector</strong></a>
          <a id="a-language" href="/language">Language</a>
          <a id="a-settings" href="/settings">Settings</a>
        </header>
        <p>This tool attempts to detect <a href="https://en.wikipedia.org/wiki/WP:COPYVIO">copyright violations</a> in Wikipedia articles. In <i>search mode</i>, it will check for similar content elsewhere on the web using <a href="https://developers.google.com/custom-search/">Google</a>, external links present in the page, or <a href="https://en.wikipedia.org/wiki/Wikipedia:Turnitin">Turnitin</a> (via <a href="https://en.wikipedia.org/wiki/User:EranBot">EranBot</a>), depending on the selected options. In <i>comparison mode</i>, the tool will compare the article to a specific webpage without making any searches, like the <a href="https://dupdet.toolforge.org/">Duplication Detector</a>.</p>
        <p>Be aware that other websites can copy from Wikipedia, so check the results carefully, especially for older or well-developed articles. Specific websites can be skipped by being added to the <a href="https://en.wikipedia.org/wiki/User:EarwigBot/Copyvios/Exclusions">excluded URL list</a>.</p>
        <form id="cv-form" action="/" method="get">
          <table id="cv-form-outer">
            <tr>
              <td style="width: 4em;">Site:</td>
              <td colspan="3">
                <select name="lang" style="width: 10em;">
                  <option value="en" selected="selected">en (English)</option>
                </select>
                <select name="project" style="margin-left: 1em;">
                  <option value="wikipedia" selected="selected">Wikipedia</option>
                </select>
              </td>
            </tr>
            <tr>
              <td id="cv-col1">Page:</td>
              <td id="cv-col2" colspan="3">
                <table class="cv-form-inner">
                  <tr>
                    <td style="width: 8em;">
                      <label for="cv-page-title">Title:</label>
                    </td>
                    <td>
                      <input class="cv-text" type="text" name="title" id="cv-page-title">
                    </td>
                  </tr>
                  <tr>
                    <td>
                      <label for="cv-rev-id">or revision ID:</label>
                    </td>
                    <td>
                      <input class="cv-text" type="text" name="oldid" id="cv-rev-id" style="width: 10em;">
                    </td>
                  </tr>
                </table>
              </td>
            </tr>
            <tr>
              <td>Action:</td>
              <td colspan="3">
                <table class="cv-form-inner">
                  <tr>
                    <td style="width: 12em;">
                      <input id="action-search" type="radio" name="action" value="search" checked="checked" style="vertical-align: middle;">
                      <label for="action-search" style="vertical-align: middle;">Copyvio search:</label>
                    </td>
                    <td id="cv-tmp1">
                      <div class="cv-tmp2">
                        <input type="hidden" name="use_engine" value="0">
                        <input id="cv-cb-engine" class="cv-search" type="checkbox" name="use_engine" value="1" checked="checked">
                        <label for="cv-cb-engine">Use search engine</label>
                      </div>
                      <div class="cv-tmp2">
                        <input type="hidden" name="use_links" value="0">
                        <input id="cv-cb-links" class="cv-search" type="checkbox" name="use_links" value="1" checked="checked">
                        <label for="cv-cb-links">Use links in page</label>
                      </div>
                      <div class="cv-tmp2">
                        <input type="hidden" name="turnitin" value="0">
                        <input id="cv-cb-turnitin" class="cv-search" type="checkbox" name="turnitin" value="1">
                        <label for="cv-cb-turnitin">Use Turnitin</label>
                      </div>
                    </td>
                  </tr>
                  <tr>
                    <td>
                      <input id="action-compare" type="radio" name="action" value="compare" style="vertical-align: middle;">
                      <label for="action-compare" style="vertical-align: middle;">URL comparison:</label>
                    </td>
                    <td>
                      <input class="cv-compare cv-text" type="text" name="url" disabled="">
                    </td>
                  </tr>
                </table>
              </td>
            </tr>
            <tr>
              <td colspan="4">
                <input type="submit">
              </td>
            </tr>
          </table>
        </form>
      </div>
    </div>
    <footer>
      <ul>
        <li>Maintained by <a href="https://en.wikipedia.org/wiki/User:The_Earwig">Ben Kurtovic</a></li>
        <li><a href="/api">API</a></li>
        <li><a href="https://github.com/earwig/copyvios">Source code</a></li>
        <li><a href="{{ background.source_url }}">Background image</a></li>
      </ul>
    </footer>
  </body>
</html>