From 282b2d748568f73f1459ee3aee6799c1bc9d33ca Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 3 Nov 2024 23:09:32 -0500 Subject: [PATCH] Mostly finish Jinja refactoring --- README.md | 12 +- app.py | 107 ++++--- pyproject.toml | 2 - src/copyvios/__init__.py | 13 + src/copyvios/api.py | 10 +- src/copyvios/background.py | 24 +- src/copyvios/cache.py | 2 +- src/copyvios/checker.py | 5 +- src/copyvios/highlighter.py | 4 +- src/copyvios/misc.py | 31 +- {static => src/copyvios/static}/api.css | 9 - src/copyvios/static/api.min.css | 1 + {static => src/copyvios/static}/css/style.css | 0 src/copyvios/static/favicon.ico | Bin 0 -> 15086 bytes {static => src/copyvios/static}/script.js | 0 {static => src/copyvios/static}/script.min.js | 0 {static => src/copyvios/static}/style.min.css | 0 {static => src/copyvios/static}/toolinfo.json | 0 src/copyvios/templates/api_help.html.jinja | 285 ++++++++++++++++++ src/copyvios/templates/api_result.html.jinja | 14 + src/copyvios/templates/error.html.jinja | 11 + src/copyvios/templates/index.html.jinja | 54 ++++ src/copyvios/templates/settings.html.jinja | 93 ++++++ src/copyvios/templates/support/base.html.jinja | 50 ++++ src/copyvios/templates/support/cv_form.html.jinja | 113 +++++++ .../templates/support/cv_result.html.jinja | 147 ++++++++++ static/api.min.css | 1 - templates/api.mako | 326 --------------------- templates/error.mako | 7 - templates/index.mako | 323 -------------------- templates/settings.mako | 100 ------- templates/support/footer.mako | 20 -- templates/support/header.mako | 34 --- 33 files changed, 900 insertions(+), 898 deletions(-) rename {static => src/copyvios/static}/api.css (93%) create mode 100644 src/copyvios/static/api.min.css rename {static => src/copyvios/static}/css/style.css (100%) create mode 100644 src/copyvios/static/favicon.ico rename {static => src/copyvios/static}/script.js (100%) rename {static => src/copyvios/static}/script.min.js (100%) rename {static => src/copyvios/static}/style.min.css 
(100%) rename {static => src/copyvios/static}/toolinfo.json (100%) create mode 100644 src/copyvios/templates/api_help.html.jinja create mode 100644 src/copyvios/templates/api_result.html.jinja create mode 100644 src/copyvios/templates/error.html.jinja create mode 100644 src/copyvios/templates/index.html.jinja create mode 100644 src/copyvios/templates/settings.html.jinja create mode 100644 src/copyvios/templates/support/base.html.jinja create mode 100644 src/copyvios/templates/support/cv_form.html.jinja create mode 100644 src/copyvios/templates/support/cv_result.html.jinja delete mode 100644 static/api.min.css delete mode 100644 templates/api.mako delete mode 100644 templates/error.mako delete mode 100644 templates/index.mako delete mode 100644 templates/settings.mako delete mode 100644 templates/support/footer.mako delete mode 100644 templates/support/header.mako diff --git a/README.md b/README.md index a6158ce..28649b2 100644 --- a/README.md +++ b/README.md @@ -32,10 +32,14 @@ Installation In `.earwigbot/config.yml`, fill out the connection info for the database by adding the following to the `wiki` section: - copyvios: - engine: mysql - host: - db: + copyvios: + oauth: + consumer_token: + consumer_secret: + sql: + engine: mysql + host: + db: Running ======= diff --git a/app.py b/app.py index de91a0a..eaf1ad0 100755 --- a/app.py +++ b/app.py @@ -3,54 +3,63 @@ import functools import hashlib import json -import logging import os import time import traceback -from collections.abc import Callable -from logging.handlers import TimedRotatingFileHandler -from typing import Any, ParamSpec +from typing import Any from earwigbot.wiki.copyvios import globalize -from flask import Flask, Response, make_response, request -from flask_mako import MakoTemplates, TemplateError, render_template +from flask import Response, make_response, render_template, request +from copyvios import app from copyvios.api import format_api_error, handle_api_request +from copyvios.attribution import 
get_attribution_info +from copyvios.background import get_background from copyvios.cache import cache -from copyvios.checker import CopyvioCheckError, do_check -from copyvios.cookies import get_new_cookies -from copyvios.misc import get_notice +from copyvios.checker import ( + T_POSSIBLE, + T_SUSPECT, + CopyvioCheckError, + ErrorCode, + do_check, +) +from copyvios.cookies import get_cookies, get_new_cookies +from copyvios.highlighter import highlight_delta +from copyvios.misc import get_notice, get_permalink from copyvios.query import CheckQuery from copyvios.settings import process_settings from copyvios.sites import update_sites -app = Flask(__name__) -MakoTemplates(app) +AnyResponse = Response | str | bytes -hand = TimedRotatingFileHandler("logs/app.log", when="midnight", backupCount=7) -hand.setLevel(logging.DEBUG) -app.logger.addHandler(hand) app.logger.info(f"Flask server started {time.asctime()}") globalize(num_workers=8) -AnyResponse = Response | str | bytes -P = ParamSpec("P") - -def catch_errors(func: Callable[P, AnyResponse]) -> Callable[P, AnyResponse]: - @functools.wraps(func) - def inner(*args: P.args, **kwargs: P.kwargs) -> AnyResponse: - try: - return func(*args, **kwargs) - except TemplateError as exc: - app.logger.error(f"Caught exception:\n{exc.text}") - return render_template("error.mako", traceback=exc.text) - except Exception: - app.logger.exception("Caught exception:") - return render_template("error.mako", traceback=traceback.format_exc()) - - return inner +@app.errorhandler(Exception) +def handle_errors(exc: Exception) -> AnyResponse: + if app.debug: + raise # Use built-in debugger + app.logger.exception("Caught exception:") + return render_template("error.html.jinja", traceback=traceback.format_exc()) + + +@app.context_processor +def setup_context() -> dict[str, Any]: + return { + "T_POSSIBLE": T_POSSIBLE, + "T_SUSPECT": T_SUSPECT, + "ErrorCode": ErrorCode, + "cache": cache, + "dump_json": json.dumps, + "get_attribution_info": 
get_attribution_info, + "get_background": get_background, + "get_cookies": get_cookies, + "get_notice": get_notice, + "get_permalink": get_permalink, + "highlight_delta": highlight_delta, + } @app.after_request @@ -92,51 +101,47 @@ app.url_build_error_handlers.append(external_url_handler) @app.route("/") -@catch_errors def index() -> AnyResponse: - notice = get_notice() update_sites() query = CheckQuery.from_get_args() try: result = do_check(query) error = None except CopyvioCheckError as exc: + app.logger.exception(f"Copyvio check failed on {query}") result = None error = exc + return render_template( - "index.mako", - notice=notice, + "index.html.jinja", query=query, result=result, error=error, + splash=not result, ) @app.route("/settings", methods=["GET", "POST"]) -@catch_errors def settings() -> AnyResponse: status = process_settings() if request.method == "POST" else None update_sites() - default = cache.bot.wiki.get_site() - kwargs = { - "status": status, - "default_lang": default.lang, - "default_project": default.project, - } - return render_template("settings.mako", **kwargs) + return render_template( + "settings.html.jinja", + status=status, + default_site=cache.bot.wiki.get_site(), + splash=True, + ) @app.route("/api") -@catch_errors def api() -> AnyResponse: - return render_template("api.mako", help=True) + return render_template("api_help.html.jinja") @app.route("/api.json") -@catch_errors def api_json() -> AnyResponse: if not request.args: - return render_template("api.mako", help=True) + return render_template("api_help.html.jinja") format = request.args.get("format", "json") if format in ["json", "jsonfm"]: @@ -144,18 +149,26 @@ def api_json() -> AnyResponse: try: result = handle_api_request() except Exception as exc: + app.logger.exception("API request failed") result = format_api_error("unhandled_exception", exc) else: errmsg = f"Unknown format: {format!r}" result = format_api_error("unknown_format", errmsg) if format == "jsonfm": - return 
render_template("api.mako", help=False, result=result) + return render_template("api_result.html.jinja", result=result) resp = make_response(json.dumps(result)) resp.mimetype = "application/json" resp.headers["Access-Control-Allow-Origin"] = "*" return resp +if app.debug: + # Silence browser 404s when testing + @app.route("/favicon.ico") + def favicon() -> AnyResponse: + return app.send_static_file("favicon.ico") + + if __name__ == "__main__": app.run() diff --git a/pyproject.toml b/pyproject.toml index bb5617e..1c3e138 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,8 +11,6 @@ dependencies = [ "earwigbot[sql,copyvios] >= 0.4", "mwparserfromhell >= 0.6", "flask >= 3.0", - "flask-mako >= 0.4", - "mako >= 1.3.5", "requests >= 2.32.3", "pydantic >= 2.9.2", "SQLAlchemy >= 2.0.32", diff --git a/src/copyvios/__init__.py b/src/copyvios/__init__.py index e69de29..9fe750a 100644 --- a/src/copyvios/__init__.py +++ b/src/copyvios/__init__.py @@ -0,0 +1,13 @@ +import logging +from logging.handlers import TimedRotatingFileHandler + +from flask import Flask + +app = Flask("copyvios") + +app.jinja_options["trim_blocks"] = True +app.jinja_options["lstrip_blocks"] = True + +hand = TimedRotatingFileHandler("logs/app.log", when="midnight", backupCount=7) +hand.setLevel(logging.DEBUG) +app.logger.addHandler(hand) diff --git a/src/copyvios/api.py b/src/copyvios/api.py index be8ca26..c70f4da 100644 --- a/src/copyvios/api.py +++ b/src/copyvios/api.py @@ -62,8 +62,8 @@ def _serialize_detail(result: CopyvioCheckResult) -> dict[str, Any] | None: if not result.best: return None source_chain, delta = result.best.chains - article = highlight_delta(None, result.article_chain, delta) - source = highlight_delta(None, source_chain, delta) + article = highlight_delta(result.article_chain, delta) + source = highlight_delta(source_chain, delta) return {"article": article, "source": source} @@ -136,7 +136,11 @@ def _hook_check(query: APIQuery) -> dict[str, Any]: def _hook_sites(query: 
APIQuery) -> dict[str, Any]: update_sites() - return {"status": "ok", "langs": cache.langs, "projects": cache.projects} + return { + "status": "ok", + "langs": [[lang.code, lang.name] for lang in cache.langs], + "projects": [[project.code, project.name] for project in cache.projects], + } _HOOKS = { diff --git a/src/copyvios/background.py b/src/copyvios/background.py index f9806c1..4f07596 100644 --- a/src/copyvios/background.py +++ b/src/copyvios/background.py @@ -13,7 +13,6 @@ from typing import Self from earwigbot import exceptions from earwigbot.wiki import Site -from flask import g from .cache import cache from .cookies import get_cookies @@ -79,16 +78,20 @@ def _get_fresh_from_potd() -> BackgroundInfo | None: site = _get_commons_site() date = datetime.now(UTC).strftime("%Y-%m-%d") page = site.get_page(f"Template:Potd/{date}") - regex = r"\{\{Potd filename\|(?:1=)?(.*?)\|.*?\}\}" + filename = None try: - match = re.search(regex, page.get()) + code = page.parse() + for tmpl in code.ifilter_templates( + matches=lambda tmpl: tmpl.name.matches("Potd filename") + ): + filename = tmpl.get(1).value.strip_code().strip() + break except exceptions.EarwigBotError: logger.exception(f"Failed to load today's POTD from {page.title!r}") return None - if not match: + if not filename: logger.exception(f"Failed to extract POTD from {page.title!r}") return None - filename = match.group(1) return _load_file(site, filename) @@ -144,7 +147,10 @@ def _get_background(selected: str) -> BackgroundInfo | None: return _BACKGROUND_CACHE[selected] -def get_background(selected: str) -> str: +def get_background(selected: str) -> tuple[str | None, str | None]: + if selected == "plain": + return None, None + cookies = get_cookies() if "CopyviosScreenCache" in cookies: cookie = cookies["CopyviosScreenCache"].value @@ -155,8 +161,6 @@ def get_background(selected: str) -> str: background = _get_background(selected) if background: bg_url = _build_url(screen, background) - g.descurl = 
background.descurl + return bg_url, background.descurl else: - bg_url = "" - g.descurl = None - return bg_url + return None, None diff --git a/src/copyvios/cache.py b/src/copyvios/cache.py index ee1419a..8d4eabb 100644 --- a/src/copyvios/cache.py +++ b/src/copyvios/cache.py @@ -38,7 +38,7 @@ def setup_connection(dbapi_connection: Any, connection_record: Any) -> None: def _get_engine(bot: Bot) -> sqlalchemy.Engine: - args = bot.config.wiki["copyvios"].copy() + args = bot.config.wiki.get("copyvios", {}).get("sql", {}).copy() engine_name = args.pop("engine", "mysql").lower() if engine_name == "mysql": diff --git a/src/copyvios/checker.py b/src/copyvios/checker.py index 6c4b165..11d91f5 100644 --- a/src/copyvios/checker.py +++ b/src/copyvios/checker.py @@ -101,8 +101,7 @@ def _get_results( result = _perform_check(query, page, conn) finally: conn.close() - if turnitin_result: - result.metadata.turnitin_result = turnitin_result + result.metadata.turnitin_result = turnitin_result elif query.action == "compare": if not query.url: @@ -229,7 +228,7 @@ def _get_cached_results( ) data = cursor.fetchall() - if not data: # TODO: do something less hacky for this edge case + if not data: # TODO: Do something less hacky for this edge case article_chain = CopyvioChecker(page).article_chain result = CopyvioCheckResult( False, [], queries, check_time, article_chain, possible_miss diff --git a/src/copyvios/highlighter.py b/src/copyvios/highlighter.py index c953401..b0d3d06 100644 --- a/src/copyvios/highlighter.py +++ b/src/copyvios/highlighter.py @@ -13,9 +13,7 @@ from earwigbot.wiki.copyvios.markov import ( ) -def highlight_delta( - context, chain: MarkovChain, delta: MarkovChainIntersection | None -) -> str: +def highlight_delta(chain: MarkovChain, delta: MarkovChainIntersection | None) -> str: degree = chain.degree - 1 highlights = [False] * degree block: deque[str | Sentinel] = deque([Sentinel.START] * degree) diff --git a/src/copyvios/misc.py b/src/copyvios/misc.py index 
bd962ae..53dc158 100644 --- a/src/copyvios/misc.py +++ b/src/copyvios/misc.py @@ -10,11 +10,15 @@ __all__ = [ import datetime import os import sqlite3 +import urllib.parse from typing import TypeVar import pymysql +from flask import g, request +from . import app from .cache import cache +from .query import CheckQuery T = TypeVar("T") @@ -50,17 +54,34 @@ def get_notice() -> str | None: return None -def httpsfix(context, url: str) -> str: - if url.startswith("http://"): - url = url[len("http:") :] - return url +def get_permalink(query: CheckQuery) -> str: + params = { + "lang": query.orig_lang, + "project": query.project, + "oldid": query.oldid or g.page.lastrevid, + "action": query.action, + } + if query.action == "search": + params["use_engine"] = int(query.use_engine) + params["use_links"] = int(query.use_links) + elif query.action == "compare": + params["url"] = query.url + return f"{request.script_root}/?{urllib.parse.urlencode(params)}" def parse_wiki_timestamp(timestamp: str) -> datetime.datetime: return datetime.datetime.strptime(timestamp, "%Y%m%d%H%M%S") -def urlstrip(context, url: str) -> str: +@app.template_filter() +def httpsfix(url: str) -> str: + if url.startswith("http://"): + url = url[len("http:") :] + return url + + +@app.template_filter() +def urlstrip(url: str) -> str: if url.startswith("http://"): url = url[7:] if url.startswith("https://"): diff --git a/static/api.css b/src/copyvios/static/api.css similarity index 93% rename from static/api.css rename to src/copyvios/static/api.css index beef43e..ac325d2 100644 --- a/static/api.css +++ b/src/copyvios/static/api.css @@ -11,15 +11,6 @@ pre { max-width: 1200px; } -.json { - font-family: monospace; -} - -.indent { - display: inline-block; - padding-left: 2em; -} - .code { font-family: monospace; } diff --git a/src/copyvios/static/api.min.css b/src/copyvios/static/api.min.css new file mode 100644 index 0000000..5c5a37e --- /dev/null +++ b/src/copyvios/static/api.min.css @@ -0,0 +1 @@ 
+h1,h2{font-family:sans-serif}pre{white-space:pre-wrap}#help{margin:auto;max-width:1200px}.code{font-family:monospace}.resp-cond,.resp-desc,.resp-dtype{background-color:#eee;padding:0 .25em}.resp-dtype{color:#009}.resp-cond:before,.resp-dtype:before{content:"("}.resp-cond:after,.resp-dtype:after{content:")"}.resp-desc{color:#050}.resp-cond{color:#900;font-style:italic}.param-key{color:#009;font-weight:700}.param-val{color:#900;font-weight:700}.parameters{margin:1em 0}.parameters tr:first-child{color:#fff;font-family:sans-serif;font-size:1.17em}.parameters tr:first-child th{background-color:#369}.parameters td,.parameters th{padding:.2em .5em}.parameters th{background-color:#f0f0f0}.parameters td:first-child{font-family:monospace}.parameters tr:nth-child(2n+3){background-color:#e0e0e0}.parameters tr:nth-child(2n+4){background-color:#f0f0f0}a:link,a:visited{color:#373;text-decoration:none}a:hover{color:#040}a:active,a:hover{text-decoration:underline}a:active{color:#404}.no-color:link,.no-color:visited{color:#000;text-decoration:none}.no-color:active,.no-color:hover{color:#000;text-decoration:underline} \ No newline at end of file diff --git a/static/css/style.css b/src/copyvios/static/css/style.css similarity index 100% rename from static/css/style.css rename to src/copyvios/static/css/style.css diff --git a/src/copyvios/static/favicon.ico b/src/copyvios/static/favicon.ico new file mode 100644 index 0000000000000000000000000000000000000000..32cc28710da29816ef09d3a0e7d09cd3318b3da6 GIT binary patch literal 15086 zcmeHO&1(}u6o0WoP?5BkB6vuU3W|aOLGhFx^(4Kh$5y0xQ0P%n%%UKkl%7of0}ozA z@Ej352;#+qUbI5-T-!^EYu5L6`m(c=+1=UABx&gkOn&q6K7Q}*?qqpQ0ZK3h^*SKd z;MGZh3jk27?aZeEe&E_PBJJNk2=EjIPN58{z#?{958Ipo_vtd=GT<_>mol*2J|9e~ z+sMH&gq0`O|%8YI|IArZMbB?wli&dx>RtGafFNW^~tM+-Hl8 z9955>xXH2Du;*;Vzk~NvKKMS$!b#>Ni<`;*UgvKTqm!|ff%~5T+E};Xo@6(^Pv>%b z83MJ-=XH`L`|HFl-r_u%`|2i3sl3&>$-4mCvX62+%w1l?tEI`kyk?(MwndRMnWvun 
z%Uj^fTy^7YZ?+|vE8FEcBd=%Yp5lq5M>`VldMZbr%Sg|;Xu5o7tmQ~qp?kz=a%3YeOEDO8JU5=EdFun^qKA&rE93W%u`*@H>a8a$CXbr zF0FqS<1%jMGT<`cGBAk@1YrX(#tyPSzEt0>9(^y64>@MdxR>`HXWqg-`KH*pao)4z*Kis5 zdl|{F%>A>FE4Cz7F4Y>$ZobqO|2ZK7OHm6hWdMmasfo*}BmP zdaXvVU2g=N^-A!&4i$d|D(j06t6%))&&ZP)QJsRy5;g4oItBi(*=}xm-V1bb46DyJ z)9kQ}4%75_uc#jD6UsK^^}J)UlUGNTm$}#*w_<0!nO*)q5y)paVYdE(ohKvuu8{S0 zxLoqODmSiqSX0FetxbvEtsr9e=oBgm79`Vc0(`~3j&iTJs9q}ie~*b` Aq5uE@ literal 0 HcmV?d00001 diff --git a/static/script.js b/src/copyvios/static/script.js similarity index 100% rename from static/script.js rename to src/copyvios/static/script.js diff --git a/static/script.min.js b/src/copyvios/static/script.min.js similarity index 100% rename from static/script.min.js rename to src/copyvios/static/script.min.js diff --git a/static/style.min.css b/src/copyvios/static/style.min.css similarity index 100% rename from static/style.min.css rename to src/copyvios/static/style.min.css diff --git a/static/toolinfo.json b/src/copyvios/static/toolinfo.json similarity index 100% rename from static/toolinfo.json rename to src/copyvios/static/toolinfo.json diff --git a/src/copyvios/templates/api_help.html.jinja b/src/copyvios/templates/api_help.html.jinja new file mode 100644 index 0000000..d9733ca --- /dev/null +++ b/src/copyvios/templates/api_help.html.jinja @@ -0,0 +1,285 @@ + + + + + API - Earwig's Copyvio Detector + + + +
+

Copyvio Detector API

+

This is the first version of the API for Earwig's Copyvio Detector. Please report any issues you encounter.

+

Requests

+

The API responds to GET requests made to https://copyvios.toolforge.org/api.json. Parameters are described in the tables below:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Always
ParameterValuesRequired?Description
actioncompare, search, sitesYesThe API will do URL comparisons in compare mode, run full copyvio checks in search mode, and list all known site languages and projects in sites mode.
formatjson, jsonfmNo (default: json)The default output format is JSON. jsonfm mode produces the same output, but renders it as a formatted HTML document for debugging.
versionintegerNo (default: 1)Currently, the API only has one version. You can skip this parameter, but it is recommended to include it for forward compatibility.
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
compare Mode
ParameterValuesRequired?Description
projectstringYesThe project code of the site the page lives on. Examples are wikipedia and wiktionary. A list of acceptable values can be retrieved using action=sites.
langstringYesThe language code of the site the page lives on. Examples are en and de. A list of acceptable values can be retrieved using action=sites.
titlestringYes (either title or oldid)The title of the page or article to make a comparison against. Namespace must be included if the page isn't in the mainspace.
oldidintegerYes (either title or oldid)The revision ID (also called oldid) of the page revision to make a comparison against. If both a title and oldid are given, the oldid will be used.
urlstringYesThe URL of the suspected violation source that will be compared to the page.
detailbooleanNo (default: false)Whether to include the detailed HTML text comparison available in the regular interface. If not, only the similarity percentage is available.
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
search Mode
ParameterValuesRequired?Description
projectstringYesThe project code of the site the page lives on. Examples are wikipedia and wiktionary. A list of acceptable values can be retrieved using action=sites.
langstringYesThe language code of the site the page lives on. Examples are en and de. A list of acceptable values can be retrieved using action=sites.
titlestringYes (either title or oldid)The title of the page or article to make a check against. Namespace must be included if the page isn't in the mainspace.
oldidintegerYes (either title or oldid)The revision ID (also called oldid) of the page revision to make a check against. If both a title and oldid are given, the oldid will be used.
use_enginebooleanNo (default: true)Whether to use a search engine (Google) as a source of URLs to compare against the page.
use_linksbooleanNo (default: true)Whether to compare the page against external links found in its wikitext.
nocachebooleanNo (default: false)Whether to bypass search results cached from previous checks. It is recommended that you don't pass this option unless a user specifically asks for it.
noredirectbooleanNo (default: false)Whether to avoid following redirects if the given page is a redirect.
noskipbooleanNo (default: false)If a suspected source is found during a check to have a sufficiently high similarity value, the check will end prematurely, and other pending URLs will be skipped. Passing this option will prevent this behavior, resulting in complete (but more time-consuming) checks.
+

Responses

+

The JSON response object always contains a status key, whose value is either ok or error. If an error has occurred, the response will look like this:

+
{
+"status": "error",
+"error": {
+    "code": string error code,
+    "info": string human-readable description of error
+}
+}
+

Valid responses for action=compare and action=search are formatted like this:

+
{
+"status": "ok",
+"meta": {
+    "time":       float time to generate results, in seconds,
+    "queries":    int number of search engine queries made,
+    "cached":     boolean whether these results are cached from an earlier search (always false in the case of action=compare),
+    "redirected": boolean whether a redirect was followed,
+    only if cached=true "cache_time": string human-readable time of the original search that the results are cached from
+},
+"page": {
+    "title": string the normalized title of the page checked,
+    "url":   string the full URL of the page checked
+},
+only if redirected=true "original_page": {
+    "title": string the normalized title of the original page whose redirect was followed,
+    "url":   string the full URL of the original page whose redirect was followed
+},
+"best": {
+    "url":        string the URL of the best match found, or null if no matches were found,
+    "confidence": float the similarity of a violation in the best match, or 0.0 if no matches were found,
+    "violation":  string one of "suspected", "possible", or "none"
+},
+"sources": [
+    {
+        "url":        string the URL of the source,
+        "confidence": float the similarity of the source to the page checked as a ratio between 0.0 and 1.0,
+        "violation":  string one of "suspected", "possible", or "none",
+        "skipped":    boolean whether the source was skipped due to the check finishing early (see note about noskip above) or an exclusion,
+        "excluded":   boolean whether the source was skipped for being in the excluded URL list
+    },
+    ...
+],
+only if action=compare and detail=true "detail": {
+    "article": string article text, with shared passages marked with HTML,
+    "source":  string source text, with shared passages marked with HTML
+}
+}
+

In the case of action=search, sources will contain one entry for each source checked (or skipped if the check ends early), sorted by similarity, with skipped and excluded sources at the bottom.

+

In the case of action=compare, best will always contain information about the URL that was given, so response["best"]["url"] will never be null. Also, sources will always contain one entry, with the same data as best, since only one source is checked in comparison mode.

+

Valid responses for action=sites are formatted like this:

+
{
+"status": "ok",
+"langs": [
+    [
+        string language code,
+        string human-readable language name
+    ],
+    ...
+],
+"projects": [
+    [
+        string project code,
+        string human-readable project name
+    ],
+    ...
+]
+}
+

Etiquette

+

The tool uses the same workers to handle all requests, so making concurrent API calls is only going to slow you down. Most operations are not rate-limited, but full searches with use_engine=True are globally limited to around a thousand per day. Be respectful!

+

Aside from testing, you must set a reasonable user agent that identifies your bot and gives some way to contact you. You may be blocked if using an improper user agent (for example, the default user agent set by your HTTP library), or if your bot makes requests too frequently.

+

Example

+

https://copyvios.toolforge.org/api.json?version=1&action=search&project=wikipedia&lang=en&title=User:EarwigBot/Copyvios/Tests/2

+
{
+"status": "ok",
+"meta": {
+    "time": 2.2474379539489746,
+    "queries": 1,
+    "cached": false,
+    "redirected": false
+},
+"page": {
+    "title": "User:EarwigBot/Copyvios/Tests/2",
+    "url": "https://en.wikipedia.org/wiki/User:EarwigBot/Copyvios/Tests/2"
+},
+"best": {
+    "url": "http://www.whitehouse.gov/administration/president-obama/",
+    "confidence": 0.9886608511242603,
+    "violation": "suspected"
+},
+"sources": [
+    {
+        "url": "http://www.whitehouse.gov/administration/president-obama/",
+        "confidence": 0.9886608511242603,
+        "violation": "suspected",
+        "skipped": false,
+        "excluded": false
+    },
+    {
+        "url": "http://maige2009.blogspot.com/2013/07/barack-h-obama-is-44th-president-of.html",
+        "confidence": 0.9864798816568047,
+        "violation": "suspected",
+        "skipped": false,
+        "excluded": false
+    },
+    {
+        "url": "http://jeuxdemonstre-apkdownload.rhcloud.com/luo-people-of-kenya-and-tanzania---wikipedia--the-free",
+        "confidence": 0.0,
+        "violation": "none",
+        "skipped": false,
+        "excluded": false
+    },
+    {
+        "url": "http://www.whitehouse.gov/about/presidents/barackobama",
+        "confidence": 0.0,
+        "violation": "none",
+        "skipped": true,
+        "excluded": false
+    },
+    {
+        "url": "http://jeuxdemonstre-apkdownload.rhcloud.com/president-barack-obama---the-white-house",
+        "confidence": 0.0,
+        "violation": "none",
+        "skipped": true,
+        "excluded": false
+    }
+]
+}
+
+
+ + diff --git a/src/copyvios/templates/api_result.html.jinja b/src/copyvios/templates/api_result.html.jinja new file mode 100644 index 0000000..6064249 --- /dev/null +++ b/src/copyvios/templates/api_result.html.jinja @@ -0,0 +1,14 @@ + + + + + API - Earwig's Copyvio Detector + + + +
+

You are using jsonfm output mode, which renders JSON data as a formatted HTML document. This is intended for testing and debugging only.

+
{{ dump_json(result, indent=4) | e }}
+
+ + diff --git a/src/copyvios/templates/error.html.jinja b/src/copyvios/templates/error.html.jinja new file mode 100644 index 0000000..883055a --- /dev/null +++ b/src/copyvios/templates/error.html.jinja @@ -0,0 +1,11 @@ +{% extends "support/base.html.jinja" %} +{% block title %} + Error! - {{ super() }} +{% endblock %} +{% block content %} +

Error!

+

An error occurred. If it hasn't been reported (try to check), please file an issue or email me. Include the following information:

+
+
{{ traceback | trim | e }}
+
+{% endblock %} diff --git a/src/copyvios/templates/index.html.jinja b/src/copyvios/templates/index.html.jinja new file mode 100644 index 0000000..ed444bc --- /dev/null +++ b/src/copyvios/templates/index.html.jinja @@ -0,0 +1,54 @@ +{% extends "support/base.html.jinja" %} +{% block title %} + {% if g.page %} + {{ g.page.title | e }} - {{ super() }} + {% else %} + {{ super() }} + {% endif %} +{% endblock %} +{% block content %} + {% if query.submitted %} + {% if error %} +

+ {% if error.code == ErrorCode.BAD_ACTION %} + Unknown action: {{ query.action | e }}. + {% elif error.code == ErrorCode.NO_SEARCH_METHOD %} + No copyvio search methods were selected. A check can only be made using the search engine, links present in the page, Turnitin, or some combination of these. + {% elif error.code == ErrorCode.BAD_OLDID %} + The revision ID {{ query.oldid | e }} is invalid. It should be an integer. + {% elif error.code == ErrorCode.NO_URL %} + Compare mode requires a URL to be entered. Enter one in the text box below, or choose copyvio search mode to look for content similar to the article elsewhere on the web. + {% elif error.code == ErrorCode.BAD_URI %} + Unsupported URI scheme: {{ query.url | e }}. + {% elif error.code == ErrorCode.NO_DATA %} + Couldn't find any text in {{ query.url | e }}. Note: only HTML documents, plain text pages, and PDFs are supported, and content generated by JavaScript or found inside iframes is ignored. + {% elif error.code == ErrorCode.TIMEOUT %} + The URL {{ query.url | e }} timed out before any data could be retrieved. + {% elif error.code == ErrorCode.SEARCH_ERROR %}{# The exception text is escaped explicitly, like every other interpolation in this chain. #} + An error occurred while using the search engine ({{ error.__cause__ | e }}). Note: there is a daily limit on the number of search queries the tool is allowed to make. You may repeat the check without using the search engine. + {% else %} + An unknown error occurred. + {% endif %}

+ {% elif not g.site %} +
+

The given site (project={{ query.project | e }}, language={{ query.lang | e }}) doesn't seem to exist. It may also be closed or private. Confirm its URL.

+
+ {% elif query.oldid and not result %} +
+

The revision ID couldn't be found: {{ query.oldid | e }}.

+
+ {% elif query.title and not result %} +
+

The page couldn't be found: {{ g.page.title | e }}.

+
+ {% endif %} + {% endif %} +

This tool attempts to detect copyright violations in articles. In search mode, it will check for similar content elsewhere on the web using Google, external links present in the text of the page, or Turnitin (via EranBot), depending on which options are selected. In compare mode, the tool will compare the article to a specific webpage without making additional searches, like the Duplication Detector.

+

Running a full check can take up to a minute if other websites are slow or if the tool is under heavy use. Please be patient. If you get a timeout, wait a moment and refresh the page.

+

Be aware that other websites can copy from Wikipedia, so check the results carefully, especially for older or well-developed articles. Specific websites can be skipped by adding them to the excluded URL list.

+ {% include "support/cv_form.html.jinja" %} + {% if result %} + {% include "support/cv_result.html.jinja" %} + {% endif %} +{% endblock %} diff --git a/src/copyvios/templates/settings.html.jinja b/src/copyvios/templates/settings.html.jinja new file mode 100644 index 0000000..14eb3e6 --- /dev/null +++ b/src/copyvios/templates/settings.html.jinja @@ -0,0 +1,93 @@ +{% extends "support/base.html.jinja" %} +{% block title %} + Settings - {{ super() }} +{% endblock %} +{% block content %} + {% if status %} +
+

{{ status | safe }}

+
+ {% endif %} +

Settings

+

This page contains some configurable options for the copyvio detector. Settings are saved as cookies.

+
+

Default site

+
+
+
+
+
+
+ +
+
+ +
+
+
+
+
+
+ +

Background

+ {% set background_options = [ + ("list", 'Randomly select from a subset of previous Wikimedia Commons Pictures of the Day that work well as widescreen backgrounds, refreshed daily (default).'), + ("potd", 'Use the current Commons Picture of the Day, unfiltered. Certain POTDs may be unsuitable as backgrounds due to their aspect ratio or subject matter.'), + ("plain", "Use a plain background."), + ] + %} +
+
+
+
+ {% for value, desc in background_options %} +
+
+ + + + + + + + +
+
+ {% endfor %} +
+
+
+
+ + +
+
+ + + + + +
+
+
+{% endblock %} diff --git a/src/copyvios/templates/support/base.html.jinja b/src/copyvios/templates/support/base.html.jinja new file mode 100644 index 0000000..3495f91 --- /dev/null +++ b/src/copyvios/templates/support/base.html.jinja @@ -0,0 +1,50 @@ + + + + + {% block title %}Earwig's Copyvio Detector{% endblock %} + + + + + + + +{% set cookies = get_cookies() %} +{% set selected = cookies["CopyviosBackground"].value if "CopyviosBackground" in cookies else "list" %} +{% set bg_url, desc_url = get_background(selected) %} +{% if bg_url %} + +{% else %} + +{% endif %} +
+
+
+

Earwig's Copyvio Detector

+ Settings +
+
+ {% set notice = get_notice() %} + {% if notice %} +
+ {{ notice | safe }} +
+ {% endif %} + {% block content required %}{% endblock %} +
+
+
+
+ + + diff --git a/src/copyvios/templates/support/cv_form.html.jinja b/src/copyvios/templates/support/cv_form.html.jinja new file mode 100644 index 0000000..817f6bb --- /dev/null +++ b/src/copyvios/templates/support/cv_form.html.jinja @@ -0,0 +1,113 @@ +
+
+ +
+ +
+
+ +
+
+
+ +
+ +
+ +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + + + + +
+ +
+
+ {% if query.nocache or (result and result.metadata.cached) %} +
+ + + + + +
+ {% endif %} +
+ + + +
+
diff --git a/src/copyvios/templates/support/cv_result.html.jinja b/src/copyvios/templates/support/cv_result.html.jinja new file mode 100644 index 0000000..b3ad6b6 --- /dev/null +++ b/src/copyvios/templates/support/cv_result.html.jinja @@ -0,0 +1,147 @@ +
+ Results + {% if result.metadata.cached %} + cachedTo save time (and money), this tool will retain the results of checks for up to 72 hours. This includes the URLs of the checked sources, but neither their content nor the content of the article. Future checks on the same page (assuming it remains unchanged) will not involve additional search queries, but a fresh comparison against the source URL will be made. If the page is modified, a new check will be run. from {{ result.metadata.cache_age }} ago. Originally + {% endif %} + generated in {{ result.time | round(3) }} + {% if query.action == "search" %} + seconds using {{ result.queries }} quer{{ "y" if result.queries == 1 else "ies" }}. + {% else %} + seconds. + {% endif %} + Permalink. +
+ +
+ + + + + + + + + + + +
+ {{ g.page.title | e }} + {% if query.oldid %} + @{{ query.oldid | e }} + {% endif %} + {% if result.metadata.redirected_from %} +
+ Redirected from {{ result.metadata.redirected_from.title | e }}. Check original. + {% endif %} +
+
+ {% if result.confidence >= T_SUSPECT %} + Violation suspected + {% elif result.confidence >= T_POSSIBLE %} + Violation possible + {% elif result.sources %} + Violation unlikely + {% else %} + No violation + {% endif %} +
+
{{ (result.confidence * 100) | round(1) }}%
+
similarity
+
+ {% if result.url %} + {{ result.url | urlstrip | e }} + {% else %} + No matches found. + {% endif %} +
+
+ +{% set attrib = get_attribution_info(g.site, g.page) %} +{% if attrib %} +
+ This article contains an attribution template: {{ '{{' }}{{ attrib[0] | e }}{{ '}}' }}. Please verify that any potential copyvios are not from properly attributed sources. +
+{% endif %} + +{% if result.metadata.turnitin_result %} +
+
Turnitin Results
+ {% if result.metadata.turnitin_result.reports %} + + {% for report in result.metadata.turnitin_result.reports %} + + {% endfor %} +
Report {{ report.reportid }} for text added at {{ report.time_posted.strftime("%H:%M, %d %B %Y (UTC)") }}: +
    + {% for source in report.sources %} +
  • {{ source['percent'] }}% of revision text ({{ source['words'] }} words) found at {{ source['url'] | e }}
  • + {% endfor %} +
+ {% else %} +
No matching sources found.
+ {% endif %} +
+{% endif %} + +{% if query.action == "search" %} + {% set ns = namespace(skips=False) %}{# namespace object: a plain set inside the for loop below is scoped to the loop and would be lost afterwards #} +
+
Checked Sources
+ {% if result.sources %} + + + + + + + + + + + + {% for source in result.sources %}{% set i = loop.index0 %}{# zero-based row index from the loop itself, no Python builtin needed #} + = 10 else 'id="source-row-selected"' if i == 0 else "" }}> + + + + + {% endfor %} +
URLSimilarityCompare
{{ source.url | e }} + {% if source.excluded %} + Excluded + {% elif source.skipped %} + {% set ns.skips = True %} + Skipped + {% else %} + = T_SUSPECT else "source-possible" if source.confidence >= T_POSSIBLE else "source-novio" }}">{{ (source.confidence * 100) | round(1) }}% + {% endif %} + + Compare +
+ {% else %} + + {% endif %} + {% if result.sources | length > 10 %} + + {% endif %} + {% if ns.skips or result.possible_miss %} + + {% endif %} +
+{% endif %} + + + + + +
Article:
+

{{ highlight_delta(result.article_chain, result.best.chains[1] if result.best else None) }}

+
Source:
+

{{ highlight_delta(result.best.chains[0], result.best.chains[1]) if result.best else "" }}

+
diff --git a/static/api.min.css b/static/api.min.css deleted file mode 100644 index 97b2e3e..0000000 --- a/static/api.min.css +++ /dev/null @@ -1 +0,0 @@ -h1,h2{font-family:sans-serif}pre{white-space:pre-wrap}#help{margin:auto;max-width:1200px}.json{font-family:monospace}.indent{display:inline-block;padding-left:2em}.code{font-family:monospace}.resp-cond,.resp-desc,.resp-dtype{background-color:#eee;padding:0 .25em}.resp-dtype{color:#009}.resp-cond:before,.resp-dtype:before{content:"("}.resp-cond:after,.resp-dtype:after{content:")"}.resp-desc{color:#050}.resp-cond{color:#900;font-style:italic}.param-key{color:#009;font-weight:700}.param-val{color:#900;font-weight:700}.parameters{margin:1em 0}.parameters tr:first-child{color:#fff;font-family:sans-serif;font-size:1.17em}.parameters tr:first-child th{background-color:#369}.parameters td,.parameters th{padding:.2em .5em}.parameters th{background-color:#f0f0f0}.parameters td:first-child{font-family:monospace}.parameters tr:nth-child(2n+3){background-color:#e0e0e0}.parameters tr:nth-child(2n+4){background-color:#f0f0f0}a:link,a:visited{color:#373;text-decoration:none}a:hover{color:#040}a:active,a:hover{text-decoration:underline}a:active{color:#404}.no-color:link,.no-color:visited{color:#000;text-decoration:none}.no-color:active,.no-color:hover{color:#000;text-decoration:underline} \ No newline at end of file diff --git a/templates/api.mako b/templates/api.mako deleted file mode 100644 index a507137..0000000 --- a/templates/api.mako +++ /dev/null @@ -1,326 +0,0 @@ -<%! - from json import dumps - from flask import url_for -%>\ -<%def name="do_indent(size)"> -
- % for i in xrange(size): -
- % endfor -\ -<%def name="walk_json(obj, indent=0)"> - % if isinstance(obj, type({})): - { - % for key in obj: - ${do_indent(indent + 1)} - "${key | h}": ${walk_json(obj[key], indent + 1)}${"," if not loop.last else ""} - % endfor - ${do_indent(indent)} - } - % elif isinstance(obj, (list, tuple, set)): - [ - % for elem in obj: - ${do_indent(indent + 1)} - ${walk_json(elem, indent + 1)}${"," if not loop.last else ""} - % endfor - ${do_indent(indent)} - ] - % else: - ${dumps(obj) | h} - % endif -\ - - - - - API | Earwig's Copyvio Detector - - - - % if help: -
-

Copyvio Detector API

-

This is the first version of the API for Earwig's Copyvio Detector. Please report any issues you encounter.

-

Requests

-

The API responds to GET requests made to https://copyvios.toolforge.org/api.json. Parameters are described in the tables below:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Always
ParameterValuesRequired?Description
actioncompare, search, sitesYesThe API will do URL comparisons in compare mode, run full copyvio checks in search mode, and list all known site languages and projects in sites mode.
formatjson, jsonfmNo (default: json)The default output format is JSON. jsonfm mode produces the same output, but renders it as a formatted HTML document for debugging.
versionintegerNo (default: 1)Currently, the API only has one version. You can skip this parameter, but it is recommended to include it for forward compatibility.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
compare Mode
ParameterValuesRequired?Description
projectstringYesThe project code of the site the page lives on. Examples are wikipedia and wiktionary. A list of acceptable values can be retrieved using action=sites.
langstringYesThe language code of the site the page lives on. Examples are en and de. A list of acceptable values can be retrieved using action=sites.
titlestringYes (either title or oldid)The title of the page or article to make a comparison against. Namespace must be included if the page isn't in the mainspace.
oldidintegerYes (either title or oldid)The revision ID (also called oldid) of the page revision to make a comparison against. If both a title and oldid are given, the oldid will be used.
urlstringYesThe URL of the suspected violation source that will be compared to the page.
detailbooleanNo (default: false)Whether to include the detailed HTML text comparison available in the regular interface. If not, only the similarity percentage is available.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
search Mode
ParameterValuesRequired?Description
projectstringYesThe project code of the site the page lives on. Examples are wikipedia and wiktionary. A list of acceptable values can be retrieved using action=sites.
langstringYesThe language code of the site the page lives on. Examples are en and de. A list of acceptable values can be retrieved using action=sites.
titlestringYes (either title or oldid)The title of the page or article to make a check against. Namespace must be included if the page isn't in the mainspace.
oldidintegerYes (either title or oldid)The revision ID (also called oldid) of the page revision to make a check against. If both a title and oldid are given, the oldid will be used.
use_enginebooleanNo (default: true)Whether to use a search engine (Google) as a source of URLs to compare against the page.
use_linksbooleanNo (default: true)Whether to compare the page against external links found in its wikitext.
nocachebooleanNo (default: false)Whether to bypass search results cached from previous checks. It is recommended that you don't pass this option unless a user specifically asks for it.
noredirectbooleanNo (default: false)Whether to avoid following redirects if the given page is a redirect.
noskipbooleanNo (default: false)If a suspected source is found during a check to have a sufficiently high similarity value, the check will end prematurely, and other pending URLs will be skipped. Passing this option will prevent this behavior, resulting in complete (but more time-consuming) checks.
-

Responses

-

The JSON response object always contains a status key, whose value is either ok or error. If an error has occurred, the response will look like this:

-
{
-    "status": "error",
-    "error": {
-        "code": string error code,
-        "info": string human-readable description of error
-    }
-}
-

Valid responses for action=compare and action=search are formatted like this:

-
{
-    "status": "ok",
-    "meta": {
-        "time":       float time to generate results, in seconds,
-        "queries":    int number of search engine queries made,
-        "cached":     boolean whether these results are cached from an earlier search (always false in the case of action=compare),
-        "redirected": boolean whether a redirect was followed,
-        only if cached=true "cache_time": string human-readable time of the original search that the results are cached from
-    },
-    "page": {
-        "title": string the normalized title of the page checked,
-        "url":   string the full URL of the page checked
-    },
-    only if redirected=true "original_page": {
-        "title": string the normalized title of the original page whose redirect was followed,
-        "url":   string the full URL of the original page whose redirect was followed
-    },
-    "best": {
-        "url":        string the URL of the best match found, or null if no matches were found,
-        "confidence": float the similarity of a violation in the best match, or 0.0 if no matches were found,
-        "violation":  string one of "suspected", "possible", or "none"
-    },
-    "sources": [
-        {
-            "url":        string the URL of the source,
-            "confidence": float the similarity of the source to the page checked as a ratio between 0.0 and 1.0,
-            "violation":  string one of "suspected", "possible", or "none",
-            "skipped":    boolean whether the source was skipped due to the check finishing early (see note about noskip above) or an exclusion,
-            "excluded":    boolean whether the source was skipped for being in the excluded URL list
-        },
-        ...
-    ],
-    only if action=compare and detail=true "detail": {
-        "article": string article text, with shared passages marked with HTML,
-        "source":  string source text, with shared passages marked with HTML
-    }
-}
-

In the case of action=search, sources will contain one entry for each source checked (or skipped if the check ends early), sorted by similarity, with skipped and excluded sources at the bottom.

-

In the case of action=compare, best will always contain information about the URL that was given, so response["best"]["url"] will never be null. Also, sources will always contain one entry, with the same data as best, since only one source is checked in comparison mode.

-

Valid responses for action=sites are formatted like this:

-
{
-    "status": "ok",
-    "langs": [
-        [
-            string language code,
-            string human-readable language name
-        ],
-        ...
-    ],
-    "projects": [
-        [
-            string project code,
-            string human-readable project name
-        ],
-        ...
-    ]
-}
-

Etiquette

-

The tool uses the same workers to handle all requests, so making concurrent API calls is only going to slow you down. Most operations are not rate-limited, but full searches with use_engine=True are globally limited to around a thousand per day. Be respectful!

-

Aside from testing, you must set a reasonable user agent that identifies your bot and and gives some way to contact you. You may be blocked if using an improper user agent (for example, the default user agent set by your HTTP library), or if your bot makes requests too frequently.

-

Example

-

https://copyvios.toolforge.org/api.json?version=1&action=search&project=wikipedia&lang=en&title=User:EarwigBot/Copyvios/Tests/2

-
{
-    "status": "ok",
-    "meta": {
-        "time": 2.2474379539489746,
-        "queries": 1,
-        "cached": false,
-        "redirected": false
-    },
-    "page": {
-        "title": "User:EarwigBot/Copyvios/Tests/2",
-        "url": "https://en.wikipedia.org/wiki/User:EarwigBot/Copyvios/Tests/2"
-    },
-    "best": {
-        "url": "http://www.whitehouse.gov/administration/president-obama/",
-        "confidence": 0.9886608511242603,
-        "violation": "suspected"
-    }
-    "sources": [
-        {
-            "url": "http://www.whitehouse.gov/administration/president-obama/",
-            "confidence": 0.9886608511242603,
-            "violation": "suspected",
-            "skipped": false,
-            "excluded": false
-        },
-        {
-            "url": "http://maige2009.blogspot.com/2013/07/barack-h-obama-is-44th-president-of.html",
-            "confidence": 0.9864798816568047,
-            "violation": "suspected",
-            "skipped": false,
-            "excluded": false
-        },
-        {
-            "url": "http://jeuxdemonstre-apkdownload.rhcloud.com/luo-people-of-kenya-and-tanzania---wikipedia--the-free",
-            "confidence": 0.0,
-            "violation": "none",
-            "skipped": false,
-            "excluded": false
-        },
-        {
-            "url": "http://www.whitehouse.gov/about/presidents/barackobama",
-            "confidence": 0.0,
-            "violation": "none",
-            "skipped": true,
-            "excluded": false
-        },
-        {
-            "url": "http://jeuxdemonstre-apkdownload.rhcloud.com/president-barack-obama---the-white-house",
-            "confidence": 0.0,
-            "violation": "none",
-            "skipped": true,
-            "excluded": false
-        }
-    ]
-}
-
-
- % endif - % if result: -
-

You are using jsonfm output mode, which renders JSON data as a formatted HTML document. This is intended for testing and debugging only.

-
- ${walk_json(result)} -
-
- % endif - - diff --git a/templates/error.mako b/templates/error.mako deleted file mode 100644 index f26aea1..0000000 --- a/templates/error.mako +++ /dev/null @@ -1,7 +0,0 @@ -<%include file="/support/header.mako" args="title='Error! | Earwig\'s Copyvio Detector'"/> -

Error!

-

An error occurred. If it hasn't been reported (try to check), please file an issue or email me. Include the following information:

-
-
${traceback | trim,h}
-
-<%include file="/support/footer.mako"/> diff --git a/templates/index.mako b/templates/index.mako deleted file mode 100644 index 22e3d0b..0000000 --- a/templates/index.mako +++ /dev/null @@ -1,323 +0,0 @@ -<%! - from flask import request - from copyvios.attribution import get_attribution_info - from copyvios.checker import T_POSSIBLE, T_SUSPECT - from copyvios.cookies import get_cookies - from copyvios.misc import cache -%>\ -<% - titleparts = [] - if query.page: - titleparts.append(query.page.title) - titleparts.append("Earwig's Copyvio Detector") - title = " | ".join(titleparts) - cookies = get_cookies() -%>\ -<%include file="/support/header.mako" args="title=title, splash=not result"/> -<%namespace module="copyvios.highlighter" import="highlight_delta"/>\ -<%namespace module="copyvios.misc" import="httpsfix, urlstrip"/>\ -% if notice: -
- ${notice} -
-% endif -% if query.submitted: - % if query.error: -

- % if query.error == "bad action": - Unknown action: ${query.action | h}. - % elif query.error == "no search method": - No copyvio search methods were selected. A check can only be made using the search engine, links present in the page, Turnitin, or some combination of these. - % elif query.error == "bad oldid": - The revision ID ${query.oldid | h} is invalid. It should be an integer. - % elif query.error == "no URL": - Compare mode requires a URL to be entered. Enter one in the text box below, or choose copyvio search mode to look for content similar to the article elsewhere on the web. - % elif query.error == "bad URI": - Unsupported URI scheme: ${query.url | h}. - % elif query.error == "no data": - Couldn't find any text in ${query.url | h}. Note: only HTML documents, plain text pages, and PDFs are supported, and content generated by JavaScript or found inside iframes is ignored. - % elif query.error == "timeout": - The URL ${query.url | h} timed out before any data could be retrieved. - % elif query.error == "search error": - An error occurred while using the search engine (${query.error.__cause__}). Note: there is a daily limit on the number of search queries the tool is allowed to make. You may repeat the check without using the search engine. - % else: - An unknown error occurred. - % endif -

- % elif not query.site: -
-

The given site (project=${query.project | h}, language=${query.lang | h}) doesn't seem to exist. It may also be closed or private. Confirm its URL.

-
- % elif query.oldid and not result: -
-

The revision ID couldn't be found: ${query.oldid | h}.

-
- % elif query.title and not result: -
-

The page couldn't be found: ${query.page.title | h}.

-
- % endif -%endif -

This tool attempts to detect copyright violations in articles. In search mode, it will check for similar content elsewhere on the web using Google, external links present in the text of the page, or Turnitin (via EranBot), depending on which options are selected. In compare mode, the tool will compare the article to a specific webpage without making additional searches, like the Duplication Detector.

-

Running a full check can take up to a minute if other websites are slow or if the tool is under heavy use. Please be patient. If you get a timeout, wait a moment and refresh the page.

-

Be aware that other websites can copy from Wikipedia, so check the results carefully, especially for older or well-developed articles. Specific websites can be skipped by adding them to the excluded URL list.

-
-
- -
- -
-
- -
-
-
- -
- -
- -
- -
-
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- - - - - - -
- -
-
- % if query.nocache or (result and result.cached): -
- - - - - -
- % endif -
- - - -
-
- -% if result: - - -
- - - - - - - - - - - -
- ${query.page.title | h} - % if query.oldid: - @${query.oldid | h} - % endif - % if query.redirected_from: -
- Redirected from ${query.redirected_from.title | h}. Check original. - % endif -
-
- % if result.confidence >= T_SUSPECT: - Violation suspected - % elif result.confidence >= T_POSSIBLE: - Violation possible - % elif result.sources: - Violation unlikely - % else: - No violation - % endif -
-
${round(result.confidence * 100, 1)}%
-
similarity
-
- % if result.url: - ${result.url | urlstrip, h} - % else: - No matches found. - % endif -
-
- - <% attrib = get_attribution_info(query.site, query.page) %> - % if attrib: -
- This article contains an attribution template: {{${attrib[0] | h}}}. Please verify that any potential copyvios are not from properly attributed sources. -
- % endif - - % if query.turnitin_result: -
-
Turnitin Results
- % if query.turnitin_result.reports: - - % for report in turnitin_result.reports: - - % endfor -
Report ${report.reportid} for text added at ${report.time_posted.strftime("%H:%M, %d %B %Y (UTC)")}: -
    - % for source in report.sources: -
  • ${source['percent']}% of revision text (${source['words']} words) found at ${source['url'] | h}
  • - % endfor -
- % else: -
No matching sources found.
- % endif -
- % endif - - % if query.action == "search": - <% skips = False %> -
-
Checked Sources
- % if result.sources: - - - - - - - - - - - - % for i, source in enumerate(result.sources): - = 10 else 'id="source-row-selected"' if i == 0 else ""}> - - - - - % endfor -
URLSimilarityCompare
${source.url | h} - % if source.excluded: - Excluded - % elif source.skipped: - <% skips = True %> - Skipped - % else: - = T_SUSPECT else "source-possible" if source.confidence >= T_POSSIBLE else "source-novio"}">${round(source.confidence * 100, 1)}% - % endif - - Compare -
- % else: - - % endif - % if len(result.sources) > 10: - - % endif - % if skips or result.possible_miss: - - % endif -
- % endif - - - - - -
Article:

${highlight_delta(result.article_chain, result.best.chains[1] if result.best else None)}

Source:

${highlight_delta(result.best.chains[0], result.best.chains[1]) if result.best else ""}

-% endif -<%include file="/support/footer.mako"/> diff --git a/templates/settings.mako b/templates/settings.mako deleted file mode 100644 index c0998b2..0000000 --- a/templates/settings.mako +++ /dev/null @@ -1,100 +0,0 @@ -<%! - from json import dumps, loads - from flask import request - from copyvios.cookies import get_cookies - from copyvios.cache import cache -%>\ -<% - cookies = get_cookies() -%>\ -<%include file="/support/header.mako" args="title='Settings | Earwig\'s Copyvio Detector', splash=True"/> -% if status: -
-

${status}

-
-% endif -

Settings

-

This page contains some configurable options for the copyvio detector. Settings are saved as cookies.

-
-

Default site

-
-
-
-
-
-
- -
-
- -
-
-
-
-
-
- -

Background

- <% - background_options = [ - ("list", 'Randomly select from a subset of previous Wikimedia Commons Pictures of the Day that work well as widescreen backgrounds, refreshed daily (default).'), - ("potd", 'Use the current Commons Picture of the Day, unfiltered. Certain POTDs may be unsuitable as backgrounds due to their aspect ratio or subject matter.'), - ("plain", "Use a plain background."), - ] - selected = cookies["CopyviosBackground"].value if "CopyviosBackground" in cookies else "list" - %>\ -
-
-
-
- % for value, desc in background_options: -
-
- - - - - - - - -
-
- % endfor -
-
-
-
- - -
-
- - - - - -
-
-
-<%include file="/support/footer.mako"/> diff --git a/templates/support/footer.mako b/templates/support/footer.mako deleted file mode 100644 index e318b83..0000000 --- a/templates/support/footer.mako +++ /dev/null @@ -1,20 +0,0 @@ -<%! - from datetime import datetime - from flask import g, request -%>\ - - -
- - - - diff --git a/templates/support/header.mako b/templates/support/header.mako deleted file mode 100644 index ccab414..0000000 --- a/templates/support/header.mako +++ /dev/null @@ -1,34 +0,0 @@ -<%page args="title, splash=False"/>\ -<%! - from flask import request, url_for - from copyvios.background import get_background - from copyvios.cookies import get_cookies -%>\ -<% - cookies = get_cookies() -%>\ - - - - - ${title | h} - - - - - - - -<% selected = cookies["CopyviosBackground"].value if "CopyviosBackground" in cookies else "list" %>\ -% if selected == "plain": - -% else: - -% endif -