diff --git a/.gitignore b/.gitignore index 17ff478..674e006 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,7 @@ *.egg-info .DS_Store __pycache__ - +venv .earwigbot logs/* !logs/.gitinclude diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..e4bb1f5 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,11 @@ +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.6.2 + hooks: + - id: ruff + args: [--fix] + - id: ruff-format + - repo: https://github.com/RobertCraigie/pyright-python + rev: v1.1.377 + hooks: + - id: pyright diff --git a/LICENSE b/LICENSE index 68a6356..6a50d9f 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2009-2021 Ben Kurtovic +Copyright (c) 2009-2024 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 230bd66..0496500 100644 --- a/README.md +++ b/README.md @@ -1,50 +1,46 @@ This is a [copyright violation](https://en.wikipedia.org/wiki/WP:COPYVIO) -detector running on [Wikimedia Cloud Services](https://copyvios.toolforge.org/). +detector web tool for Wikipedia articles running on +[Wikimedia Cloud Services](https://wikitech.wikimedia.org/wiki/Help:Cloud_Services_introduction) +at [copyvios.toolforge.org](https://copyvios.toolforge.org/). It can search the web for content similar to a given article, and graphically -compare an article to a specific URL. Some technical details are expanded upon -[in a blog post](https://benkurtovic.com/2014/08/20/copyvio-detector.html). +compare an article to specific URLs. Some technical details are expanded upon +[in a blog post](https://benkurtovic.com/2014/08/20/copyvio-detector.html), +though much of it is outdated. -Dependencies +Installation ============ -* [earwigbot](https://github.com/earwig/earwigbot) >= 0.1 -* [flask](https://flask.palletsprojects.com/) >= 0.10.1 -* [flask-mako](https://pythonhosted.org/Flask-Mako/) >= 0.3 -* [mako](https://www.makotemplates.org/) >= 0.7.2 -* [mwparserfromhell](https://github.com/earwig/mwparserfromhell) >= 0.3 -* [oursql](https://pythonhosted.org/oursql/) >= 0.9.3.1 -* [requests](https://requests.readthedocs.io/) >= 2.9.1 -* [SQLAlchemy](https://www.sqlalchemy.org/) >= 0.9.6 -* [apsw](https://github.com/rogerbinns/apsw) >= 3.26.0 -* [uglifyjs](https://github.com/mishoo/UglifyJS) >= 3.12.6 -* [cssnano](https://github.com/cssnano/cssnano) >= 4.1.10 -* [postcss-cli](https://github.com/postcss/postcss-cli) >= 8.3.1 +- If using Toolforge, clone the repository to `~/www/python/src`, or otherwise + symlink it to that directory. -Running -======= +- Create a virtual environment and install the dependencies. On Toolforge, + this should be in `~/www/python/venv`, otherwise it can be in a subdirectory + of the git project named `venv`: + + python3 -m venv venv + . venv/bin/activate + pip install -e . -- If using Toolforge, you should clone the repository to `~/www/python/src`, or - otherwise symlink it to that directory. A - [virtualenv](https://virtualenv.pypa.io/) should be created at - `~/www/python/venv`. +- If you intend to modify CSS or JS, install the frontend dependencies: -- Install all dependencies listed above. + npm install -g uglify-js cssnano postcss postcss-cli -- Create an SQL database with the `cache` and `cache_data` tables defined by - [earwigbot-plugins](https://github.com/earwig/earwigbot-plugins/blob/develop/tasks/schema/afc_copyvios.sql). +- Create an SQL database with the tables defined by `schema.sql`. -- Create an earwigbot instance in `.earwigbot` (run `earwigbot .earwigbot`). In - `.earwigbot/config.yml`, fill out the connection info for the database by +- Create an earwigbot instance in `.earwigbot` (run `earwigbot .earwigbot`). + In `.earwigbot/config.yml`, fill out the connection info for the database by adding the following to the `wiki` section: - _copyviosSQL: + copyvios: + engine: mysql host: - db: + db: - If additional arguments are needed by `oursql.connect()`, like usernames or - passwords, they should be added to the `_copyviosSQL` section. +Running +======= -- Run `./build.py` to minify JS and CSS files. +- Run `./build.py` to minify JS and CSS files after making any frontend + changes. -- Start the web server (on Toolforge, `webservice uwsgi-python start`). +- Start your WSGI server pointing to app:app. diff --git a/app.py b/app.py index ff88f48..b45c145 100755 --- a/app.py +++ b/app.py @@ -1,10 +1,9 @@ #! /usr/bin/env python -# -*- coding: utf-8 -*- +import logging from functools import wraps from hashlib import md5 from json import dumps -from logging import DEBUG, INFO, getLogger from logging.handlers import TimedRotatingFileHandler from os import path from time import asctime @@ -13,7 +12,7 @@ from traceback import format_exc from earwigbot.bot import Bot from earwigbot.wiki.copyvios import globalize from flask import Flask, g, make_response, request -from flask_mako import MakoTemplates, render_template, TemplateError +from flask_mako import MakoTemplates, TemplateError, render_template from copyvios.api import format_api_error, handle_api_request from copyvios.checker import do_check @@ -26,24 +25,27 @@ app = Flask(__name__) MakoTemplates(app) hand = TimedRotatingFileHandler("logs/app.log", when="midnight", backupCount=7) -hand.setLevel(DEBUG) +hand.setLevel(logging.DEBUG) app.logger.addHandler(hand) -app.logger.info(u"Flask server started " + asctime()) +app.logger.info("Flask server started " + asctime()) app._hash_cache = {} + def catch_errors(func): @wraps(func) def inner(*args, **kwargs): try: return func(*args, **kwargs) except TemplateError as exc: - app.logger.error(u"Caught exception:\n{0}".format(exc.text)) + app.logger.error(f"Caught exception:\n{exc.text}") return render_template("error.mako", traceback=exc.text) except Exception: - app.logger.exception(u"Caught exception:") + app.logger.exception("Caught exception:") return render_template("error.mako", traceback=format_exc()) + return inner + @app.before_first_request def setup_app(): cache.bot = Bot(".earwigbot", 100) @@ -54,31 +56,43 @@ def setup_app(): globalize(num_workers=8) + @app.before_request def prepare_request(): g._db = None g.cookies = parse_cookies( - request.script_root or "/", request.environ.get("HTTP_COOKIE")) + request.script_root or "/", request.environ.get("HTTP_COOKIE") + ) g.new_cookies = [] + @app.after_request def add_new_cookies(response): for cookie in g.new_cookies: response.headers.add("Set-Cookie", cookie) return response + @app.after_request def write_access_log(response): - msg = u"%s %s %s %s -> %s" - app.logger.debug(msg, asctime(), request.method, request.path, - request.values.to_dict(), response.status_code) + msg = "%s %s %s %s -> %s" + app.logger.debug( + msg, + asctime(), + request.method, + request.path, + request.values.to_dict(), + response.status_code, + ) return response + @app.teardown_appcontext def close_databases(error): if g._db: g._db.close() + def external_url_handler(error, endpoint, values): if endpoint == "static" and "file" in values: fpath = path.join(app.static_folder, values["file"]) @@ -90,11 +104,13 @@ def external_url_handler(error, endpoint, values): with open(fpath, "rb") as f: hashstr = md5(f.read()).hexdigest() app._hash_cache[fpath] = (mtime, hashstr) - return "/static/{0}?v={1}".format(values["file"], hashstr) + return f"/static/{values['file']}?v={hashstr}" raise error + app.url_build_error_handlers.append(external_url_handler) + @app.route("/") @catch_errors def index(): @@ -102,8 +118,13 @@ def index(): update_sites() query = do_check() return render_template( - "index.mako", notice=notice, query=query, result=query.result, - turnitin_result=query.turnitin_result) + "index.mako", + notice=notice, + query=query, + result=query.result, + turnitin_result=query.turnitin_result, + ) + @app.route("/settings", methods=["GET", "POST"]) @catch_errors @@ -111,15 +132,20 @@ def settings(): status = process_settings() if request.method == "POST" else None update_sites() default = cache.bot.wiki.get_site() - kwargs = {"status": status, "default_lang": default.lang, - "default_project": default.project} + kwargs = { + "status": status, + "default_lang": default.lang, + "default_project": default.project, + } return render_template("settings.mako", **kwargs) + @app.route("/api") @catch_errors def api(): return render_template("api.mako", help=True) + @app.route("/api.json") @catch_errors def api_json(): @@ -134,7 +160,7 @@ def api_json(): except Exception as exc: result = format_api_error("unhandled_exception", exc) else: - errmsg = u"Unknown format: '{0}'".format(format) + errmsg = f"Unknown format: '{format}'" result = format_api_error("unknown_format", errmsg) if format == "jsonfm": @@ -144,5 +170,6 @@ def api_json(): resp.headers["Access-Control-Allow-Origin"] = "*" return resp -if __name__ == '__main__': + +if __name__ == "__main__": app.run() diff --git a/build.py b/build.py index 5519d3f..5d3e1c2 100755 --- a/build.py +++ b/build.py @@ -1,13 +1,13 @@ #! /usr/bin/env python -# -*- coding: utf-8 -*- -from __future__ import print_function import os import subprocess + def process(*args): print(*args) - content = subprocess.check_output(args) + subprocess.run(args, check=True) + def main(): root = os.path.join(os.path.dirname(__file__), "static") @@ -15,10 +15,25 @@ def main(): for filename in filenames: name = os.path.relpath(os.path.join(dirpath, filename)) if filename.endswith(".js") and ".min." not in filename: - process("uglifyjs", "--compress", "-o", name.replace(".js", ".min.js"), "--", name) + process( + "uglifyjs", + "--compress", + "-o", + name.replace(".js", ".min.js"), + "--", + name, + ) if filename.endswith(".css") and ".min." not in filename: - process("postcss", "-u", "cssnano", "--no-map", name, "-o", - name.replace(".css", ".min.css")) + process( + "postcss", + "-u", + "cssnano", + "--no-map", + name, + "-o", + name.replace(".css", ".min.css"), + ) + if __name__ == "__main__": main() diff --git a/copyvios/__init__.py b/copyvios/__init__.py deleted file mode 100644 index 89907bf..0000000 --- a/copyvios/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# -*- coding: utf-8 -*- diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..b40dd06 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,44 @@ +[project] +name = "copyvios" +version = "1.0.dev0" +authors = [ + {name = "Ben Kurtovic", email = "ben@benkurtovic.com"}, +] +description = "A copyright violation detector web tool for Wikipedia articles" +readme = "README.md" +requires-python = ">=3.11" +dependencies = [ + "earwigbot[sql,copyvios] >= 0.4", + "mwparserfromhell >= 0.6", + "flask >= 3.0", + "flask-mako >= 0.4", + "mako >= 1.3.5", + "requests >= 2.32.3", + "SQLAlchemy >= 2.0.32", + "apsw >= 3.46.1", +] + +[project.urls] +Homepage = "https://github.com/earwig/copyvios" +Issues = "https://github.com/earwig/copyvios/issues" + +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[tool.pyright] +pythonVersion = "3.11" +exclude = [ + # TODO + "src/copyvios/*", + "app.py", +] +venvPath = "." +venv = "venv" + +[tool.ruff] +target-version = "py311" + +[tool.ruff.lint] +select = ["E4", "E7", "E9", "F", "I", "UP"] +ignore = ["F403"] diff --git a/scripts/log_analyzer.py b/scripts/log_analyzer.py index 2c77071..cf4d8f5 100755 --- a/scripts/log_analyzer.py +++ b/scripts/log_analyzer.py @@ -2,48 +2,59 @@ import argparse import re import sqlite3 +from typing import Any REGEX = re.compile( - r'^' - r'{address space usage: (?P-?\d+) bytes/(?P\w+)} ' - r'{rss usage: (?P-?\d+) bytes/(?P\w+)} ' - r'\[pid: (?P\d+)\|app: -\|req: -/-\] (?P[0-9.]+) \(-\) ' - r'{(?P\d+) vars in (?P\d+) bytes} ' - r'\[(?P[0-9A-Za-z: ]+)\] (?P\w+) (?P.*?) => ' - r'generated (?P\d+) bytes in (?P\d+) msecs ' - r'\((- http://hasty.ai)?(?P[A-Z0-9/.]+) (?P\d+)\) ' - r'(?P\d+) headers in (?P\d+) bytes ' - r'\((?P\d+) switches on core (?P\d+)\) ' - r'(?P.*?)' - r'( (?Phttps?://[^ ]*?))?( -)?( http(://|%3A%2F%2F)hasty\.ai)?' - r'$' + r"^" + r"{address space usage: (?P-?\d+) bytes/(?P\w+)} " + r"{rss usage: (?P-?\d+) bytes/(?P\w+)} " + r"\[pid: (?P\d+)\|app: -\|req: -/-\] (?P[0-9.]+) \(-\) " + r"{(?P\d+) vars in (?P\d+) bytes} " + r"\[(?P[0-9A-Za-z: ]+)\] (?P\w+) (?P.*?) => " + r"generated (?P\d+) bytes in (?P\d+) msecs " + r"\((- http://hasty.ai)?(?P[A-Z0-9/.]+) (?P\d+)\) " + r"(?P\d+) headers in (?P\d+) bytes " + r"\((?P\d+) switches on core (?P\d+)\) " + r"(?P.*?)" + r"( (?Phttps?://[^ ]*?))?( -)?( http(://|%3A%2F%2F)hasty\.ai)?" + r"$" ) -def save_logs(logs): + +def save_logs(logs: list[dict[str, Any]]) -> None: columns = sorted(REGEX.groupindex, key=lambda col: REGEX.groupindex[col]) - conn = sqlite3.Connection('logs.db') + conn = sqlite3.Connection("logs.db") cur = conn.cursor() - cur.execute('CREATE TABLE IF NOT EXISTS logs(%s)' % ', '.join(columns)) - cur.executemany('INSERT INTO logs VALUES (%s)' % ', '.join(['?'] * len(columns)), - [[log[col] for col in columns] for log in logs]) + cur.execute(f"CREATE TABLE IF NOT EXISTS logs({', '.join(columns)})") + params = ", ".join(["?"] * len(columns)) + cur.executemany( + f"INSERT INTO logs VALUES ({params})", + [[log[col] for col in columns] for log in logs], + ) conn.commit() conn.close() -def read_logs(path): - with open(path, 'r', errors='replace') as fp: + +def read_logs(path: str) -> list[dict[str, Any]]: + with open(path, errors="replace") as fp: lines = fp.readlines() - parsed = [(line, REGEX.match(line.strip())) for line in lines - if line.startswith('{address space usage')] + parsed = [ + (line, REGEX.match(line.strip())) + for line in lines + if line.startswith("{address space usage") + ] for line, match in parsed: if not match: - print('failed to parse:', line.strip()) + print("failed to parse:", line.strip()) return [match.groupdict() for _, match in parsed if match] + def main(): parser = argparse.ArgumentParser() - parser.add_argument('logfile', default='uwsgi.log') + parser.add_argument("logfile", default="uwsgi.log") args = parser.parse_args() save_logs(read_logs(args.logfile)) -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/src/copyvios/__init__.py b/src/copyvios/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/copyvios/api.py b/src/copyvios/api.py similarity index 62% rename from copyvios/api.py rename to src/copyvios/api.py index 703a0cb..9373d9c 100644 --- a/copyvios/api.py +++ b/src/copyvios/api.py @@ -1,9 +1,7 @@ -# -*- coding: utf-8 -*- - from collections import OrderedDict +from .checker import T_POSSIBLE, T_SUSPECT, do_check from .highlighter import highlight_delta -from .checker import do_check, T_POSSIBLE, T_SUSPECT from .misc import Query, cache from .sites import update_sites @@ -15,83 +13,107 @@ _CHECK_ERRORS = { "no URL": "The parameter 'url' is required for URL comparisons", "bad URI": "The given URI scheme is unsupported", "no data": "No text could be found in the given URL (note that only HTML " - "and plain text pages are supported, and content generated by " - "JavaScript or found inside iframes is ignored)", + "and plain text pages are supported, and content generated by " + "JavaScript or found inside iframes is ignored)", "timeout": "The given URL timed out before any data could be retrieved", "search error": "An error occurred while using the search engine; try " - "reloading or setting 'use_engine' to 0", + "reloading or setting 'use_engine' to 0", } + def _serialize_page(page): return OrderedDict((("title", page.title), ("url", page.url))) + def _serialize_source(source, show_skip=True): if not source: - return OrderedDict(( - ("url", None), ("confidence", 0.0), ("violation", "none"))) + return OrderedDict((("url", None), ("confidence", 0.0), ("violation", "none"))) conf = source.confidence - data = OrderedDict(( - ("url", source.url), - ("confidence", conf), - ("violation", "suspected" if conf >= T_SUSPECT else - "possible" if conf >= T_POSSIBLE else "none") - )) + data = OrderedDict( + ( + ("url", source.url), + ("confidence", conf), + ( + "violation", + ( + "suspected" + if conf >= T_SUSPECT + else "possible" + if conf >= T_POSSIBLE + else "none" + ), + ), + ) + ) if show_skip: data["skipped"] = source.skipped data["excluded"] = source.excluded return data + def _serialize_detail(result): source_chain, delta = result.best.chains article = highlight_delta(None, result.article_chain, delta) source = highlight_delta(None, source_chain, delta) return OrderedDict((("article", article), ("source", source))) + def format_api_error(code, info): if isinstance(info, BaseException): info = type(info).__name__ + ": " + str(info) - elif isinstance(info, unicode): - info = info.encode("utf8") error_inner = OrderedDict((("code", code), ("info", info))) return OrderedDict((("status", "error"), ("error", error_inner))) + def _hook_default(query): - info = u"Unknown action: '{0}'".format(query.action.lower()) + info = f"Unknown action: '{query.action.lower()}'" return format_api_error("unknown_action", info) + def _hook_check(query): do_check(query) if not query.submitted: - info = ("The query parameters 'project', 'lang', and either 'title' " - "or 'oldid' are required for checks") + info = ( + "The query parameters 'project', 'lang', and either 'title' " + "or 'oldid' are required for checks" + ) return format_api_error("missing_params", info) if query.error: info = _CHECK_ERRORS.get(query.error, "An unknown error occurred") return format_api_error(query.error.replace(" ", "_"), info) elif not query.site: - info = (u"The given site (project={0}, lang={1}) either doesn't exist," - u" is closed, or is private").format(query.project, query.lang) + info = ( + f"The given site (project={query.project}, lang={query.lang}) either doesn't exist," + " is closed, or is private" + ) return format_api_error("bad_site", info) elif not query.result: if query.oldid: - info = u"The revision ID couldn't be found: {0}" + info = "The revision ID couldn't be found: {0}" return format_api_error("bad_oldid", info.format(query.oldid)) else: - info = u"The page couldn't be found: {0}" + info = "The page couldn't be found: {0}" return format_api_error("bad_title", info.format(query.page.title)) result = query.result - data = OrderedDict(( - ("status", "ok"), - ("meta", OrderedDict(( - ("time", result.time), - ("queries", result.queries), - ("cached", result.cached), - ("redirected", bool(query.redirected_from)) - ))), - ("page", _serialize_page(query.page)) - )) + data = OrderedDict( + ( + ("status", "ok"), + ( + "meta", + OrderedDict( + ( + ("time", result.time), + ("queries", result.queries), + ("cached", result.cached), + ("redirected", bool(query.redirected_from)), + ) + ), + ), + ("page", _serialize_page(query.page)), + ) + ) if result.cached: data["meta"]["cache_time"] = result.cache_time if query.redirected_from: @@ -102,11 +124,13 @@ def _hook_check(query): data["detail"] = _serialize_detail(result) return data + def _hook_sites(query): update_sites() - return OrderedDict(( - ("status", "ok"), ("langs", cache.langs), ("projects", cache.projects) - )) + return OrderedDict( + (("status", "ok"), ("langs", cache.langs), ("projects", cache.projects)) + ) + _HOOKS = { "compare": _hook_check, @@ -114,13 +138,14 @@ _HOOKS = { "sites": _hook_sites, } + def handle_api_request(): query = Query() if query.version: try: query.version = int(query.version) except ValueError: - info = "The version string is invalid: {0}".format(query.version) + info = f"The version string is invalid: {query.version}" return format_api_error("invalid_version", info) else: query.version = 1 @@ -129,5 +154,5 @@ def handle_api_request(): action = query.action.lower() if query.action else "" return _HOOKS.get(action, _hook_default)(query) - info = "The API version is unsupported: {0}".format(query.version) + info = f"The API version is unsupported: {query.version}" return format_api_error("unsupported_version", info) diff --git a/copyvios/attribution.py b/src/copyvios/attribution.py similarity index 80% rename from copyvios/attribution.py rename to src/copyvios/attribution.py index dc3f67d..39ec265 100644 --- a/copyvios/attribution.py +++ b/src/copyvios/attribution.py @@ -1,20 +1,19 @@ -# -*- coding: utf-8 -*- - -from __future__ import unicode_literals - from earwigbot.wiki import NS_TEMPLATE __all__ = ["get_attribution_info"] ATTRIB_TEMPLATES = { "enwiki": { - "CC-notice", "Cc-notice", + "CC-notice", + "Cc-notice", "Citation-attribution", - "Free-content attribution", "Open-source attribution", + "Free-content attribution", + "Open-source attribution", "Source-attribution", } } + def get_attribution_info(site, page): """Check to see if the given page has some kind of attribution info. @@ -30,7 +29,7 @@ def get_attribution_info(site, page): for template in page.parse().ifilter_templates(): if template.name.matches(templates): - name = unicode(template.name).strip() + name = str(template.name).strip() title = name if ":" in name else prefix + ":" + name return name, site.get_page(title).url return None diff --git a/copyvios/background.py b/src/copyvios/background.py similarity index 82% rename from copyvios/background.py rename to src/copyvios/background.py index ef8d8e2..6a8ec77 100644 --- a/copyvios/background.py +++ b/src/copyvios/background.py @@ -1,10 +1,10 @@ -# -*- coding: utf-8 -*- - -from datetime import datetime, timedelta -from json import loads import random import re -import urllib +import urllib.error +import urllib.parse +import urllib.request +from datetime import datetime, timedelta +from json import loads from earwigbot import exceptions from flask import g @@ -13,32 +13,39 @@ from .misc import cache __all__ = ["set_background"] + def _get_commons_site(): try: return cache.bot.wiki.get_site("commonswiki") except exceptions.SiteNotFoundError: return cache.bot.wiki.add_site(project="wikimedia", lang="commons") + def _load_file(site, filename): data = site.api_query( - action="query", prop="imageinfo", iiprop="url|size|canonicaltitle", - titles="File:" + filename) - res = data["query"]["pages"].values()[0]["imageinfo"][0] - name = res["canonicaltitle"][len("File:"):].replace(" ", "_") + action="query", + prop="imageinfo", + iiprop="url|size|canonicaltitle", + titles="File:" + filename, + ) + res = list(data["query"]["pages"].values())[0]["imageinfo"][0] + name = res["canonicaltitle"][len("File:") :].replace(" ", "_") return name, res["url"], res["descriptionurl"], res["width"], res["height"] + def _get_fresh_potd(): site = _get_commons_site() date = datetime.utcnow().strftime("%Y-%m-%d") page = site.get_page("Template:Potd/" + date) - regex = ur"\{\{Potd filename\|(?:1=)?(.*?)\|.*?\}\}" + regex = r"\{\{Potd filename\|(?:1=)?(.*?)\|.*?\}\}" filename = re.search(regex, page.get()).group(1) return _load_file(site, filename) + def _get_fresh_list(): site = _get_commons_site() page = site.get_page("User:The Earwig/POTD") - regex = ur"\*\*?\s*\[\[:File:(.*?)\]\]" + regex = r"\*\*?\s*\[\[:File:(.*?)\]\]" filenames = re.findall(regex, page.get()) # Ensure all workers share the same background each day: @@ -46,6 +53,7 @@ def _get_fresh_list(): filename = random.choice(filenames) return _load_file(site, filename) + def _build_url(screen, filename, url, imgwidth, imgheight): width = screen["width"] if float(imgwidth) / imgheight > float(screen["width"]) / screen["height"]: @@ -53,12 +61,11 @@ def _build_url(screen, filename, url, imgwidth, imgheight): if width >= imgwidth: return url url = url.replace("/commons/", "/commons/thumb/") - return "%s/%dpx-%s" % (url, width, urllib.quote(filename.encode("utf8"))) + return "%s/%dpx-%s" % (url, width, urllib.parse.quote(filename.encode("utf8"))) + + +_BACKGROUNDS = {"potd": _get_fresh_potd, "list": _get_fresh_list} -_BACKGROUNDS = { - "potd": _get_fresh_potd, - "list": _get_fresh_list -} def _get_background(selected): if not cache.last_background_updates: @@ -73,6 +80,7 @@ def _get_background(selected): cache.last_background_updates[selected] = datetime.utcnow().date() return cache.background_data[selected] + def set_background(selected): if "CopyviosScreenCache" in g.cookies: screen_cache = g.cookies["CopyviosScreenCache"].value diff --git a/copyvios/checker.py b/src/copyvios/checker.py similarity index 81% rename from copyvios/checker.py rename to src/copyvios/checker.py index c892db3..e3856c6 100644 --- a/copyvios/checker.py +++ b/src/copyvios/checker.py @@ -1,17 +1,15 @@ -# -*- coding: utf-8 -*- - +import re from datetime import datetime, timedelta from hashlib import sha256 from logging import getLogger -import re -from urlparse import urlparse +from urllib.parse import urlparse from earwigbot import exceptions from earwigbot.wiki.copyvios.markov import EMPTY, MarkovChain from earwigbot.wiki.copyvios.parsers import ArticleTextParser -from earwigbot.wiki.copyvios.result import CopyvioSource, CopyvioCheckResult +from earwigbot.wiki.copyvios.result import CopyvioCheckResult, CopyvioSource -from .misc import Query, get_db, get_cursor, get_sql_error, sql_dialect +from .misc import Query, get_cursor, get_db, get_sql_error, sql_dialect from .sites import get_site from .turnitin import search_turnitin @@ -22,9 +20,11 @@ T_SUSPECT = 0.75 _LOGGER = getLogger("copyvios.checker") + def _coerce_bool(val): return val and val not in ("0", "false") + def do_check(query=None): if not query: query = Query() @@ -44,6 +44,7 @@ def do_check(query=None): _get_results(query, follow=not _coerce_bool(query.noredirect)) return query + def _get_results(query, follow=True): if query.oldid: if not re.match(r"^\d+$", query.oldid): @@ -100,8 +101,9 @@ def _get_results(query, follow=True): degree = int(query.degree) except ValueError: pass - result = page.copyvio_compare(query.url, min_confidence=T_SUSPECT, - max_time=10, degree=degree) + result = page.copyvio_compare( + query.url, min_confidence=T_SUSPECT, max_time=10, degree=degree + ) if result.best.chains[0] is EMPTY: query.error = "timeout" if result.time > 10 else "no data" return @@ -110,12 +112,18 @@ def _get_results(query, follow=True): else: query.error = "bad action" + def _get_page_by_revid(site, revid): try: - res = site.api_query(action="query", prop="info|revisions", revids=revid, - rvprop="content|timestamp", inprop="protection|url", - rvslots="main") - page_data = res["query"]["pages"].values()[0] + res = site.api_query( + action="query", + prop="info|revisions", + revids=revid, + rvprop="content|timestamp", + inprop="protection|url", + rvslots="main", + ) + page_data = list(res["query"]["pages"].values())[0] title = page_data["title"] # Only need to check that these exist: revision = page_data["revisions"][0] @@ -131,24 +139,30 @@ def _get_page_by_revid(site, revid): page._load_content(res) return page + def _perform_check(query, page, use_engine, use_links): conn = get_db() sql_error = get_sql_error() - mode = "{0}:{1}:".format(use_engine, use_links) + mode = f"{use_engine}:{use_links}:" if not _coerce_bool(query.nocache): try: query.result = _get_cached_results( - page, conn, mode, _coerce_bool(query.noskip)) + page, conn, mode, _coerce_bool(query.noskip) + ) except sql_error: _LOGGER.exception("Failed to retrieve cached results") if not query.result: try: query.result = page.copyvio_check( - min_confidence=T_SUSPECT, max_queries=8, max_time=30, - no_searches=not use_engine, no_links=not use_links, - short_circuit=not query.noskip) + min_confidence=T_SUSPECT, + max_queries=8, + max_time=30, + no_searches=not use_engine, + no_links=not use_links, + short_circuit=not query.noskip, + ) except exceptions.SearchQueryError as exc: query.error = "search error" query.exception = exc @@ -159,6 +173,7 @@ def _perform_check(query, page, use_engine, use_links): except sql_error: _LOGGER.exception("Failed to cache results") + def _get_cached_results(page, conn, mode, noskip): query1 = """SELECT cache_time, cache_queries, cache_process_time, cache_possible_miss @@ -167,7 +182,7 @@ def _get_cached_results(page, conn, mode, noskip): query2 = """SELECT cdata_url, cdata_confidence, cdata_skipped, cdata_excluded FROM cache_data WHERE cdata_cache_id = ?""" - cache_id = buffer(sha256(mode + page.get().encode("utf8")).digest()) + cache_id = sha256(mode + page.get().encode("utf8")).digest() cursor = conn.cursor() cursor.execute(query1, (cache_id,)) @@ -186,8 +201,9 @@ def _get_cached_results(page, conn, mode, noskip): if not data: # TODO: do something less hacky for this edge case article_chain = MarkovChain(ArticleTextParser(page.get()).strip()) - result = CopyvioCheckResult(False, [], queries, check_time, - article_chain, possible_miss) + result = CopyvioCheckResult( + False, [], queries, check_time, article_chain, possible_miss + ) result.cached = True result.cache_time = cache_time.strftime("%b %d, %Y %H:%M:%S UTC") result.cache_age = _format_date(cache_time) @@ -216,8 +232,11 @@ def _get_cached_results(page, conn, mode, noskip): result.cache_age = _format_date(cache_time) return result + def _format_date(cache_time): - formatter = lambda n, w: "{0} {1}{2}".format(n, w, "" if n == 1 else "s") + def formatter(n, w): + return "{} {}{}".format(n, w, "" if n == 1 else "s") + diff = datetime.utcnow() - cache_time total_seconds = diff.days * 86400 + diff.seconds if total_seconds > 3600: @@ -226,23 +245,34 @@ def _format_date(cache_time): return formatter(total_seconds / 60, "minute") return formatter(total_seconds, "second") + def _cache_result(page, result, conn, mode): - expiry = sql_dialect(mysql="DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 3 DAY)", - sqlite="STRFTIME('%s', 'now', '-3 days')") + expiry = sql_dialect( + mysql="DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 3 DAY)", + sqlite="STRFTIME('%s', 'now', '-3 days')", + ) query1 = "DELETE FROM cache WHERE cache_id = ?" - query2 = "DELETE FROM cache WHERE cache_time < %s" % expiry + query2 = f"DELETE FROM cache WHERE cache_time < {expiry}" query3 = """INSERT INTO cache (cache_id, cache_queries, cache_process_time, cache_possible_miss) VALUES (?, ?, ?, ?)""" query4 = """INSERT INTO cache_data (cdata_cache_id, cdata_url, cdata_confidence, cdata_skipped, cdata_excluded) VALUES (?, ?, ?, ?, ?)""" - cache_id = buffer(sha256(mode + page.get().encode("utf8")).digest()) - data = [(cache_id, source.url[:1024], source.confidence, source.skipped, - source.excluded) - for source in result.sources] + cache_id = sha256(mode + page.get().encode("utf8")).digest() + data = [ + ( + cache_id, + source.url[:1024], + source.confidence, + source.skipped, + source.excluded, + ) + for source in result.sources + ] with get_cursor(conn) as cursor: cursor.execute(query1, (cache_id,)) cursor.execute(query2) - cursor.execute(query3, (cache_id, result.queries, result.time, - result.possible_miss)) + cursor.execute( + query3, (cache_id, result.queries, result.time, result.possible_miss) + ) cursor.executemany(query4, data) diff --git a/copyvios/cookies.py b/src/copyvios/cookies.py similarity index 74% rename from copyvios/cookies.py rename to src/copyvios/cookies.py index 2b5b410..5daf798 100644 --- a/copyvios/cookies.py +++ b/src/copyvios/cookies.py @@ -1,39 +1,38 @@ -# -*- coding: utf-8 -*- - import base64 -from Cookie import CookieError, SimpleCookie from datetime import datetime, timedelta +from http.cookies import CookieError, SimpleCookie from flask import g __all__ = ["parse_cookies", "set_cookie", "delete_cookie"] + class _CookieManager(SimpleCookie): MAGIC = "--cpv2" def __init__(self, path, cookies): self._path = path try: - super(_CookieManager, self).__init__(cookies) + super().__init__(cookies) except CookieError: - super(_CookieManager, self).__init__() - for cookie in self.keys(): + super().__init__() + for cookie in list(self.keys()): if self[cookie].value is False: del self[cookie] def value_decode(self, value): - unquoted = super(_CookieManager, self).value_decode(value)[0] + unquoted = super().value_decode(value)[0] try: decoded = base64.b64decode(unquoted).decode("utf8") except (TypeError, UnicodeDecodeError): return False, "False" if decoded.startswith(self.MAGIC): - return decoded[len(self.MAGIC):], value + return decoded[len(self.MAGIC) :], value return False, "False" def value_encode(self, value): encoded = base64.b64encode(self.MAGIC + value.encode("utf8")) - quoted = super(_CookieManager, self).value_encode(encoded)[1] + quoted = super().value_encode(encoded)[1] return value, quoted @property @@ -44,6 +43,7 @@ class _CookieManager(SimpleCookie): def parse_cookies(path, cookies): return _CookieManager(path, cookies) + def set_cookie(key, value, days=0): g.cookies[key] = value if days: @@ -53,6 +53,7 @@ def set_cookie(key, value, days=0): g.cookies[key]["path"] = g.cookies.path g.new_cookies.append(g.cookies[key].OutputString()) + def delete_cookie(key): - set_cookie(key, u"", days=-1) + set_cookie(key, "", days=-1) del g.cookies[key] diff --git a/copyvios/highlighter.py b/src/copyvios/highlighter.py similarity index 69% rename from copyvios/highlighter.py rename to src/copyvios/highlighter.py index a3fb21f..009d93b 100644 --- a/copyvios/highlighter.py +++ b/src/copyvios/highlighter.py @@ -1,13 +1,12 @@ -# -*- coding: utf-8 -*- - from collections import deque -from re import sub, UNICODE +from re import UNICODE, sub from earwigbot.wiki.copyvios.markov import EMPTY_INTERSECTION from markupsafe import escape __all__ = ["highlight_delta"] + def highlight_delta(context, chain, delta): degree = chain.degree - 1 highlights = [False] * degree @@ -18,7 +17,7 @@ def highlight_delta(context, chain, delta): word = _strip_word(chain, word) block.append(word) if tuple(block) in delta.chain: - highlights[-1 * degree:] = [True] * degree + highlights[-1 * degree :] = [True] * degree highlights.append(True) else: highlights.append(False) @@ -38,11 +37,12 @@ def highlight_delta(context, chain, delta): last = i - degree + 1 == numwords words.append(_highlight_word(word, before, after, first, last)) else: - words.append(unicode(escape(word))) - result.append(u" ".join(words)) + words.append(str(escape(word))) + result.append(" ".join(words)) i += 1 - return u"

".join(result) + return "

".join(result) + def _get_next(paragraphs): body = [] @@ -58,41 +58,44 @@ def _get_next(paragraphs): break return body + def _highlight_word(word, before, after, first, last): if before and after: # Word is in the middle of a highlighted block: - res = unicode(escape(word)) + res = str(escape(word)) if first: - res = u'' + res + res = '' + res if last: - res += u'' + res += "" elif after: # Word is the first in a highlighted block: - res = u'' + _fade_word(word, u"in") + res = '' + _fade_word(word, "in") if last: - res += u"" + res += "" elif before: # Word is the last in a highlighted block: - res = _fade_word(word, u"out") + u"" + res = _fade_word(word, "out") + "" if first: - res = u'' + res + res = '' + res else: - res = unicode(escape(word)) + res = str(escape(word)) return res + def _fade_word(word, dir): if len(word) <= 4: - word = unicode(escape(word)) - return u'{1}'.format(dir, word) - if dir == u"out": - before, after = unicode(escape(word[:-4])), unicode(escape(word[-4:])) - base = u'{0}{1}' + word = str(escape(word)) + return f'{word}' + if dir == "out": + before, after = str(escape(word[:-4])), str(escape(word[-4:])) + base = '{0}{1}' return base.format(before, after) else: - before, after = unicode(escape(word[:4])), unicode(escape(word[4:])) - base = u'{0}{1}' + before, after = str(escape(word[:4])), str(escape(word[4:])) + base = '{0}{1}' return base.format(before, after) + def _strip_word(chain, word): if word == chain.START or word == chain.END: return word diff --git a/copyvios/misc.py b/src/copyvios/misc.py similarity index 81% rename from copyvios/misc.py rename to src/copyvios/misc.py index b4cbca2..c924fa6 100644 --- a/copyvios/misc.py +++ b/src/copyvios/misc.py @@ -1,19 +1,18 @@ -# -*- coding: utf-8 -*- - -from contextlib import contextmanager import datetime +from contextlib import contextmanager from os.path import expanduser, join import apsw -from flask import g, request import oursql +from flask import g, request from sqlalchemy.pool import manage oursql = manage(oursql) __all__ = ["Query", "cache", "get_db", "get_notice", "httpsfix", "urlstrip"] -class Query(object): + +class Query: def __init__(self, method="GET"): self.query = {} data = request.form if method == "POST" else request.args @@ -25,14 +24,14 @@ class Query(object): def __setattr__(self, key, value): if key == "query": - super(Query, self).__setattr__(key, value) + super().__setattr__(key, value) else: self.query[key] = value -class _AppCache(object): +class _AppCache: def __init__(self): - super(_AppCache, self).__setattr__("_data", {}) + super().__setattr__("_data", {}) def __getattr__(self, key): return self._data[key] @@ -43,6 +42,7 @@ class _AppCache(object): cache = _AppCache() + def _connect_to_db(engine, args): if engine == "mysql": args["read_default_file"] = expanduser("~/.my.cnf") @@ -54,15 +54,17 @@ def _connect_to_db(engine, args): conn = apsw.Connection(dbpath) conn.cursor().execute("PRAGMA foreign_keys = ON") return conn - raise ValueError("Unknown engine: %s" % engine) + raise ValueError(f"Unknown engine: {engine}") + def get_db(): if not g._db: - args = cache.bot.config.wiki["_copyviosSQL"].copy() + args = cache.bot.config.wiki["copyvios"].copy() g._engine = engine = args.pop("engine", "mysql").lower() g._db = _connect_to_db(engine, args) return g._db + @contextmanager def get_cursor(conn): if g._engine == "mysql": @@ -72,21 +74,24 @@ def get_cursor(conn): with conn: yield conn.cursor() else: - raise ValueError("Unknown engine: %s" % g._engine) + raise ValueError(f"Unknown engine: {g._engine}") + def get_sql_error(): if g._engine == "mysql": return oursql.Error if g._engine == "sqlite": return apsw.Error - raise ValueError("Unknown engine: %s" % g._engine) + raise ValueError(f"Unknown engine: {g._engine}") + def sql_dialect(mysql, sqlite): if g._engine == "mysql": return mysql if g._engine == "sqlite": return sqlite - raise ValueError("Unknown engine: %s" % g._engine) + raise ValueError(f"Unknown engine: {g._engine}") + def get_notice(): try: @@ -95,16 +100,19 @@ def get_notice(): if lines[0] == "": return "\n".join(lines[1:]) return None - except IOError: + except OSError: return None + def httpsfix(context, url): if url.startswith("http://"): - url = url[len("http:"):] + url = url[len("http:") :] return url + def parse_wiki_timestamp(timestamp): - return datetime.datetime.strptime(timestamp, '%Y%m%d%H%M%S') + return datetime.datetime.strptime(timestamp, "%Y%m%d%H%M%S") + def urlstrip(context, url): if url.startswith("http://"): diff --git a/copyvios/settings.py b/src/copyvios/settings.py similarity index 83% rename from copyvios/settings.py rename to src/copyvios/settings.py index fcf3948..00217d1 100644 --- a/copyvios/settings.py +++ b/src/copyvios/settings.py @@ -1,13 +1,12 @@ -# -*- coding: utf-8 -*- - from flask import g from markupsafe import escape -from .cookies import set_cookie, delete_cookie +from .cookies import delete_cookie, set_cookie from .misc import Query __all__ = ["process_settings"] + def process_settings(): query = Query(method="POST") if query.action == "set": @@ -18,6 +17,7 @@ def process_settings(): status = None return status + def _do_set(query): cookies = g.cookies changes = set() @@ -39,18 +39,19 @@ def _do_set(query): changes.add("background") if changes: changes = ", ".join(sorted(list(changes))) - return "Updated {0}.".format(changes) + return f"Updated {changes}." return None + def _do_delete(query): cookies = g.cookies if query.cookie in cookies: delete_cookie(query.cookie.encode("utf8")) - template = u'Deleted cookie {0}.' + template = 'Deleted cookie {0}.' return template.format(escape(query.cookie)) elif query.all: number = len(cookies) - for cookie in cookies.values(): + for cookie in list(cookies.values()): delete_cookie(cookie.key) - return "Deleted {0} cookies.".format(number) + return f"Deleted {number} cookies." return None diff --git a/copyvios/sites.py b/src/copyvios/sites.py similarity index 85% rename from copyvios/sites.py rename to src/copyvios/sites.py index df432be..3dc5706 100644 --- a/copyvios/sites.py +++ b/src/copyvios/sites.py @@ -1,7 +1,5 @@ -# -*- coding: utf-8 -*- - from time import time -from urlparse import urlparse +from urllib.parse import urlparse from earwigbot import exceptions @@ -9,6 +7,7 @@ from .misc import cache __all__ = ["get_site", "update_sites"] + def get_site(query): lang, project, name = query.lang, query.project, query.name wiki = cache.bot.wiki @@ -24,11 +23,13 @@ def get_site(query): except exceptions.SiteNotFoundError: return _add_site(lang, project) + def update_sites(): if time() - cache.last_sites_update > 60 * 60 * 24 * 7: cache.langs, cache.projects = _load_sites() cache.last_sites_update = time() + def _add_site(lang, project): update_sites() if not any(project == item[0] for item in cache.projects): @@ -40,12 +41,13 @@ def _add_site(lang, project): except (exceptions.APIError, exceptions.LoginError): return None + def _load_sites(): site = cache.bot.wiki.get_site() matrix = site.api_query(action="sitematrix")["sitematrix"] del matrix["count"] langs, projects = set(), set() - for site in matrix.itervalues(): + for site in matrix.values(): if isinstance(site, list): # Special sites bad_sites = ["closed", "private", "fishbowl"] for special in site: @@ -55,19 +57,19 @@ def _load_sites(): lang, project = "www", full.split(".")[0] else: lang, project = full.rsplit(".", 2)[:2] - code = u"{0}::{1}".format(lang, special["dbname"]) + code = "{}::{}".format(lang, special["dbname"]) name = special["code"].capitalize() - langs.add((code, u"{0} ({1})".format(lang, name))) + langs.add((code, f"{lang} ({name})")) projects.add((project, project.capitalize())) else: this = set() for web in site["site"]: if "closed" in web: continue - proj = "wikipedia" if web["code"] == u"wiki" else web["code"] + proj = "wikipedia" if web["code"] == "wiki" else web["code"] this.add((proj, proj.capitalize())) if this: code = site["code"] - langs.add((code, u"{0} ({1})".format(code, site["name"]))) + langs.add((code, "{} ({})".format(code, site["name"]))) projects |= this return list(sorted(langs)), list(sorted(projects)) diff --git a/copyvios/turnitin.py b/src/copyvios/turnitin.py similarity index 72% rename from copyvios/turnitin.py rename to src/copyvios/turnitin.py index 15d7ded..6026c72 100644 --- a/copyvios/turnitin.py +++ b/src/copyvios/turnitin.py @@ -1,17 +1,17 @@ -# -*- coding: utf-8 -*- -from ast import literal_eval import re +from ast import literal_eval import requests from .misc import parse_wiki_timestamp -__all__ = ['search_turnitin', 'TURNITIN_API_ENDPOINT'] +__all__ = ["search_turnitin", "TURNITIN_API_ENDPOINT"] + +TURNITIN_API_ENDPOINT = "https://eranbot.toolforge.org/plagiabot/api.py" -TURNITIN_API_ENDPOINT = 'https://eranbot.toolforge.org/plagiabot/api.py' def search_turnitin(page_title, lang): - """ Search the Plagiabot database for Turnitin reports for a page. + """Search the Plagiabot database for Turnitin reports for a page. Keyword arguments: page_title -- string containing the page title @@ -21,14 +21,16 @@ def search_turnitin(page_title, lang): """ return TurnitinResult(_make_api_request(page_title, lang)) + def _make_api_request(page_title, lang): - """ Query the plagiabot API for Turnitin reports for a given page. - """ - stripped_page_title = page_title.replace(' ', '_') - api_parameters = {'action': 'suspected_diffs', - 'page_title': stripped_page_title, - 'lang': lang, - 'report': 1} + """Query the plagiabot API for Turnitin reports for a given page.""" + stripped_page_title = page_title.replace(" ", "_") + api_parameters = { + "action": "suspected_diffs", + "page_title": stripped_page_title, + "lang": lang, + "report": 1, + } result = requests.get(TURNITIN_API_ENDPOINT, params=api_parameters, verify=False) # use literal_eval to *safely* parse the resulting dict-containing string @@ -38,14 +40,16 @@ def _make_api_request(page_title, lang): parsed_api_result = [] return parsed_api_result -class TurnitinResult(object): - """ Container class for TurnitinReports. Each page may have zero or + +class TurnitinResult: + """Container class for TurnitinReports. Each page may have zero or more reports of plagiarism. The list will have multiple TurnitinReports if plagiarism has been detected for more than one revision. TurnitinResult.reports -- list containing >= 0 TurnitinReport items """ + def __init__(self, turnitin_data): """ Keyword argument: @@ -54,14 +58,16 @@ class TurnitinResult(object): self.reports = [] for item in turnitin_data: report = TurnitinReport( - item['diff_timestamp'], item['diff'], item['report']) + item["diff_timestamp"], item["diff"], item["report"] + ) self.reports.append(report) def __repr__(self): return str(self.__dict__) -class TurnitinReport(object): - """ Contains data for each Turnitin report (one on each potentially + +class TurnitinReport: + """Contains data for each Turnitin report (one on each potentially plagiarized revision). TurnitinReport.reportid -- Turnitin report ID, taken from plagiabot @@ -72,6 +78,7 @@ class TurnitinReport(object): words -- number of words found in both source and revision url -- url for the possibly-plagiarized source """ + def __init__(self, timestamp, diffid, report): """ Keyword argument: @@ -86,9 +93,7 @@ class TurnitinReport(object): self.sources = [] for item in self.report_data[1]: - source = {'percent': item[0], - 'words': item[1], - 'url': item[2]} + source = {"percent": item[0], "words": item[1], "url": item[2]} self.sources.append(source) def __repr__(self): @@ -96,12 +101,11 @@ class TurnitinReport(object): def _parse_report(self, report_text): # extract report ID - report_id_pattern = re.compile(r'\?rid=(\d*)') + report_id_pattern = re.compile(r"\?rid=(\d*)") report_id = report_id_pattern.search(report_text).groups()[0] # extract percent match, words, and URL for each source in the report - extract_info_pattern = re.compile( - r'\n\* \w\s+(\d*)\% (\d*) words at \[(.*?) ') + extract_info_pattern = re.compile(r"\n\* \w\s+(\d*)\% (\d*) words at \[(.*?) ") results = extract_info_pattern.findall(report_text) return (report_id, results) diff --git a/static/api.min.css b/static/api.min.css index a261ff9..97b2e3e 100644 --- a/static/api.min.css +++ b/static/api.min.css @@ -1 +1 @@ -h1,h2{font-family:sans-serif}pre{white-space:pre-wrap}#help{margin:auto;max-width:1200px}.json{font-family:monospace}.indent{display:inline-block;padding-left:2em}.code{font-family:monospace}.resp-cond,.resp-desc,.resp-dtype{padding:0 .25em;background-color:#eee}.resp-dtype{color:#009}.resp-cond:before,.resp-dtype:before{content:"("}.resp-cond:after,.resp-dtype:after{content:")"}.resp-desc{color:#050}.resp-cond{color:#900;font-style:italic}.param-key{color:#009;font-weight:700}.param-val{color:#900;font-weight:700}.parameters{margin:1em 0}.parameters tr:first-child{font-family:sans-serif;font-size:1.17em;color:#fff}.parameters tr:first-child th{background-color:#369}.parameters td,.parameters th{padding:.2em .5em}.parameters th{background-color:#f0f0f0}.parameters td:first-child{font-family:monospace}.parameters tr:nth-child(2n+3){background-color:#e0e0e0}.parameters tr:nth-child(2n+4){background-color:#f0f0f0}a:link,a:visited{color:#373;text-decoration:none}a:hover{color:#040}a:active,a:hover{text-decoration:underline}a:active{color:#404}.no-color:link,.no-color:visited{color:#000;text-decoration:none}.no-color:active,.no-color:hover{color:#000;text-decoration:underline} \ No newline at end of file +h1,h2{font-family:sans-serif}pre{white-space:pre-wrap}#help{margin:auto;max-width:1200px}.json{font-family:monospace}.indent{display:inline-block;padding-left:2em}.code{font-family:monospace}.resp-cond,.resp-desc,.resp-dtype{background-color:#eee;padding:0 .25em}.resp-dtype{color:#009}.resp-cond:before,.resp-dtype:before{content:"("}.resp-cond:after,.resp-dtype:after{content:")"}.resp-desc{color:#050}.resp-cond{color:#900;font-style:italic}.param-key{color:#009;font-weight:700}.param-val{color:#900;font-weight:700}.parameters{margin:1em 0}.parameters tr:first-child{color:#fff;font-family:sans-serif;font-size:1.17em}.parameters tr:first-child th{background-color:#369}.parameters td,.parameters th{padding:.2em .5em}.parameters th{background-color:#f0f0f0}.parameters td:first-child{font-family:monospace}.parameters tr:nth-child(2n+3){background-color:#e0e0e0}.parameters tr:nth-child(2n+4){background-color:#f0f0f0}a:link,a:visited{color:#373;text-decoration:none}a:hover{color:#040}a:active,a:hover{text-decoration:underline}a:active{color:#404}.no-color:link,.no-color:visited{color:#000;text-decoration:none}.no-color:active,.no-color:hover{color:#000;text-decoration:underline} \ No newline at end of file diff --git a/static/script.min.js b/static/script.min.js index e415611..8ca38b7 100644 --- a/static/script.min.js +++ b/static/script.min.js @@ -1 +1 @@ -function update_screen_size(){var cache=cache_cookie(),data={width:window.screen.availWidth,height:window.screen.availHeight};cache&&cache.width==data.width&&cache.height==data.height||set_cookie("CopyviosScreenCache",JSON.stringify(data),1095)}function cache_cookie(){var cookie=get_cookie("CopyviosScreenCache");if(cookie)try{data=JSON.parse(cookie);var width=data.width,height=data.height;if(width&&height)return{width:width,height:height}}catch(SyntaxError){}return!1}function get_cookie(name){for(var nameEQ=name+"=",ca=document.cookie.split(";"),i=0;i",{id:"notice-collapse-trigger",href:"#",text:"[show]",click:function(){return toggle_notice(),!1}})),details.hide())}$(document).ready(function(){$("#action-search").change(function(){$(".cv-search").prop("disabled",!1),$(".cv-compare").prop("disabled",!0),$(".cv-search-oo-ui").addClass("oo-ui-widget-enabled").removeClass("oo-ui-widget-disabled"),$(".cv-compare-oo-ui").addClass("oo-ui-widget-disabled").removeClass("oo-ui-widget-enabled")}),$("#action-compare").change(function(){$(".cv-search").prop("disabled",!0),$(".cv-compare").prop("disabled",!1),$(".cv-search-oo-ui").addClass("oo-ui-widget-disabled").removeClass("oo-ui-widget-enabled"),$(".cv-compare-oo-ui").addClass("oo-ui-widget-enabled").removeClass("oo-ui-widget-disabled")}),$("#action-search").is(":checked")&&$("#action-search").change(),$("#action-compare").is(":checked")&&$("#action-compare").change(),$("#cv-form").submit(function(){$("#action-search").is(":checked")&&$.each([["engine","use_engine"],["links","use_links"],["turnitin","turnitin"]],function(i,val){$("#cv-cb-"+val[0]).is(":checked")&&$("#cv-form input[type='hidden'][name='"+val[1]+"']").prop("disabled",!0)}),$("#cv-form button[type='submit']").prop("disabled",!0).css("cursor","progress").parent().addClass("oo-ui-widget-disabled").removeClass("oo-ui-widget-enabled")}),0<=$("#cv-additional").length&&($("#cv-additional").css("display","block"),$(".source-default-hidden").css("display","none"),$("#show-additional-sources").click(function(){return $(".source-default-hidden").css("display",""),$("#cv-additional").css("display","none"),!1})),install_notice()}); \ No newline at end of file +function update_screen_size(){var cache=cache_cookie(),data={width:window.screen.availWidth,height:window.screen.availHeight};cache&&cache.width==data.width&&cache.height==data.height||set_cookie("CopyviosScreenCache",JSON.stringify(data),1095)}function cache_cookie(){var cookie=get_cookie("CopyviosScreenCache");if(cookie)try{var width=(data=JSON.parse(cookie)).width,height=data.height;if(width&&height)return{width:width,height:height}}catch(SyntaxError){}return!1}function get_cookie(name){for(var nameEQ=name+"=",ca=document.cookie.split(";"),i=0;i",{id:"notice-collapse-trigger",href:"#",text:"[show]",click:function(){return toggle_notice(),!1}})),details.hide())}$(document).ready(function(){$("#action-search").change(function(){$(".cv-search").prop("disabled",!1),$(".cv-compare").prop("disabled",!0),$(".cv-search-oo-ui").addClass("oo-ui-widget-enabled").removeClass("oo-ui-widget-disabled"),$(".cv-compare-oo-ui").addClass("oo-ui-widget-disabled").removeClass("oo-ui-widget-enabled")}),$("#action-compare").change(function(){$(".cv-search").prop("disabled",!0),$(".cv-compare").prop("disabled",!1),$(".cv-search-oo-ui").addClass("oo-ui-widget-disabled").removeClass("oo-ui-widget-enabled"),$(".cv-compare-oo-ui").addClass("oo-ui-widget-enabled").removeClass("oo-ui-widget-disabled")}),$("#action-search").is(":checked")&&$("#action-search").change(),$("#action-compare").is(":checked")&&$("#action-compare").change(),$("#cv-form").submit(function(){$("#action-search").is(":checked")&&$.each([["engine","use_engine"],["links","use_links"],["turnitin","turnitin"]],function(i,val){$("#cv-cb-"+val[0]).is(":checked")&&$("#cv-form input[type='hidden'][name='"+val[1]+"']").prop("disabled",!0)}),$("#cv-form button[type='submit']").prop("disabled",!0).css("cursor","progress").parent().addClass("oo-ui-widget-disabled").removeClass("oo-ui-widget-enabled")}),0<=$("#cv-additional").length&&($("#cv-additional").css("display","block"),$(".source-default-hidden").css("display","none"),$("#show-additional-sources").click(function(){return $(".source-default-hidden").css("display",""),$("#cv-additional").css("display","none"),!1})),install_notice()}); \ No newline at end of file diff --git a/static/style.min.css b/static/style.min.css index 56e876f..e05b5c5 100644 --- a/static/style.min.css +++ b/static/style.min.css @@ -1 +1 @@ -body,html{height:100%;margin:0}body{display:flex;flex-direction:column;font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji;font-size:14px;color:#000;background-color:#eaecf0;background-attachment:fixed;background-position:50%;background-size:cover}#container{flex:auto;line-height:1.25;margin:0 auto}#container.splash{display:flex;flex-direction:column;justify-content:center}@media only screen and (min-width:1200px){#container.splash{min-width:1200px;max-width:1600px}}@media only screen and (max-width:1200px){#container.splash{width:100%}}#container.splash>.padding{height:25%}#content{background-color:#fff;border:1px solid #c8ccd1;filter:drop-shadow(0 0 10px rgba(0,0,0,.25));margin:1.5em 3em;padding:1em}@media only screen and (max-width:1000px){#content{margin:1em}}@media only screen and (max-width:400px){#content{margin:0}}header{background-color:#eaecf0;padding:.2em 1em}header>*{vertical-align:middle;display:inline-block}header h1{font-size:2.25em;font-weight:400;margin:0 1em 0 0}@media only screen and (max-width:500px){header h1{font-size:1.5em}}#settings-link:before{content:" ";font-size:.85em;color:#000;opacity:.6;padding-left:1.67em;background-image:linear-gradient(transparent,transparent),url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink' width='20' height='20'%3E%3Cg transform='translate(10 10)'%3E%3Cpath id='a' d='M1.5-10h-3l-1 6.5h5m0 7h-5l1 6.5h3'/%3E%3Cuse transform='rotate(45)' xlink:href='%23a'/%3E%3Cuse transform='rotate(90)' xlink:href='%23a'/%3E%3Cuse transform='rotate(135)' xlink:href='%23a'/%3E%3C/g%3E%3Cpath d='M10 2.5a7.5 7.5 0 000 15 7.5 7.5 0 000-15v4a3.5 3.5 0 010 7 3.5 3.5 0 010-7'/%3E%3C/svg%3E");background-repeat:no-repeat;background-size:contain}footer{padding:1em;font-size:.9em;text-align:center;line-height:1.5;border-top:1px solid #c8ccd1;background:#fff}footer ul{margin:0}footer li{display:inline}footer li:not(:last-child):after{content:" \00b7"}footer a{white-space:nowrap}ol,ul{line-height:1.5}h2{margin-bottom:.2em}#info-box,#notice-box{padding:0 1em;margin:1em 0}#notice-box ul{padding-left:1.5em;margin:0}#cv-result{padding:.5em;margin:1em 0}#attribution-warning{padding:1em;margin:1em 0}#sources-container,#turnitin-container{padding:.5em 1em 1em;margin:1em 0}#sources-container{background-color:#eee;border:1px solid #bbb}#sources-title,#turnitin-title{margin-bottom:-.5em;text-align:center;font-weight:700}#cv-additional{display:none}#generation-time{text-align:right;font-style:italic}@media only screen and (min-width:600px){#generation-time{margin-top:-1em}}#heading{width:100%}#cv-result-sources{width:100%;border-spacing:0 .4em;table-layout:fixed}#cv-result-sources col:first-child{width:80%}#cv-result-sources col:nth-child(2),#cv-result-sources col:nth-child(3){width:10%}#cv-result-sources th{text-align:left}#cv-result-sources tr:nth-child(2n){background-color:#e0e0e0}#cv-result-sources td:first-child{overflow:hidden;word-wrap:break-word}#cv-result-head-table{width:100%;text-align:center;table-layout:fixed;border-spacing:0}#cv-result-head-table col:nth-child(odd){width:42.5%}#cv-result-head-table col:nth-child(2){width:15%}#cv-result-head-table td:nth-child(odd){font-size:1.25em;font-weight:700;overflow:hidden;word-wrap:break-word}#cv-result-head-table td:nth-child(2) div:first-child{font-weight:700;white-space:nowrap}#cv-result-head-table td:nth-child(2) div:nth-child(2){font-size:2.5em;font-weight:700;line-height:1}#cv-result-head-table td:nth-child(2) div:nth-child(3){font-size:.8em}#cv-chain-table,#turnitin-table{width:100%;border-spacing:0;table-layout:fixed}#turnitin-table{word-wrap:break-word}#source-row-selected{background-color:#cfcfcf!important}#head-settings{text-align:right}#cv-result-header{margin:0}#redirected-from{font-size:.75em;font-weight:400}#redirected-from,#result-head-no-sources{font-style:italic}#source-selected{font-weight:700}#cv-cached{position:relative}#cv-cached span{display:none;position:absolute;top:1.5em;left:-5em;width:30em;padding:1em;z-index:1;background:#f3f3f3;border:1px solid #aaa;color:#000;font-style:normal;text-align:left}.green-box{background-color:#efe;border:1px solid #7f7}.yellow-box{background-color:#ffd;border:1px solid #ee5}.red-box{background-color:#fee;border:1px solid #f77}.gray-box{background-color:#eee;border:1px solid #aaa}.indentable{white-space:pre-wrap}.cv-source-footer{padding-bottom:.5em;font-style:italic}.cv-source-footer a{font-style:normal}.cv-chain-detail{padding:0 1em;background-color:#fff;border:1px solid #bbb}.cv-chain-cell{vertical-align:top;word-wrap:break-word}.cv-chain-cell:first-child{padding-right:.5em}.cv-chain-cell:last-child{padding-left:.5em}.turnitin-table-cell{padding:.5em 0 .3em}.turnitin-table-cell ul{margin:.2em 0 0;line-height:1.4}.cv-hl{background:#faa}.cv-hl-in{background:#fcc;background:linear-gradient(270deg,#faa,#fff)}.cv-hl-out{background:#fcc;background:linear-gradient(90deg,#faa,#fff)}.mono{font-family:monospace}.light{color:#ccc}.medium{color:#aaa}.source-similarity{font-weight:700}.source-suspect{color:#900}.source-possible{color:#990}.source-novio{color:#090}.source-excluded,.source-skipped{font-style:italic}a:link,a:visited{color:#002bb8;text-decoration:none}a:hover{color:#002bb8}a:active,a:hover{text-decoration:underline}a:active{color:#404}header a:hover,header a:link,header a:visited{color:#54595d}header a:active{color:#333}#cv-cached:active{color:#040}#cv-cached:active,#cv-cached:hover{text-decoration:none}#cv-cached:hover span{display:block}.source-url:link,.source-url:visited{color:#357}.source-url:hover{color:#035}.source-url:active{color:#404}.oo-ui-horizontalLayout>.oo-ui-dropdownInputWidget,.oo-ui-horizontalLayout>.oo-ui-textInputWidget{width:auto}.oo-ui-fieldLayout.oo-ui-fieldLayout-align-inline{hyphens:manual}.oo-ui-fieldLayout.oo-ui-labelElement.oo-ui-fieldLayout-align-left>.oo-ui-fieldLayout-body>.oo-ui-fieldLayout-header,.oo-ui-fieldLayout.oo-ui-labelElement.oo-ui-fieldLayout-align-right>.oo-ui-fieldLayout-body>.oo-ui-fieldLayout-header{width:10%}.oo-ui-fieldLayout.oo-ui-labelElement.oo-ui-fieldLayout-align-left>.oo-ui-fieldLayout-body>.oo-ui-fieldLayout-field,.oo-ui-fieldLayout.oo-ui-labelElement.oo-ui-fieldLayout-align-right>.oo-ui-fieldLayout-body>.oo-ui-fieldLayout-field{width:90%}.compare-url.oo-ui-textInputWidget,.page-title.oo-ui-textInputWidget{width:60%}.page-oldid.oo-ui-textInputWidget{width:10em}label.page,label.site{min-width:4em}label.action{min-width:10em}@media only screen and (max-width:720px){.oo-ui-horizontalLayout>.oo-ui-widget{width:100%}} \ No newline at end of file +body,html{height:100%;margin:0}body{background-attachment:fixed;background-color:#eaecf0;background-position:50%;background-size:cover;color:#000;display:flex;flex-direction:column;font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji;font-size:14px}#container{flex:auto;line-height:1.25;margin:0 auto}#container.splash{display:flex;flex-direction:column;justify-content:center}@media only screen and (min-width:1200px){#container.splash{max-width:1600px;min-width:1200px}}@media only screen and (max-width:1200px){#container.splash{width:100%}}#container.splash>.padding{height:25%}#content{background-color:#fff;border:1px solid #c8ccd1;filter:drop-shadow(0 0 10px rgba(0,0,0,.25));margin:1.5em 3em;padding:1em}@media only screen and (max-width:1000px){#content{margin:1em}}@media only screen and (max-width:400px){#content{margin:0}}header{background-color:#eaecf0;padding:.2em 1em}header>*{display:inline-block;vertical-align:middle}header h1{font-size:2.25em;font-weight:400;margin:0 1em 0 0}@media only screen and (max-width:500px){header h1{font-size:1.5em}}#settings-link:before{background-image:linear-gradient(transparent,transparent),url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink' width='20' height='20' viewBox='0 0 20 20'%3E%3Ctitle%3Esettings%3C/title%3E%3Cg transform='translate(10 10)'%3E%3Cpath id='a' d='M1.5-10h-3l-1 6.5h5m0 7h-5l1 6.5h3'/%3E%3Cuse xlink:href='%23a' transform='rotate(45)'/%3E%3Cuse xlink:href='%23a' transform='rotate(90)'/%3E%3Cuse xlink:href='%23a' transform='rotate(135)'/%3E%3C/g%3E%3Cpath d='M10 2.5a7.5 7.5 0 0 0 0 15 7.5 7.5 0 0 0 0-15v4a3.5 3.5 0 0 1 0 7 3.5 3.5 0 0 1 0-7'/%3E%3C/svg%3E");background-repeat:no-repeat;background-size:contain;color:#000;content:" ";font-size:.85em;opacity:.6;padding-left:1.67em}footer{background:#fff;border-top:1px solid #c8ccd1;font-size:.9em;line-height:1.5;padding:1em;text-align:center}footer ul{margin:0}footer li{display:inline}footer li:not(:last-child):after{content:" \00b7"}footer a{white-space:nowrap}ol,ul{line-height:1.5}h2{margin-bottom:.2em}#info-box,#notice-box{margin:1em 0;padding:0 1em}#notice-box ul{margin:0;padding-left:1.5em}#cv-result{margin:1em 0;padding:.5em}#attribution-warning{margin:1em 0;padding:1em}#sources-container,#turnitin-container{margin:1em 0;padding:.5em 1em 1em}#sources-container{background-color:#eee;border:1px solid #bbb}#sources-title,#turnitin-title{font-weight:700;margin-bottom:-.5em;text-align:center}#cv-additional{display:none}#generation-time{font-style:italic;text-align:right}@media only screen and (min-width:600px){#generation-time{margin-top:-1em}}#heading{width:100%}#cv-result-sources{border-spacing:0 .4em;table-layout:fixed;width:100%}#cv-result-sources col:first-child{width:80%}#cv-result-sources col:nth-child(2),#cv-result-sources col:nth-child(3){width:10%}#cv-result-sources th{text-align:left}#cv-result-sources tr:nth-child(2n){background-color:#e0e0e0}#cv-result-sources td:first-child{overflow:hidden;word-wrap:break-word}#cv-result-head-table{border-spacing:0;table-layout:fixed;text-align:center;width:100%}#cv-result-head-table col:nth-child(odd){width:42.5%}#cv-result-head-table col:nth-child(2){width:15%}#cv-result-head-table td:nth-child(odd){font-size:1.25em;font-weight:700;overflow:hidden;word-wrap:break-word}#cv-result-head-table td:nth-child(2) div:first-child{font-weight:700;white-space:nowrap}#cv-result-head-table td:nth-child(2) div:nth-child(2){font-size:2.5em;font-weight:700;line-height:1}#cv-result-head-table td:nth-child(2) div:nth-child(3){font-size:.8em}#cv-chain-table,#turnitin-table{border-spacing:0;table-layout:fixed;width:100%}#turnitin-table{word-wrap:break-word}#source-row-selected{background-color:#cfcfcf!important}#head-settings{text-align:right}#cv-result-header{margin:0}#redirected-from{font-size:.75em;font-weight:400}#redirected-from,#result-head-no-sources{font-style:italic}#source-selected{font-weight:700}#cv-cached{position:relative}#cv-cached span{background:#f3f3f3;border:1px solid #aaa;color:#000;display:none;font-style:normal;left:-5em;padding:1em;position:absolute;text-align:left;top:1.5em;width:30em;z-index:1}.green-box{background-color:#efe;border:1px solid #7f7}.yellow-box{background-color:#ffd;border:1px solid #ee5}.red-box{background-color:#fee;border:1px solid #f77}.gray-box{background-color:#eee;border:1px solid #aaa}.indentable{white-space:pre-wrap}.cv-source-footer{font-style:italic;padding-bottom:.5em}.cv-source-footer a{font-style:normal}.cv-chain-detail{background-color:#fff;border:1px solid #bbb;padding:0 1em}.cv-chain-cell{vertical-align:top;word-wrap:break-word}.cv-chain-cell:first-child{padding-right:.5em}.cv-chain-cell:last-child{padding-left:.5em}.turnitin-table-cell{padding:.5em 0 .3em}.turnitin-table-cell ul{line-height:1.4;margin:.2em 0 0}.cv-hl{background:#faa}.cv-hl-in{background:#fcc;background:linear-gradient(270deg,#faa,#fff)}.cv-hl-out{background:#fcc;background:linear-gradient(90deg,#faa,#fff)}.mono{font-family:monospace}.light{color:#ccc}.medium{color:#aaa}.source-similarity{font-weight:700}.source-suspect{color:#900}.source-possible{color:#990}.source-novio{color:#090}.source-excluded,.source-skipped{font-style:italic}a:link,a:visited{color:#002bb8;text-decoration:none}a:hover{color:#002bb8}a:active,a:hover{text-decoration:underline}a:active{color:#404}header a:hover,header a:link,header a:visited{color:#54595d}header a:active{color:#333}#cv-cached:active{color:#040}#cv-cached:active,#cv-cached:hover{text-decoration:none}#cv-cached:hover span{display:block}.source-url:link,.source-url:visited{color:#357}.source-url:hover{color:#035}.source-url:active{color:#404}.oo-ui-horizontalLayout>.oo-ui-dropdownInputWidget,.oo-ui-horizontalLayout>.oo-ui-textInputWidget{width:auto}.oo-ui-fieldLayout.oo-ui-fieldLayout-align-inline{hyphens:manual}.oo-ui-fieldLayout.oo-ui-labelElement.oo-ui-fieldLayout-align-left>.oo-ui-fieldLayout-body>.oo-ui-fieldLayout-header,.oo-ui-fieldLayout.oo-ui-labelElement.oo-ui-fieldLayout-align-right>.oo-ui-fieldLayout-body>.oo-ui-fieldLayout-header{width:10%}.oo-ui-fieldLayout.oo-ui-labelElement.oo-ui-fieldLayout-align-left>.oo-ui-fieldLayout-body>.oo-ui-fieldLayout-field,.oo-ui-fieldLayout.oo-ui-labelElement.oo-ui-fieldLayout-align-right>.oo-ui-fieldLayout-body>.oo-ui-fieldLayout-field{width:90%}.compare-url.oo-ui-textInputWidget,.page-title.oo-ui-textInputWidget{width:60%}.page-oldid.oo-ui-textInputWidget{width:10em}label.page,label.site{min-width:4em}label.action{min-width:10em}@media only screen and (max-width:720px){.oo-ui-horizontalLayout>.oo-ui-widget{width:100%}} \ No newline at end of file