@@ -24,25 +24,19 @@ Running | |||
- Install all dependencies listed above. You might want to use a | |||
[virtualenv](http://virtualenv.readthedocs.org/). | |||
- Create the SQL database defined in `schema.sql`. Also create the `cache` and | |||
`cache_data` tables defined by | |||
[earwigbot-plugins](https://github.com/earwig/earwigbot-plugins/blob/develop/tasks/schema/afc_copyvios.sql); | |||
this can be in the same or a different database. | |||
- Create an SQL database with the `cache` and `cache_data` tables defined by | |||
[earwigbot-plugins](https://github.com/earwig/earwigbot-plugins/blob/develop/tasks/schema/afc_copyvios.sql). | |||
- Create an earwigbot instance in `.earwigbot` (run `earwigbot .earwigbot`). In | |||
`.earwigbot/config.yml`, fill out the connection info for the database(s) | |||
above by adding the following to the `wiki` section: | |||
`.earwigbot/config.yml`, fill out the connection info for the database by | |||
adding the following to the `wiki` section: | |||
_copyviosSQL: | |||
globals: | |||
host: <hostname of database defined in schema.sql> | |||
db: <name of database> | |||
cache: | |||
host: <hostname of database containing cache and cache_data tables> | |||
db: <name of database> | |||
host: <hostname of database server> | |||
db: <name of database> | |||
If additional arguments are needed by `oursql.connect()`, like usernames or | |||
passwords, they should be added to the `globals` and `cache` sections. | |||
passwords, they should be added to the `_copyviosSQL` section. | |||
- Copy `.lighttpd.conf` to the relevant location (on Tool Labs, this is in the | |||
root of the project's home directory) and adjust its contents as necessary. | |||
@@ -18,7 +18,7 @@ from copyvios.api import format_api_error, handle_api_request | |||
from copyvios.checker import do_check | |||
from copyvios.cookies import parse_cookies | |||
from copyvios.settings import process_settings | |||
from copyvios.sites import get_sites | |||
from copyvios.sites import update_sites | |||
app = Flask(__name__) | |||
MakoTemplates(app) | |||
@@ -28,9 +28,7 @@ app.logger.addHandler(TimedRotatingFileHandler( | |||
"logs/app.log", when="midnight", backupCount=7)) | |||
app.logger.info(u"Flask server started " + asctime()) | |||
bot = Bot(".earwigbot", 100) | |||
getLogger("earwigbot.wiki.cvworker").setLevel(INFO) | |||
globalize() | |||
def catch_errors(func): | |||
@wraps(func) | |||
@@ -43,12 +41,20 @@ def catch_errors(func): | |||
return render_template("error.mako", traceback=format_exc()) | |||
return inner | |||
# flask.g is bound to the application context, which Flask creates anew for
# every request, so it cannot carry state from one request to the next.
# Long-lived objects are kept here and re-attached to ``g`` for each request.
_persistent = {}


def _attach_persistent_state():
    """Expose every long-lived object as an attribute of ``g``."""
    for key, value in _persistent.items():
        setattr(g, key, value)


@app.before_first_request
def setup_app():
    """Create the long-lived application state before the first request.

    This builds the bot, the (initially empty) site lists and background
    caches, and then calls ``globalize()``.
    """
    _persistent.update({
        "bot": Bot(".earwigbot", 100),
        "langs": set(),
        "projects": set(),
        "last_sites_update": 0,
        "background_data": {},
        "last_background_updates": {},
    })
    # Attach before globalize() so that ``g`` is populated in the same order
    # as before, in case globalize() relies on it.
    _attach_persistent_state()
    globalize()


@app.before_request
def prepare_request():
    """Populate ``g`` with the shared state and this request's own data.

    Per-request data: ``g.db`` (no connection yet), ``g.cookies`` (parsed
    from the Cookie header) and ``g.new_cookies`` (cookies to be set).
    """
    _attach_persistent_state()
    g.db = None
    g.cookies = parse_cookies(
        request.script_root, request.environ.get("HTTP_COOKIE"))
    g.new_cookies = []


@app.teardown_request
def _save_persistent_state(error):
    """Carry attributes that were rebound on ``g`` over to later requests.

    Code such as update_sites() assigns new objects to ``g.langs``,
    ``g.projects`` and ``g.last_sites_update``; without this write-back
    those assignments would be lost when the request ends.
    """
    for key in list(_persistent):
        if hasattr(g, key):
            _persistent[key] = getattr(g, key)
@app.after_request | |||
@@ -66,25 +72,24 @@ def write_access_log(response): | |||
@app.teardown_appcontext
def close_databases(error):
    """Close this context's database connection, if one was opened.

    ``error`` is supplied by Flask's teardown machinery and is not used.
    """
    # Teardown can run for a context in which prepare_request() never
    # assigned ``g.db``; treat a missing attribute as "no connection".
    db = getattr(g, "db", None)
    if db:
        db.close()
@app.route("/")
@catch_errors
def index():
    """Run the check for this request and render the main page."""
    check = do_check()
    # Refresh g.langs / g.projects, which the template reads directly.
    update_sites()
    context = {"query": check, "result": check.result}
    return render_template("index.mako", **context)
@app.route("/settings", methods=["GET", "POST"])
@catch_errors
def settings():
    """Render the settings page, processing submitted settings on POST.

    The template receives the result of process_settings() (``None`` for a
    plain GET) and the default site's language and project. The language
    and project lists are read from ``g.langs`` / ``g.projects``, which
    update_sites() keeps current.
    """
    status = process_settings() if request.method == "POST" else None
    update_sites()
    default = g.bot.wiki.get_site()
    kwargs = {
        "status": status,
        "default_lang": default.lang,
        "default_project": default.project,
    }
    return render_template("settings.mako", **kwargs)
@app.route("/api") | |||
@@ -2,9 +2,11 @@ | |||
from collections import OrderedDict | |||
from flask import g | |||
from .checker import do_check, T_POSSIBLE, T_SUSPECT | |||
from .misc import Query | |||
from .sites import get_sites | |||
from .sites import update_sites | |||
__all__ = ["format_api_error", "handle_api_request"] | |||
@@ -92,9 +94,9 @@ def _hook_check(query): | |||
return data | |||
def _hook_sites(query):
    """API hook: list the languages and projects the tool knows about.

    ``query`` is accepted for parity with the other hooks and is not read.

    Returns:
        An OrderedDict with ``status``, ``langs`` and ``projects``; the
        latter two are sorted lists of ``(code, name)`` pairs.
    """
    update_sites()
    # g.langs and g.projects are sets: they are not JSON-serializable and
    # have no stable order, so hand the API sorted lists instead.
    return OrderedDict((
        ("status", "ok"),
        ("langs", sorted(g.langs)),
        ("projects", sorted(g.projects)),
    ))
_HOOKS = { | |||
"compare": _hook_check, | |||
@@ -4,73 +4,30 @@ from datetime import datetime, timedelta | |||
from json import loads | |||
import random | |||
import re | |||
from time import time | |||
from earwigbot import exceptions | |||
from flask import g | |||
from .misc import get_globals_db | |||
__all__ = ["set_background"] | |||
def set_background(selected): | |||
conn = get_globals_db() | |||
if "CopyviosScreenCache" in g.cookies: | |||
cache = g.cookies["CopyviosScreenCache"].value | |||
try: | |||
screen = loads(cache) | |||
int(screen["width"]) | |||
int(screen["height"]) | |||
except (ValueError, KeyError): | |||
screen = {"width": 1024, "height": 768} | |||
else: | |||
screen = {"width": 1024, "height": 768} | |||
if selected == "potd": | |||
info = _update_url(conn, "background_potd", 1, _get_fresh_potd) | |||
else: | |||
info = _update_url(conn, "background_list", 2, _get_fresh_list) | |||
filename, url, descurl, width, height = info | |||
bg_url = _build_url(screen, filename, url, width, height) | |||
g.descurl = descurl | |||
return bg_url | |||
def _get_commons_site():
    """Return the bot's site object for Wikimedia Commons.

    If the bot does not know the site yet, it is added first.
    """
    wiki = g.bot.wiki
    try:
        commons = wiki.get_site("commonswiki")
    except exceptions.SiteNotFoundError:
        commons = wiki.add_site(project="wikimedia", lang="commons")
    return commons
def _update_url(conn, service, bg_id, callback): | |||
query1 = "SELECT update_time FROM updates WHERE update_service = ?" | |||
query2 = "SELECT 1 FROM background WHERE background_id = ?" | |||
query3 = "DELETE FROM background WHERE background_id = ?" | |||
query4 = "INSERT INTO background VALUES (?, ?, ?, ?, ?, ?)" | |||
query5 = "SELECT 1 FROM updates WHERE update_service = ?" | |||
query6 = "UPDATE updates SET update_time = ? WHERE update_service = ?" | |||
query7 = "INSERT INTO updates VALUES (?, ?)" | |||
query8 = "SELECT * FROM background WHERE background_id = ?" | |||
with conn.cursor() as cursor: | |||
cursor.execute(query1, (service,)) | |||
try: | |||
update_time = datetime.utcfromtimestamp(cursor.fetchall()[0][0]) | |||
except IndexError: | |||
update_time = datetime.min | |||
plus_one = update_time + timedelta(days=1) | |||
max_age = datetime(plus_one.year, plus_one.month, plus_one.day) | |||
if datetime.utcnow() > max_age: | |||
filename, url, descurl, width, height = callback() | |||
cursor.execute(query2, (bg_id,)) | |||
if cursor.fetchall(): | |||
cursor.execute(query3, (bg_id,)) | |||
cursor.execute(query4, (bg_id, filename, url, descurl, width, | |||
height)) | |||
cursor.execute(query5, (service,)) | |||
if cursor.fetchall(): | |||
cursor.execute(query6, (time(), service)) | |||
else: | |||
cursor.execute(query7, (service, time())) | |||
else: | |||
cursor.execute(query8, (bg_id,)) | |||
filename, url, descurl, width, height = cursor.fetchone()[1:] | |||
return filename, url, descurl, width, height | |||
def _load_file(site, filename):
    """Query image info for a file and return what is needed to show it.

    Args:
        site: object whose ``api_query(**params)`` performs the API request.
        filename: file title without the ``File:`` prefix.

    Returns:
        ``(filename, url, descurl, width, height)``, where spaces in
        ``filename`` have been replaced by underscores.
    """
    res = site.api_query(action="query", prop="imageinfo", iiprop="url|size",
                         titles="File:" + filename)
    # A single title is requested, so take the only page entry regardless of
    # its page-id key. next(iter(...)) works both for the list returned by
    # Python 2's dict.values() and for the view returned by Python 3's.
    page = next(iter(res["query"]["pages"].values()))
    data = page["imageinfo"][0]
    return (filename.replace(" ", "_"), data["url"], data["descriptionurl"],
            data["width"], data["height"])
def _get_fresh_potd(): | |||
site = _get_site() | |||
site = _get_commons_site() | |||
date = datetime.utcnow().strftime("%Y-%m-%d") | |||
page = site.get_page("Template:Potd/" + date) | |||
regex = ur"\{\{Potd filename\|(?:1=)?(.*?)\|.*?\}\}" | |||
@@ -78,29 +35,13 @@ def _get_fresh_potd(): | |||
return _load_file(site, filename) | |||
def _get_fresh_list(): | |||
site = _get_site() | |||
site = _get_commons_site() | |||
page = site.get_page("User:The Earwig/POTD") | |||
regex = ur"\*\*?\s*\[\[:File:(.*?)\]\]" | |||
filenames = re.findall(regex, page.get()) | |||
filename = random.choice(filenames) | |||
return _load_file(site, filename) | |||
def _load_file(site, filename): | |||
res = site.api_query(action="query", prop="imageinfo", iiprop="url|size", | |||
titles="File:" + filename) | |||
data = res["query"]["pages"].values()[0]["imageinfo"][0] | |||
url = data["url"] | |||
descurl = data["descriptionurl"] | |||
width = data["width"] | |||
height = data["height"] | |||
return filename.replace(" ", "_"), url, descurl, width, height | |||
def _get_site(): | |||
try: | |||
return g.bot.wiki.get_site("commonswiki") | |||
except exceptions.SiteNotFoundError: | |||
return g.bot.wiki.add_site(project="wikimedia", lang="commons") | |||
def _build_url(screen, filename, url, imgwidth, imgheight): | |||
width = screen["width"] | |||
if float(imgwidth) / imgheight > float(screen["width"]) / screen["height"]: | |||
@@ -109,3 +50,38 @@ def _build_url(screen, filename, url, imgwidth, imgheight): | |||
return url | |||
url = url.replace("/commons/", "/commons/thumb/") | |||
return url + "/" + str(width) + "px-" + filename | |||
_BACKGROUNDS = { | |||
"potd": _get_fresh_potd, | |||
"list": _get_fresh_list | |||
} | |||
# Fetched background data and the time of each fetch, keyed by background
# name. Kept at module level because flask.g is rebuilt for every request
# and therefore cannot act as a cache across requests.
_background_cache = {}
_background_updated = {}


def _get_background(selected):
    """Return image data for the chosen background, refreshed once per day.

    Args:
        selected: a key of ``_BACKGROUNDS``; any other value falls back to
            ``"list"``.

    Returns:
        Whatever the background's fetch function returned, which
        set_background() unpacks as
        ``(filename, url, descurl, width, height)``.
    """
    if selected not in _BACKGROUNDS:
        selected = "list"
    last_update = _background_updated.get(selected, datetime.min)
    # The data expires at the first UTC midnight after it was fetched.
    plus_one = last_update + timedelta(days=1)
    max_age = datetime(plus_one.year, plus_one.month, plus_one.day)
    if datetime.utcnow() > max_age:
        _background_cache[selected] = _BACKGROUNDS[selected]()
        _background_updated[selected] = datetime.utcnow()
    return _background_cache[selected]
def set_background(selected):
    """Choose the page background and return the URL to load it from.

    The visitor's screen size is read from the ``CopyviosScreenCache``
    cookie (a JSON object with ``width`` and ``height``); 1024x768 is used
    when the cookie is absent or unusable. As a side effect, ``g.descurl``
    is set to the image's description page URL.

    Args:
        selected: name of the background to use, passed to
            _get_background().

    Returns:
        The image URL built by _build_url() for that screen size.
    """
    screen = {"width": 1024, "height": 768}
    if "CopyviosScreenCache" in g.cookies:
        cache = g.cookies["CopyviosScreenCache"].value
        try:
            cached = loads(cache)
            scr_width = int(cached["width"])
            scr_height = int(cached["height"])
        except (ValueError, TypeError, KeyError, OverflowError):
            # Not JSON, not an object, missing keys, or non-numeric values:
            # the cookie is client-controlled, so just keep the default.
            pass
        else:
            # _build_url() divides by the height and does arithmetic on
            # both values, so only accept positive integers.
            if scr_width > 0 and scr_height > 0:
                screen = {"width": scr_width, "height": scr_height}
    filename, url, descurl, width, height = _get_background(selected)
    bg_url = _build_url(screen, filename, url, width, height)
    g.descurl = descurl
    return bg_url
@@ -9,8 +9,8 @@ from earwigbot.wiki.copyvios.markov import EMPTY, MarkovChain | |||
from earwigbot.wiki.copyvios.parsers import ArticleTextParser | |||
from earwigbot.wiki.copyvios.result import CopyvioSource, CopyvioCheckResult | |||
from .misc import Query, get_cache_db | |||
from .sites import get_site, get_sites | |||
from .misc import Query, get_db | |||
from .sites import get_site | |||
__all__ = ["do_check", "T_POSSIBLE", "T_SUSPECT"] | |||
@@ -30,7 +30,6 @@ def do_check(query=None): | |||
if query.project: | |||
query.project = query.project.lower() | |||
query.all_langs, query.all_projects = get_sites() | |||
query.submitted = query.project and query.lang and (query.title or query.oldid) | |||
if query.submitted: | |||
query.site = get_site(query) | |||
@@ -61,7 +60,7 @@ def _get_results(query, follow=True): | |||
if not query.action: | |||
query.action = "compare" if query.url else "search" | |||
if query.action == "search": | |||
conn = get_cache_db() | |||
conn = get_db() | |||
use_engine = 0 if query.use_engine in ("0", "false") else 1 | |||
use_links = 0 if query.use_links in ("0", "false") else 1 | |||
if not use_engine and not use_links: | |||
@@ -8,7 +8,7 @@ from sqlalchemy.pool import manage | |||
oursql = manage(oursql) | |||
__all__ = ["Query", "get_globals_db", "get_cache_db", "httpsfix", "urlstrip"] | |||
__all__ = ["Query", "get_db", "httpsfix", "urlstrip"] | |||
class Query(object): | |||
def __init__(self, method="GET"): | |||
@@ -27,22 +27,14 @@ class Query(object): | |||
self.query[key] = value | |||
def _connect_db(name): | |||
args = g.bot.config.wiki["_copyviosSQL"][name] | |||
args["read_default_file"] = expanduser("~/.my.cnf") | |||
args["autoping"] = True | |||
args["autoreconnect"] = True | |||
return oursql.connect(**args) | |||
def get_globals_db(): | |||
if not g.globals_db: | |||
g.globals_db = _connect_db("globals") | |||
return g.globals_db | |||
def get_cache_db(): | |||
if not g.cache_db: | |||
g.cache_db = _connect_db("cache") | |||
return g.cache_db | |||
def get_db():
    """Return this request's database connection, opening it on first use.

    Connection settings come from the ``_copyviosSQL`` entry of the bot's
    wiki config; credentials are read from ``~/.my.cnf``. The connection is
    stored on ``g.db`` so that later calls in the same request reuse it.
    """
    if not g.db:
        # Work on a copy: the dict belongs to the bot's configuration, and
        # the connection-only options below should not be written into it.
        args = dict(g.bot.config.wiki["_copyviosSQL"])
        args["read_default_file"] = expanduser("~/.my.cnf")
        args["autoping"] = True
        args["autoreconnect"] = True
        g.db = oursql.connect(**args)
    return g.db
def httpsfix(context, url): | |||
if url.startswith("http://"): | |||
@@ -6,9 +6,7 @@ from urlparse import urlparse | |||
from earwigbot import exceptions | |||
from flask import g | |||
from .misc import get_globals_db | |||
__all__ = ["get_site", "get_sites"] | |||
__all__ = ["get_site", "update_sites"] | |||
def get_site(query): | |||
lang, project, name = query.lang, query.project, query.name | |||
@@ -31,35 +29,16 @@ def get_site(query): | |||
except (exceptions.APIError, exceptions.LoginError): | |||
return None | |||
def get_sites(): | |||
max_staleness = 60 * 60 * 24 * 7 | |||
conn = get_globals_db() | |||
query1 = "SELECT update_time FROM updates WHERE update_service = ?" | |||
query2 = "SELECT lang_code, lang_name FROM language" | |||
query3 = "SELECT project_code, project_name FROM project" | |||
with conn.cursor() as cursor: | |||
cursor.execute(query1, ("sites",)) | |||
try: | |||
time_since_update = int(time() - cursor.fetchall()[0][0]) | |||
except IndexError: | |||
time_since_update = time() | |||
if time_since_update > max_staleness: | |||
_update_sites(cursor) | |||
cursor.execute(query2) | |||
langs = [] | |||
for code, name in cursor.fetchall(): | |||
if "\U" in name: | |||
name = name.decode("unicode_escape") | |||
langs.append((code, name)) | |||
cursor.execute(query3) | |||
projects = cursor.fetchall() | |||
return langs, projects | |||
# Site lists shared by all requests, plus the time they were last loaded.
# Kept at module level because flask.g is rebuilt for every request and so
# cannot remember anything between requests.
_sites_cache = {"langs": set(), "projects": set(), "updated": 0}


def update_sites():
    """Make ``g.langs`` and ``g.projects`` available, reloading them weekly.

    The lists are reloaded with _load_sites() when the cached copy is more
    than seven days old (or was never loaded); otherwise the cached copy is
    attached to ``g`` as is. ``g.last_sites_update`` is set to the time of
    the last reload.
    """
    max_staleness = 60 * 60 * 24 * 7
    if time() - _sites_cache["updated"] > max_staleness:
        langs, projects = _load_sites()
        _sites_cache.update(langs=langs, projects=projects, updated=time())
    g.langs = _sites_cache["langs"]
    g.projects = _sites_cache["projects"]
    g.last_sites_update = _sites_cache["updated"]
def _update_sites(cursor): | |||
def _load_sites(): | |||
site = g.bot.wiki.get_site() | |||
matrix = site.api_query(action="sitematrix")["sitematrix"] | |||
del matrix["count"] | |||
languages, projects = set(), set() | |||
langs, projects = set(), set() | |||
for site in matrix.itervalues(): | |||
if isinstance(site, list): # Special sites | |||
bad_sites = ["closed", "private", "fishbowl"] | |||
@@ -72,50 +51,17 @@ def _update_sites(cursor): | |||
lang, project = full.rsplit(".", 2)[:2] | |||
code = u"{0}::{1}".format(lang, special["dbname"]) | |||
name = special["code"].capitalize() | |||
languages.add((code, u"{0} ({1})".format(lang, name))) | |||
langs.add((code, u"{0} ({1})".format(lang, name))) | |||
projects.add((project, project.capitalize())) | |||
continue | |||
this = set() | |||
for web in site["site"]: | |||
if "closed" in web: | |||
continue | |||
project = "wikipedia" if web["code"] == u"wiki" else web["code"] | |||
this.add((project, project.capitalize())) | |||
if this: | |||
code = site["code"] | |||
if "\U" in site["name"].encode("unicode_escape"): | |||
name = site["name"].encode("unicode_escape") | |||
else: | |||
name = site["name"] | |||
languages.add((code, u"{0} ({1})".format(code, name))) | |||
projects |= this | |||
_save_site_updates(cursor, languages, projects) | |||
def _save_site_updates(cursor, languages, projects): | |||
query1 = "SELECT lang_code, lang_name FROM language" | |||
query2 = "DELETE FROM language WHERE lang_code = ? AND lang_name = ?" | |||
query3 = "INSERT INTO language VALUES (?, ?)" | |||
query4 = "SELECT project_code, project_name FROM project" | |||
query5 = "DELETE FROM project WHERE project_code = ? AND project_name = ?" | |||
query6 = "INSERT INTO project VALUES (?, ?)" | |||
query7 = "SELECT 1 FROM updates WHERE update_service = ?" | |||
query8 = "UPDATE updates SET update_time = ? WHERE update_service = ?" | |||
query9 = "INSERT INTO updates VALUES (?, ?)" | |||
_synchronize_sites_with_db(cursor, languages, query1, query2, query3) | |||
_synchronize_sites_with_db(cursor, projects, query4, query5, query6) | |||
cursor.execute(query7, ("sites",)) | |||
if cursor.fetchall(): | |||
cursor.execute(query8, (time(), "sites")) | |||
else: | |||
cursor.execute(query9, ("sites", time())) | |||
def _synchronize_sites_with_db(cursor, updates, q_list, q_rmv, q_update): | |||
removals = [] | |||
cursor.execute(q_list) | |||
for site in cursor: | |||
if site in updates: | |||
updates.remove(site) | |||
else: | |||
removals.append(site) | |||
cursor.executemany(q_rmv, removals) | |||
cursor.executemany(q_update, updates) | |||
this = set() | |||
for web in site["site"]: | |||
if "closed" in web: | |||
continue | |||
proj = "wikipedia" if web["code"] == u"wiki" else web["code"] | |||
this.add((proj, proj.capitalize())) | |||
if this: | |||
code = site["code"] | |||
langs.add((code, u"{0} ({1})".format(code, site["name"]))) | |||
projects |= this | |||
return langs, projects |
@@ -1,60 +0,0 @@ | |||
CREATE DATABASE `u_earwig_copyvios` | |||
DEFAULT CHARACTER SET utf8 | |||
DEFAULT COLLATE utf8_unicode_ci; | |||
DROP TABLE IF EXISTS `background`; | |||
CREATE TABLE `background` ( | |||
`background_id` int(9) unsigned NOT NULL, | |||
`background_filename` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL, | |||
`background_url` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL, | |||
`background_descurl` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL, | |||
`background_width` int(9) unsigned DEFAULT NULL, | |||
`background_height` int(9) unsigned DEFAULT NULL, | |||
PRIMARY KEY (`background_id`) | |||
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; | |||
DROP TABLE IF EXISTS `language`; | |||
CREATE TABLE `language` ( | |||
`lang_code` varchar(64) COLLATE utf8_unicode_ci NOT NULL DEFAULT '', | |||
`lang_name` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL, | |||
PRIMARY KEY (`lang_code`) | |||
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; | |||
DROP TABLE IF EXISTS `project`; | |||
CREATE TABLE `project` ( | |||
`project_code` varchar(64) COLLATE utf8_unicode_ci NOT NULL DEFAULT '', | |||
`project_name` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL, | |||
PRIMARY KEY (`project_code`) | |||
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; | |||
DROP TABLE IF EXISTS `updates`; | |||
CREATE TABLE `updates` ( | |||
`update_service` varchar(128) COLLATE utf8_unicode_ci NOT NULL DEFAULT '', | |||
`update_time` int(10) unsigned DEFAULT NULL, | |||
PRIMARY KEY (`update_service`) | |||
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; | |||
@@ -1,6 +1,4 @@ | |||
<%! | |||
from json import dumps | |||
%>\ | |||
<%! from json import dumps %>\ | |||
<%def name="do_indent(size)"> | |||
<br /> | |||
% for i in xrange(size): | |||
@@ -18,9 +16,9 @@ | |||
} | |||
% elif isinstance(obj, (type([]), type(()))): | |||
[ | |||
% for member in obj: | |||
% for elem in obj: | |||
${do_indent(indent + 1)} | |||
${walk_json(member, indent + 1)}${"," if not loop.last else ""} | |||
${walk_json(elem, indent + 1)}${"," if not loop.last else ""} | |||
% endfor | |||
${do_indent(indent)} | |||
] | |||
@@ -51,7 +51,7 @@ | |||
<span class="mono">https://</span> | |||
<select name="lang"> | |||
<% selected_lang = query.orig_lang if query.orig_lang else g.cookies["CopyviosDefaultLang"].value if "CopyviosDefaultLang" in g.cookies else g.bot.wiki.get_site().lang %>\ | |||
% for code, name in query.all_langs: | |||
% for code, name in g.langs: | |||
% if code == selected_lang: | |||
<option value="${code | h}" selected="selected">${name}</option> | |||
% else: | |||
@@ -62,7 +62,7 @@ | |||
<span class="mono">.</span> | |||
<select name="project"> | |||
<% selected_project = query.project if query.project else g.cookies["CopyviosDefaultProject"].value if "CopyviosDefaultProject" in g.cookies else g.bot.wiki.get_site().project %>\ | |||
% for code, name in query.all_projects: | |||
% for code, name in g.projects: | |||
% if code == selected_project: | |||
<option value="${code | h}" selected="selected">${name}</option> | |||
% else: | |||
@@ -17,7 +17,7 @@ | |||
<span class="mono">https://</span> | |||
<select name="lang"> | |||
<% selected_lang = g.cookies["CopyviosDefaultLang"].value if "CopyviosDefaultLang" in g.cookies else default_lang %>\ | |||
% for code, name in langs: | |||
% for code, name in g.langs: | |||
% if code == selected_lang: | |||
<option value="${code | h}" selected="selected">${name}</option> | |||
% else: | |||
@@ -28,7 +28,7 @@ | |||
<span class="mono">.</span> | |||
<select name="project"> | |||
<% selected_project = g.cookies["CopyviosDefaultProject"].value if "CopyviosDefaultProject" in g.cookies else default_project %>\ | |||
% for code, name in projects: | |||
% for code, name in g.projects: | |||
% if code == selected_project: | |||
<option value="${code | h}" selected="selected">${name}</option> | |||
% else: | |||