From ddcf2a89637509b48f2cb53cfcf2dde9e11d60e4 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 17 Sep 2014 20:32:18 -0500 Subject: [PATCH] Store what was previously in SQL tables in flask.g. --- README.md | 22 +++------ app.fcgi | 35 ++++++++------ copyvios/api.py | 8 +-- copyvios/background.py | 126 ++++++++++++++++++++---------------------------- copyvios/checker.py | 7 ++- copyvios/misc.py | 26 ++++------ copyvios/sites.py | 92 ++++++++--------------------------- schema.sql | 60 ----------------------- templates/api.mako | 8 ++- templates/index.mako | 4 +- templates/settings.mako | 4 +- 11 files changed, 122 insertions(+), 270 deletions(-) delete mode 100644 schema.sql diff --git a/README.md b/README.md index 7134e72..7317da4 100644 --- a/README.md +++ b/README.md @@ -24,25 +24,19 @@ Running - Install all dependencies listed above. You might want to use a [virtualenv](http://virtualenv.readthedocs.org/). -- Create the SQL database defined in `schema.sql`. Also create the `cache` and - `cache_data` tables defined by - [earwigbot-plugins](https://github.com/earwig/earwigbot-plugins/blob/develop/tasks/schema/afc_copyvios.sql); - this can be in the same or a different database. +- Create an SQL database with the `cache` and `cache_data` tables defined by + [earwigbot-plugins](https://github.com/earwig/earwigbot-plugins/blob/develop/tasks/schema/afc_copyvios.sql). - Create an earwigbot instance in `.earwigbot` (run `earwigbot .earwigbot`). In - `.earwigbot/config.yml`, fill out the connection info for the database(s) - above by adding the following to the `wiki` section: + `.earwigbot/config.yml`, fill out the connection info for the database by + adding the following to the `wiki` section: _copyviosSQL: - globals: - host: - db: - cache: - host: - db: - + host: + db: + If additional arguments are needed by `oursql.connect()`, like usernames or - passwords, they should be added to the `globals` and `cache` sections. + passwords, they should be added to the `_copyviosSQL` section. - Copy `.lighttpd.conf` to the relevant location (on Tool Labs, this is in the root of the project's home directory) and adjust its contents as necessary. 
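Note: below is a minimal sketch of how the consolidated `_copyviosSQL` section is consumed, assuming a config laid out as shown above (any `user`/`passwd` keys are only the optional extras mentioned, not required). The authoritative version is the new `get_db()` helper in `copyvios/misc.py` further down in this patch.

```python
from os.path import expanduser

import oursql
from sqlalchemy.pool import manage

oursql = manage(oursql)  # pooled connections, mirroring copyvios/misc.py

def connect_from_config(config):
    # config.wiki["_copyviosSQL"] holds e.g. {"host": ..., "db": ...}, plus
    # any extra oursql.connect() keywords such as user or passwd if needed.
    args = dict(config.wiki["_copyviosSQL"])
    args["read_default_file"] = expanduser("~/.my.cnf")
    args["autoping"] = True
    args["autoreconnect"] = True
    return oursql.connect(**args)
```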
diff --git a/app.fcgi b/app.fcgi index e1f0b53..eb9823c 100755 --- a/app.fcgi +++ b/app.fcgi @@ -18,7 +18,7 @@ from copyvios.api import format_api_error, handle_api_request from copyvios.checker import do_check from copyvios.cookies import parse_cookies from copyvios.settings import process_settings -from copyvios.sites import get_sites +from copyvios.sites import update_sites app = Flask(__name__) MakoTemplates(app) @@ -28,9 +28,7 @@ app.logger.addHandler(TimedRotatingFileHandler( "logs/app.log", when="midnight", backupCount=7)) app.logger.info(u"Flask server started " + asctime()) -bot = Bot(".earwigbot", 100) getLogger("earwigbot.wiki.cvworker").setLevel(INFO) -globalize() def catch_errors(func): @wraps(func) @@ -43,12 +41,20 @@ def catch_errors(func): return render_template("error.mako", traceback=format_exc()) return inner +@app.before_first_request +def setup_app(): + g.bot = Bot(".earwigbot", 100) + g.langs, g.projects = set(), set() + g.last_sites_update = 0 + g.background_data = {} + g.last_background_updates = {} + globalize() + @app.before_request def prepare_request(): - g.bot = bot - g.globals_db = g.cache_db = None - g.cookies = parse_cookies(request.script_root, - request.environ.get("HTTP_COOKIE")) + g.db = None + g.cookies = parse_cookies( + request.script_root, request.environ.get("HTTP_COOKIE")) g.new_cookies = [] @app.after_request @@ -66,25 +72,24 @@ def write_access_log(response): @app.teardown_appcontext def close_databases(error): - if g.globals_db: - g.globals_db.close() - if g.cache_db: - g.cache_db.close() + if g.db: + g.db.close() @app.route("/") @catch_errors def index(): query = do_check() + update_sites() return render_template("index.mako", query=query, result=query.result) @app.route("/settings", methods=["GET", "POST"]) @catch_errors def settings(): status = process_settings() if request.method == "POST" else None - langs, projects = get_sites() - default = bot.wiki.get_site() - kwargs = {"status": status, "langs": langs, "projects": projects, - "default_lang": default.lang, "default_project": default.project} + update_sites() + default = g.bot.wiki.get_site() + kwargs = {"status": status, "default_lang": default.lang, + "default_project": default.project} return render_template("settings.mako", **kwargs) @app.route("/api") diff --git a/copyvios/api.py b/copyvios/api.py index 3ccc76e..b69c49a 100644 --- a/copyvios/api.py +++ b/copyvios/api.py @@ -2,9 +2,11 @@ from collections import OrderedDict +from flask import g + from .checker import do_check, T_POSSIBLE, T_SUSPECT from .misc import Query -from .sites import get_sites +from .sites import update_sites __all__ = ["format_api_error", "handle_api_request"] @@ -92,9 +94,9 @@ def _hook_check(query): return data def _hook_sites(query): - langs, projects = get_sites() + update_sites() return OrderedDict(( - ("status", "ok"), ("langs", langs), ("projects", projects))) + ("status", "ok"), ("langs", g.langs), ("projects", g.projects))) _HOOKS = { "compare": _hook_check, diff --git a/copyvios/background.py b/copyvios/background.py index 73ae775..8c533d8 100644 --- a/copyvios/background.py +++ b/copyvios/background.py @@ -4,73 +4,30 @@ from datetime import datetime, timedelta from json import loads import random import re -from time import time from earwigbot import exceptions from flask import g -from .misc import get_globals_db - __all__ = ["set_background"] -def set_background(selected): - conn = get_globals_db() - if "CopyviosScreenCache" in g.cookies: - cache = g.cookies["CopyviosScreenCache"].value - try: - 
screen = loads(cache) - int(screen["width"]) - int(screen["height"]) - except (ValueError, KeyError): - screen = {"width": 1024, "height": 768} - else: - screen = {"width": 1024, "height": 768} - - if selected == "potd": - info = _update_url(conn, "background_potd", 1, _get_fresh_potd) - else: - info = _update_url(conn, "background_list", 2, _get_fresh_list) - filename, url, descurl, width, height = info - bg_url = _build_url(screen, filename, url, width, height) - g.descurl = descurl - return bg_url +def _get_commons_site(): + try: + return g.bot.wiki.get_site("commonswiki") + except exceptions.SiteNotFoundError: + return g.bot.wiki.add_site(project="wikimedia", lang="commons") -def _update_url(conn, service, bg_id, callback): - query1 = "SELECT update_time FROM updates WHERE update_service = ?" - query2 = "SELECT 1 FROM background WHERE background_id = ?" - query3 = "DELETE FROM background WHERE background_id = ?" - query4 = "INSERT INTO background VALUES (?, ?, ?, ?, ?, ?)" - query5 = "SELECT 1 FROM updates WHERE update_service = ?" - query6 = "UPDATE updates SET update_time = ? WHERE update_service = ?" - query7 = "INSERT INTO updates VALUES (?, ?)" - query8 = "SELECT * FROM background WHERE background_id = ?" - with conn.cursor() as cursor: - cursor.execute(query1, (service,)) - try: - update_time = datetime.utcfromtimestamp(cursor.fetchall()[0][0]) - except IndexError: - update_time = datetime.min - plus_one = update_time + timedelta(days=1) - max_age = datetime(plus_one.year, plus_one.month, plus_one.day) - if datetime.utcnow() > max_age: - filename, url, descurl, width, height = callback() - cursor.execute(query2, (bg_id,)) - if cursor.fetchall(): - cursor.execute(query3, (bg_id,)) - cursor.execute(query4, (bg_id, filename, url, descurl, width, - height)) - cursor.execute(query5, (service,)) - if cursor.fetchall(): - cursor.execute(query6, (time(), service)) - else: - cursor.execute(query7, (service, time())) - else: - cursor.execute(query8, (bg_id,)) - filename, url, descurl, width, height = cursor.fetchone()[1:] - return filename, url, descurl, width, height +def _load_file(site, filename): + res = site.api_query(action="query", prop="imageinfo", iiprop="url|size", + titles="File:" + filename) + data = res["query"]["pages"].values()[0]["imageinfo"][0] + url = data["url"] + descurl = data["descriptionurl"] + width = data["width"] + height = data["height"] + return filename.replace(" ", "_"), url, descurl, width, height def _get_fresh_potd(): - site = _get_site() + site = _get_commons_site() date = datetime.utcnow().strftime("%Y-%m-%d") page = site.get_page("Template:Potd/" + date) regex = ur"\{\{Potd filename\|(?:1=)?(.*?)\|.*?\}\}" @@ -78,29 +35,13 @@ def _get_fresh_potd(): return _load_file(site, filename) def _get_fresh_list(): - site = _get_site() + site = _get_commons_site() page = site.get_page("User:The Earwig/POTD") regex = ur"\*\*?\s*\[\[:File:(.*?)\]\]" filenames = re.findall(regex, page.get()) filename = random.choice(filenames) return _load_file(site, filename) -def _load_file(site, filename): - res = site.api_query(action="query", prop="imageinfo", iiprop="url|size", - titles="File:" + filename) - data = res["query"]["pages"].values()[0]["imageinfo"][0] - url = data["url"] - descurl = data["descriptionurl"] - width = data["width"] - height = data["height"] - return filename.replace(" ", "_"), url, descurl, width, height - -def _get_site(): - try: - return g.bot.wiki.get_site("commonswiki") - except exceptions.SiteNotFoundError: - return 
g.bot.wiki.add_site(project="wikimedia", lang="commons") - def _build_url(screen, filename, url, imgwidth, imgheight): width = screen["width"] if float(imgwidth) / imgheight > float(screen["width"]) / screen["height"]: @@ -109,3 +50,38 @@ def _build_url(screen, filename, url, imgwidth, imgheight): return url url = url.replace("/commons/", "/commons/thumb/") return url + "/" + str(width) + "px-" + filename + +_BACKGROUNDS = { + "potd": _get_fresh_potd, + "list": _get_fresh_list +} + +def _get_background(selected): + if not g.last_background_updates: + for key in _BACKGROUNDS: + g.last_background_updates[key] = datetime.min + + plus_one = g.last_background_updates[selected] + timedelta(days=1) + max_age = datetime(plus_one.year, plus_one.month, plus_one.day) + if datetime.utcnow() > max_age: + update_func = _BACKGROUNDS.get(selected, _get_fresh_list) + g.background_data[selected] = update_func() + g.last_background_updates[selected] = datetime.utcnow() + return g.background_data[selected] + +def set_background(selected): + if "CopyviosScreenCache" in g.cookies: + cache = g.cookies["CopyviosScreenCache"].value + try: + screen = loads(cache) + int(screen["width"]) + int(screen["height"]) + except (ValueError, KeyError): + screen = {"width": 1024, "height": 768} + else: + screen = {"width": 1024, "height": 768} + + filename, url, descurl, width, height = _get_background(selected) + bg_url = _build_url(screen, filename, url, width, height) + g.descurl = descurl + return bg_url diff --git a/copyvios/checker.py b/copyvios/checker.py index 4869733..6b88992 100644 --- a/copyvios/checker.py +++ b/copyvios/checker.py @@ -9,8 +9,8 @@ from earwigbot.wiki.copyvios.markov import EMPTY, MarkovChain from earwigbot.wiki.copyvios.parsers import ArticleTextParser from earwigbot.wiki.copyvios.result import CopyvioSource, CopyvioCheckResult -from .misc import Query, get_cache_db -from .sites import get_site, get_sites +from .misc import Query, get_db +from .sites import get_site __all__ = ["do_check", "T_POSSIBLE", "T_SUSPECT"] @@ -30,7 +30,6 @@ def do_check(query=None): if query.project: query.project = query.project.lower() - query.all_langs, query.all_projects = get_sites() query.submitted = query.project and query.lang and (query.title or query.oldid) if query.submitted: query.site = get_site(query) @@ -61,7 +60,7 @@ def _get_results(query, follow=True): if not query.action: query.action = "compare" if query.url else "search" if query.action == "search": - conn = get_cache_db() + conn = get_db() use_engine = 0 if query.use_engine in ("0", "false") else 1 use_links = 0 if query.use_links in ("0", "false") else 1 if not use_engine and not use_links: diff --git a/copyvios/misc.py b/copyvios/misc.py index d48efb2..45ddfaf 100644 --- a/copyvios/misc.py +++ b/copyvios/misc.py @@ -8,7 +8,7 @@ from sqlalchemy.pool import manage oursql = manage(oursql) -__all__ = ["Query", "get_globals_db", "get_cache_db", "httpsfix", "urlstrip"] +__all__ = ["Query", "get_db", "httpsfix", "urlstrip"] class Query(object): def __init__(self, method="GET"): @@ -27,22 +27,14 @@ class Query(object): self.query[key] = value -def _connect_db(name): - args = g.bot.config.wiki["_copyviosSQL"][name] - args["read_default_file"] = expanduser("~/.my.cnf") - args["autoping"] = True - args["autoreconnect"] = True - return oursql.connect(**args) - -def get_globals_db(): - if not g.globals_db: - g.globals_db = _connect_db("globals") - return g.globals_db - -def get_cache_db(): - if not g.cache_db: - g.cache_db = _connect_db("cache") - return 
g.cache_db +def get_db(): + if not g.db: + args = g.bot.config.wiki["_copyviosSQL"] + args["read_default_file"] = expanduser("~/.my.cnf") + args["autoping"] = True + args["autoreconnect"] = True + g.db = oursql.connect(**args) + return g.db def httpsfix(context, url): if url.startswith("http://"): diff --git a/copyvios/sites.py b/copyvios/sites.py index 8c186c4..85609b1 100644 --- a/copyvios/sites.py +++ b/copyvios/sites.py @@ -6,9 +6,7 @@ from urlparse import urlparse from earwigbot import exceptions from flask import g -from .misc import get_globals_db - -__all__ = ["get_site", "get_sites"] +__all__ = ["get_site", "update_sites"] def get_site(query): lang, project, name = query.lang, query.project, query.name @@ -31,35 +29,16 @@ def get_site(query): except (exceptions.APIError, exceptions.LoginError): return None -def get_sites(): - max_staleness = 60 * 60 * 24 * 7 - conn = get_globals_db() - query1 = "SELECT update_time FROM updates WHERE update_service = ?" - query2 = "SELECT lang_code, lang_name FROM language" - query3 = "SELECT project_code, project_name FROM project" - with conn.cursor() as cursor: - cursor.execute(query1, ("sites",)) - try: - time_since_update = int(time() - cursor.fetchall()[0][0]) - except IndexError: - time_since_update = time() - if time_since_update > max_staleness: - _update_sites(cursor) - cursor.execute(query2) - langs = [] - for code, name in cursor.fetchall(): - if "\U" in name: - name = name.decode("unicode_escape") - langs.append((code, name)) - cursor.execute(query3) - projects = cursor.fetchall() - return langs, projects +def update_sites(): + if time() - g.last_sites_update > 60 * 60 * 24 * 7: + g.langs, g.projects = _load_sites() + g.last_sites_update = time() -def _update_sites(cursor): +def _load_sites(): site = g.bot.wiki.get_site() matrix = site.api_query(action="sitematrix")["sitematrix"] del matrix["count"] - languages, projects = set(), set() + langs, projects = set(), set() for site in matrix.itervalues(): if isinstance(site, list): # Special sites bad_sites = ["closed", "private", "fishbowl"] @@ -72,50 +51,17 @@ def _update_sites(cursor): lang, project = full.rsplit(".", 2)[:2] code = u"{0}::{1}".format(lang, special["dbname"]) name = special["code"].capitalize() - languages.add((code, u"{0} ({1})".format(lang, name))) + langs.add((code, u"{0} ({1})".format(lang, name))) projects.add((project, project.capitalize())) - continue - this = set() - for web in site["site"]: - if "closed" in web: - continue - project = "wikipedia" if web["code"] == u"wiki" else web["code"] - this.add((project, project.capitalize())) - if this: - code = site["code"] - if "\U" in site["name"].encode("unicode_escape"): - name = site["name"].encode("unicode_escape") - else: - name = site["name"] - languages.add((code, u"{0} ({1})".format(code, name))) - projects |= this - _save_site_updates(cursor, languages, projects) - -def _save_site_updates(cursor, languages, projects): - query1 = "SELECT lang_code, lang_name FROM language" - query2 = "DELETE FROM language WHERE lang_code = ? AND lang_name = ?" - query3 = "INSERT INTO language VALUES (?, ?)" - query4 = "SELECT project_code, project_name FROM project" - query5 = "DELETE FROM project WHERE project_code = ? AND project_name = ?" - query6 = "INSERT INTO project VALUES (?, ?)" - query7 = "SELECT 1 FROM updates WHERE update_service = ?" - query8 = "UPDATE updates SET update_time = ? WHERE update_service = ?" 
- query9 = "INSERT INTO updates VALUES (?, ?)" - _synchronize_sites_with_db(cursor, languages, query1, query2, query3) - _synchronize_sites_with_db(cursor, projects, query4, query5, query6) - cursor.execute(query7, ("sites",)) - if cursor.fetchall(): - cursor.execute(query8, (time(), "sites")) - else: - cursor.execute(query9, ("sites", time())) - -def _synchronize_sites_with_db(cursor, updates, q_list, q_rmv, q_update): - removals = [] - cursor.execute(q_list) - for site in cursor: - if site in updates: - updates.remove(site) else: - removals.append(site) - cursor.executemany(q_rmv, removals) - cursor.executemany(q_update, updates) + this = set() + for web in site["site"]: + if "closed" in web: + continue + proj = "wikipedia" if web["code"] == u"wiki" else web["code"] + this.add((proj, proj.capitalize())) + if this: + code = site["code"] + langs.add((code, u"{0} ({1})".format(code, site["name"]))) + projects |= this + return langs, projects diff --git a/schema.sql b/schema.sql deleted file mode 100644 index 2009ece..0000000 --- a/schema.sql +++ /dev/null @@ -1,60 +0,0 @@ --- MySQL dump 10.13 Distrib 5.5.12, for solaris10 (i386) --- --- Host: sql Database: u_earwig_copyvios --- ------------------------------------------------------ --- Server version 5.1.59 - - -CREATE DATABASE `u_earwig_copyvios` - DEFAULT CHARACTER SET utf8 - DEFAULT COLLATE utf8_unicode_ci; - --- --- Table structure for table `background` --- - -DROP TABLE IF EXISTS `background`; -CREATE TABLE `background` ( - `background_id` int(9) unsigned NOT NULL, - `background_filename` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL, - `background_url` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL, - `background_descurl` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL, - `background_width` int(9) unsigned DEFAULT NULL, - `background_height` int(9) unsigned DEFAULT NULL, - PRIMARY KEY (`background_id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; - --- --- Table structure for table `language` --- - -DROP TABLE IF EXISTS `language`; -CREATE TABLE `language` ( - `lang_code` varchar(64) COLLATE utf8_unicode_ci NOT NULL DEFAULT '', - `lang_name` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL, - PRIMARY KEY (`lang_code`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; - --- --- Table structure for table `project` --- - -DROP TABLE IF EXISTS `project`; -CREATE TABLE `project` ( - `project_code` varchar(64) COLLATE utf8_unicode_ci NOT NULL DEFAULT '', - `project_name` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL, - PRIMARY KEY (`project_code`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; - --- --- Table structure for table `updates` --- - -DROP TABLE IF EXISTS `updates`; -CREATE TABLE `updates` ( - `update_service` varchar(128) COLLATE utf8_unicode_ci NOT NULL DEFAULT '', - `update_time` int(10) unsigned DEFAULT NULL, - PRIMARY KEY (`update_service`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; - --- Dump completed on 2012-07-20 20:16:08 diff --git a/templates/api.mako b/templates/api.mako index 6866535..976ff36 100644 --- a/templates/api.mako +++ b/templates/api.mako @@ -1,6 +1,4 @@ -<%! - from json import dumps -%>\ +<%! from json import dumps %>\ <%def name="do_indent(size)">
% for i in xrange(size): @@ -18,9 +16,9 @@ } % elif isinstance(obj, (type([]), type(()))): [ - % for member in obj: + % for elem in obj: ${do_indent(indent + 1)} - ${walk_json(member, indent + 1)}${"," if not loop.last else ""} + ${walk_json(elem, indent + 1)}${"," if not loop.last else ""} % endfor ${do_indent(indent)} ] diff --git a/templates/index.mako b/templates/index.mako index 7536677..b6b45ff 100644 --- a/templates/index.mako +++ b/templates/index.mako @@ -51,7 +51,7 @@ https:// <% selected_project = query.project if query.project else g.cookies["CopyviosDefaultProject"].value if "CopyviosDefaultProject" in g.cookies else g.bot.wiki.get_site().project %>\ - % for code, name in query.all_projects: + % for code, name in g.projects: % if code == selected_project: % else: diff --git a/templates/settings.mako b/templates/settings.mako index 403bf9d..dfd0642 100644 --- a/templates/settings.mako +++ b/templates/settings.mako @@ -17,7 +17,7 @@ https:// <% selected_project = g.cookies["CopyviosDefaultProject"].value if "CopyviosDefaultProject" in g.cookies else default_project %>\ - % for code, name in projects: + % for code, name in g.projects: % if code == selected_project: % else:
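For reference, the refresh rules that replace the old SQL `updates` table boil down to the following standalone sketch (hypothetical helper names; the real checks live in `update_sites()` in `copyvios/sites.py` and `_get_background()` in `copyvios/background.py` above): the site matrix is reloaded when the cached copy is over a week old, and the background image is reloaded once the next UTC day begins. Both checks run in-process against timestamps kept on `flask.g`, so no cron job or database row is involved.

```python
from datetime import datetime, timedelta
from time import time

WEEK_SECONDS = 60 * 60 * 24 * 7

def sites_are_stale(last_update):
    """True if the cached site matrix (a time() timestamp) is over a week old."""
    return time() - last_update > WEEK_SECONDS

def background_is_stale(last_update):
    """True once the UTC day after `last_update` (a datetime) has started."""
    plus_one = last_update + timedelta(days=1)
    next_midnight = datetime(plus_one.year, plus_one.month, plus_one.day)
    return datetime.utcnow() > next_midnight
```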