Sfoglia il codice sorgente

Store what was previously in SQL tables in flask.g.

Ben Kurtovic 9 anni fa
11 ha cambiato i file con 122 aggiunte e 252 eliminazioni
  1. +8
  2. +20
  3. +5
  4. +51
  5. +3
  6. +9
  7. +19
  8. +0
  9. +3
  10. +2
  11. +2

+ 8
- 14
README.md Vedi File

@@ -24,25 +24,19 @@ Running
- Install all dependencies listed above. You might want to use a virtualenv.

- Create the SQL database defined in `schema.sql`. Also create the `cache` and
`cache_data` tables defined by
this can be in the same or a different database.
- Create an SQL database with the `cache` and `cache_data` tables defined by

- Create an earwigbot instance in `.earwigbot` (run `earwigbot .earwigbot`). In
`.earwigbot/config.yml`, fill out the connection info for the database(s)
above by adding the following to the `wiki` section:
`.earwigbot/config.yml`, fill out the connection info for the database by
adding the following to the `wiki` section:

host: <hostname of database defined in schema.sql>
db: <name of database>
host: <hostname of database containing cache and cache_data tables>
db: <name of database>
host: <hostname of database server>
db: <name of database>

If additional arguments are needed by `oursql.connect()`, like usernames or
passwords, they should be added to the `globals` and `cache` sections.
passwords, they should be added to the `_copyviosSQL` section.

- Copy `.lighttpd.conf` to the relevant location (on Tool Labs, this is in the
root of the project's home directory) and adjust its contents as necessary.

+ 20
- 15
app.fcgi Vedi File

@@ -18,7 +18,7 @@ from copyvios.api import format_api_error, handle_api_request
from copyvios.checker import do_check
from copyvios.cookies import parse_cookies
from copyvios.settings import process_settings
from copyvios.sites import get_sites
from copyvios.sites import update_sites

app = Flask(__name__)
@@ -28,9 +28,7 @@ app.logger.addHandler(TimedRotatingFileHandler(
"logs/app.log", when="midnight", backupCount=7))
app.logger.info(u"Flask server started " + asctime())

bot = Bot(".earwigbot", 100)

def catch_errors(func):
@@ -43,12 +41,20 @@ def catch_errors(func):
return render_template("error.mako", traceback=format_exc())
return inner

def setup_app():
    """Initialise the state that replaces the former SQL tables.

    Creates the bot instance and empty containers on ``flask.g``: the
    language/project sets, the time of the last site-list refresh (0, so the
    first check always counts as stale), and the per-background cached data
    with their last-update times.

    NOTE(review): this relies on values stored on ``flask.g`` still being
    there for later requests -- confirm that holds for the Flask version in
    use.
    """
    g.bot = Bot(".earwigbot", 100)
    g.langs = set()
    g.projects = set()
    g.last_sites_update = 0
    g.last_background_updates = {}
    g.background_data = {}

def prepare_request():
g.bot = bot
g.globals_db = g.cache_db = None
g.cookies = parse_cookies(request.script_root,
g.db = None
g.cookies = parse_cookies(
request.script_root, request.environ.get("HTTP_COOKIE"))
g.new_cookies = []

@@ -66,25 +72,24 @@ def write_access_log(response):

def close_databases(error):
if g.globals_db:
if g.cache_db:
if g.db:

def index():
query = do_check()
return render_template("index.mako", query=query, result=query.result)

@app.route("/settings", methods=["GET", "POST"])
def settings():
status = process_settings() if request.method == "POST" else None
langs, projects = get_sites()
default = bot.wiki.get_site()
kwargs = {"status": status, "langs": langs, "projects": projects,
"default_lang": default.lang, "default_project": default.project}
default = g.bot.wiki.get_site()
kwargs = {"status": status, "default_lang": default.lang,
"default_project": default.project}
return render_template("settings.mako", **kwargs)


+ 5
- 3
copyvios/api.py Vedi File

@@ -2,9 +2,11 @@

from collections import OrderedDict

from flask import g

from .checker import do_check, T_POSSIBLE, T_SUSPECT
from .misc import Query
from .sites import get_sites
from .sites import update_sites

__all__ = ["format_api_error", "handle_api_request"]

@@ -92,9 +94,9 @@ def _hook_check(query):
return data

def _hook_sites(query):
langs, projects = get_sites()
return OrderedDict((
("status", "ok"), ("langs", langs), ("projects", projects)))
("status", "ok"), ("langs", g.langs), ("projects", g.projects)))

_HOOKS = {
"compare": _hook_check,

+ 51
- 75
copyvios/background.py Vedi File

@@ -4,73 +4,30 @@ from datetime import datetime, timedelta
from json import loads
import random
import re
from time import time

from earwigbot import exceptions
from flask import g

from .misc import get_globals_db

__all__ = ["set_background"]

def set_background(selected):
conn = get_globals_db()
if "CopyviosScreenCache" in g.cookies:
cache = g.cookies["CopyviosScreenCache"].value
screen = loads(cache)
except (ValueError, KeyError):
screen = {"width": 1024, "height": 768}
screen = {"width": 1024, "height": 768}

if selected == "potd":
info = _update_url(conn, "background_potd", 1, _get_fresh_potd)
info = _update_url(conn, "background_list", 2, _get_fresh_list)
filename, url, descurl, width, height = info
bg_url = _build_url(screen, filename, url, width, height)
g.descurl = descurl
return bg_url
def _get_commons_site():
return g.bot.wiki.get_site("commonswiki")
except exceptions.SiteNotFoundError:
return g.bot.wiki.add_site(project="wikimedia", lang="commons")

def _update_url(conn, service, bg_id, callback):
query1 = "SELECT update_time FROM updates WHERE update_service = ?"
query2 = "SELECT 1 FROM background WHERE background_id = ?"
query3 = "DELETE FROM background WHERE background_id = ?"
query4 = "INSERT INTO background VALUES (?, ?, ?, ?, ?, ?)"
query5 = "SELECT 1 FROM updates WHERE update_service = ?"
query6 = "UPDATE updates SET update_time = ? WHERE update_service = ?"
query7 = "INSERT INTO updates VALUES (?, ?)"
query8 = "SELECT * FROM background WHERE background_id = ?"
with conn.cursor() as cursor:
cursor.execute(query1, (service,))
update_time = datetime.utcfromtimestamp(cursor.fetchall()[0][0])
except IndexError:
update_time = datetime.min
plus_one = update_time + timedelta(days=1)
max_age = datetime(plus_one.year, plus_one.month, plus_one.day)
if datetime.utcnow() > max_age:
filename, url, descurl, width, height = callback()
cursor.execute(query2, (bg_id,))
if cursor.fetchall():
cursor.execute(query3, (bg_id,))
cursor.execute(query4, (bg_id, filename, url, descurl, width,
cursor.execute(query5, (service,))
if cursor.fetchall():
cursor.execute(query6, (time(), service))
cursor.execute(query7, (service, time()))
cursor.execute(query8, (bg_id,))
filename, url, descurl, width, height = cursor.fetchone()[1:]
return filename, url, descurl, width, height
def _load_file(site, filename):
res = site.api_query(action="query", prop="imageinfo", iiprop="url|size",
titles="File:" + filename)
data = res["query"]["pages"].values()[0]["imageinfo"][0]
url = data["url"]
descurl = data["descriptionurl"]
width = data["width"]
height = data["height"]
return filename.replace(" ", "_"), url, descurl, width, height

def _get_fresh_potd():
site = _get_site()
site = _get_commons_site()
date = datetime.utcnow().strftime("%Y-%m-%d")
page = site.get_page("Template:Potd/" + date)
regex = ur"\{\{Potd filename\|(?:1=)?(.*?)\|.*?\}\}"
@@ -78,29 +35,13 @@ def _get_fresh_potd():
return _load_file(site, filename)

def _get_fresh_list():
site = _get_site()
site = _get_commons_site()
page = site.get_page("User:The Earwig/POTD")
regex = ur"\*\*?\s*\[\[:File:(.*?)\]\]"
filenames = re.findall(regex, page.get())
filename = random.choice(filenames)
return _load_file(site, filename)

def _load_file(site, filename):
res = site.api_query(action="query", prop="imageinfo", iiprop="url|size",
titles="File:" + filename)
data = res["query"]["pages"].values()[0]["imageinfo"][0]
url = data["url"]
descurl = data["descriptionurl"]
width = data["width"]
height = data["height"]
return filename.replace(" ", "_"), url, descurl, width, height

def _get_site():
return g.bot.wiki.get_site("commonswiki")
except exceptions.SiteNotFoundError:
return g.bot.wiki.add_site(project="wikimedia", lang="commons")

def _build_url(screen, filename, url, imgwidth, imgheight):
width = screen["width"]
if float(imgwidth) / imgheight > float(screen["width"]) / screen["height"]:
@@ -109,3 +50,38 @@ def _build_url(screen, filename, url, imgwidth, imgheight):
return url
url = url.replace("/commons/", "/commons/thumb/")
return url + "/" + str(width) + "px-" + filename

"potd": _get_fresh_potd,
"list": _get_fresh_list

def _get_background(selected):
    """Return the cached image info for a background, refreshing it daily.

    *selected* names the background source; names not present in
    ``_BACKGROUNDS`` are served by ``_get_fresh_list``. The value returned is
    whatever the update function produced and is stored in
    ``g.background_data[selected]``.

    The data is refreshed once the current UTC date is later than the date of
    the last update for *selected*.
    """
    if not g.last_background_updates:
        for key in _BACKGROUNDS:
            g.last_background_updates[key] = datetime.min

    # Unknown names fall back to the list background below, so they must also
    # fall back to "never updated" here instead of raising KeyError.
    last_update = g.last_background_updates.get(selected, datetime.min)
    plus_one = last_update + timedelta(days=1)
    # Midnight at the start of the day after the last update.
    max_age = datetime(plus_one.year, plus_one.month, plus_one.day)
    if datetime.utcnow() > max_age or selected not in g.background_data:
        update_func = _BACKGROUNDS.get(selected, _get_fresh_list)
        g.background_data[selected] = update_func()
        g.last_background_updates[selected] = datetime.utcnow()
    return g.background_data[selected]

def set_background(selected):
if "CopyviosScreenCache" in g.cookies:
cache = g.cookies["CopyviosScreenCache"].value
screen = loads(cache)
except (ValueError, KeyError):
screen = {"width": 1024, "height": 768}
screen = {"width": 1024, "height": 768}

filename, url, descurl, width, height = _get_background(selected)
bg_url = _build_url(screen, filename, url, width, height)
g.descurl = descurl
return bg_url

+ 3
- 4
copyvios/checker.py Vedi File

@@ -9,8 +9,8 @@ from earwigbot.wiki.copyvios.markov import EMPTY, MarkovChain
from earwigbot.wiki.copyvios.parsers import ArticleTextParser
from earwigbot.wiki.copyvios.result import CopyvioSource, CopyvioCheckResult

from .misc import Query, get_cache_db
from .sites import get_site, get_sites
from .misc import Query, get_db
from .sites import get_site

__all__ = ["do_check", "T_POSSIBLE", "T_SUSPECT"]

@@ -30,7 +30,6 @@ def do_check(query=None):
if query.project:
query.project = query.project.lower()

query.all_langs, query.all_projects = get_sites()
query.submitted = query.project and query.lang and (query.title or query.oldid)
if query.submitted:
query.site = get_site(query)
@@ -61,7 +60,7 @@ def _get_results(query, follow=True):
if not query.action:
query.action = "compare" if query.url else "search"
if query.action == "search":
conn = get_cache_db()
conn = get_db()
use_engine = 0 if query.use_engine in ("0", "false") else 1
use_links = 0 if query.use_links in ("0", "false") else 1
if not use_engine and not use_links:

+ 9
- 17
copyvios/misc.py Vedi File

@@ -8,7 +8,7 @@ from sqlalchemy.pool import manage

oursql = manage(oursql)

__all__ = ["Query", "get_globals_db", "get_cache_db", "httpsfix", "urlstrip"]
__all__ = ["Query", "get_db", "httpsfix", "urlstrip"]

class Query(object):
def __init__(self, method="GET"):
@@ -27,22 +27,14 @@ class Query(object):
self.query[key] = value

def _connect_db(name):
args = g.bot.config.wiki["_copyviosSQL"][name]
args["read_default_file"] = expanduser("~/.my.cnf")
args["autoping"] = True
args["autoreconnect"] = True
return oursql.connect(**args)

def get_globals_db():
if not g.globals_db:
g.globals_db = _connect_db("globals")
return g.globals_db

def get_cache_db():
if not g.cache_db:
g.cache_db = _connect_db("cache")
return g.cache_db
def get_db():
    """Return the database connection stored on ``g``, opening it if needed.

    When ``g.db`` is falsy, a connection is created with ``oursql.connect()``
    using the ``_copyviosSQL`` section of the bot's wiki config, extended with
    the user's ``~/.my.cnf`` defaults file and the autoping/autoreconnect
    flags, and stored in ``g.db``. Otherwise the stored connection is
    returned unchanged.
    """
    if g.db:
        return g.db

    options = g.bot.config.wiki["_copyviosSQL"]
    extras = (
        ("read_default_file", expanduser("~/.my.cnf")),
        ("autoping", True),
        ("autoreconnect", True),
    )
    for key, value in extras:
        options[key] = value
    g.db = oursql.connect(**options)
    return g.db

def httpsfix(context, url):
if url.startswith("http://"):

+ 19
- 73
copyvios/sites.py Vedi File

@@ -6,9 +6,7 @@ from urlparse import urlparse
from earwigbot import exceptions
from flask import g

from .misc import get_globals_db

__all__ = ["get_site", "get_sites"]
__all__ = ["get_site", "update_sites"]

def get_site(query):
lang, project, name = query.lang, query.project, query.name
@@ -31,35 +29,16 @@ def get_site(query):
except (exceptions.APIError, exceptions.LoginError):
return None

def get_sites():
max_staleness = 60 * 60 * 24 * 7
conn = get_globals_db()
query1 = "SELECT update_time FROM updates WHERE update_service = ?"
query2 = "SELECT lang_code, lang_name FROM language"
query3 = "SELECT project_code, project_name FROM project"
with conn.cursor() as cursor:
cursor.execute(query1, ("sites",))
time_since_update = int(time() - cursor.fetchall()[0][0])
except IndexError:
time_since_update = time()
if time_since_update > max_staleness:
langs = []
for code, name in cursor.fetchall():
if "\U" in name:
name = name.decode("unicode_escape")
langs.append((code, name))
projects = cursor.fetchall()
return langs, projects
def update_sites():
    """Reload ``g.langs`` and ``g.projects`` when they are over a week old.

    Does nothing if the last refresh (``g.last_sites_update``) happened within
    the past seven days; otherwise stores the result of ``_load_sites()`` and
    records the current time as the new refresh time.
    """
    one_week = 60 * 60 * 24 * 7
    age = time() - g.last_sites_update
    if age <= one_week:
        return
    g.langs, g.projects = _load_sites()
    g.last_sites_update = time()

def _update_sites(cursor):
def _load_sites():
site = g.bot.wiki.get_site()
matrix = site.api_query(action="sitematrix")["sitematrix"]
del matrix["count"]
languages, projects = set(), set()
langs, projects = set(), set()
for site in matrix.itervalues():
if isinstance(site, list): # Special sites
bad_sites = ["closed", "private", "fishbowl"]
@@ -72,50 +51,17 @@ def _update_sites(cursor):
lang, project = full.rsplit(".", 2)[:2]
code = u"{0}::{1}".format(lang, special["dbname"])
name = special["code"].capitalize()
languages.add((code, u"{0} ({1})".format(lang, name)))
langs.add((code, u"{0} ({1})".format(lang, name)))
projects.add((project, project.capitalize()))
this = set()
for web in site["site"]:
if "closed" in web:
project = "wikipedia" if web["code"] == u"wiki" else web["code"]
this.add((project, project.capitalize()))
if this:
code = site["code"]
if "\U" in site["name"].encode("unicode_escape"):
name = site["name"].encode("unicode_escape")
name = site["name"]
languages.add((code, u"{0} ({1})".format(code, name)))
projects |= this
_save_site_updates(cursor, languages, projects)

def _save_site_updates(cursor, languages, projects):
query1 = "SELECT lang_code, lang_name FROM language"
query2 = "DELETE FROM language WHERE lang_code = ? AND lang_name = ?"
query3 = "INSERT INTO language VALUES (?, ?)"
query4 = "SELECT project_code, project_name FROM project"
query5 = "DELETE FROM project WHERE project_code = ? AND project_name = ?"
query6 = "INSERT INTO project VALUES (?, ?)"
query7 = "SELECT 1 FROM updates WHERE update_service = ?"
query8 = "UPDATE updates SET update_time = ? WHERE update_service = ?"
query9 = "INSERT INTO updates VALUES (?, ?)"
_synchronize_sites_with_db(cursor, languages, query1, query2, query3)
_synchronize_sites_with_db(cursor, projects, query4, query5, query6)
cursor.execute(query7, ("sites",))
if cursor.fetchall():
cursor.execute(query8, (time(), "sites"))
cursor.execute(query9, ("sites", time()))

def _synchronize_sites_with_db(cursor, updates, q_list, q_rmv, q_update):
removals = []
for site in cursor:
if site in updates:
cursor.executemany(q_rmv, removals)
cursor.executemany(q_update, updates)
this = set()
for web in site["site"]:
if "closed" in web:
proj = "wikipedia" if web["code"] == u"wiki" else web["code"]
this.add((proj, proj.capitalize()))
if this:
code = site["code"]
langs.add((code, u"{0} ({1})".format(code, site["name"])))
projects |= this
return langs, projects

+ 0
- 42
schema.sql Vedi File

@@ -1,60 +0,0 @@

CREATE DATABASE `u_earwig_copyvios`
DEFAULT COLLATE utf8_unicode_ci;

-- Cached background images, one row per background_id. The application code
-- uses id 1 with the "background_potd" service and id 2 with the
-- "background_list" service, and stores the image's filename, URL,
-- description-page URL, width and height.
DROP TABLE IF EXISTS `background`;
CREATE TABLE `background` (
`background_id` int(9) unsigned NOT NULL,
`background_filename` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL,
`background_url` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL,
`background_descurl` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL,
`background_width` int(9) unsigned DEFAULT NULL,
`background_height` int(9) unsigned DEFAULT NULL,
PRIMARY KEY (`background_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;

-- Language codes and their display names, as (lang_code, lang_name) pairs.
-- Read by get_sites() and synchronized by _save_site_updates().
CREATE TABLE `language` (
`lang_code` varchar(64) COLLATE utf8_unicode_ci NOT NULL DEFAULT '',
`lang_name` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL,
PRIMARY KEY (`lang_code`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;

-- Project codes and their display names, as (project_code, project_name)
-- pairs. Read by get_sites() and synchronized by _save_site_updates().
CREATE TABLE `project` (
`project_code` varchar(64) COLLATE utf8_unicode_ci NOT NULL DEFAULT '',
`project_name` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL,
PRIMARY KEY (`project_code`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;

-- Last refresh time per cached service. The application code uses the
-- service names "sites", "background_potd" and "background_list", and writes
-- update_time from Python's time() (seconds since the epoch).
CREATE TABLE `updates` (
`update_service` varchar(128) COLLATE utf8_unicode_ci NOT NULL DEFAULT '',
`update_time` int(10) unsigned DEFAULT NULL,
PRIMARY KEY (`update_service`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;

+ 3
- 5
templates/api.mako Vedi File

@@ -1,6 +1,4 @@
from json import dumps
<%! from json import dumps %>\
<%def name="do_indent(size)">
<br />
% for i in xrange(size):
@@ -18,9 +16,9 @@
% elif isinstance(obj, (type([]), type(()))):
% for member in obj:
% for elem in obj:
${do_indent(indent + 1)}
${walk_json(member, indent + 1)}${"," if not loop.last else ""}
${walk_json(elem, indent + 1)}${"," if not loop.last else ""}
% endfor

+ 2
- 2
templates/index.mako Vedi File

@@ -51,7 +51,7 @@
<span class="mono">https://</span>
<select name="lang">
<% selected_lang = query.orig_lang if query.orig_lang else g.cookies["CopyviosDefaultLang"].value if "CopyviosDefaultLang" in g.cookies else g.bot.wiki.get_site().lang %>\
% for code, name in query.all_langs:
% for code, name in g.langs:
% if code == selected_lang:
<option value="${code | h}" selected="selected">${name}</option>
% else:
@@ -62,7 +62,7 @@
<span class="mono">.</span>
<select name="project">
<% selected_project = query.project if query.project else g.cookies["CopyviosDefaultProject"].value if "CopyviosDefaultProject" in g.cookies else g.bot.wiki.get_site().project %>\
% for code, name in query.all_projects:
% for code, name in g.projects:
% if code == selected_project:
<option value="${code | h}" selected="selected">${name}</option>
% else:

+ 2
- 2
templates/settings.mako Vedi File

@@ -17,7 +17,7 @@
<span class="mono">https://</span>
<select name="lang">
<% selected_lang = g.cookies["CopyviosDefaultLang"].value if "CopyviosDefaultLang" in g.cookies else default_lang %>\
% for code, name in langs:
% for code, name in g.langs:
% if code == selected_lang:
<option value="${code | h}" selected="selected">${name}</option>
% else:
@@ -28,7 +28,7 @@
<span class="mono">.</span>
<select name="project">
<% selected_project = g.cookies["CopyviosDefaultProject"].value if "CopyviosDefaultProject" in g.cookies else default_project %>\
% for code, name in projects:
% for code, name in g.projects:
% if code == selected_project:
<option value="${code | h}" selected="selected">${name}</option>
% else:
