diff --git a/copyvios/checker.py b/copyvios/checker.py
index ad66d21..dfd652a 100644
--- a/copyvios/checker.py
+++ b/copyvios/checker.py
@@ -9,7 +9,7 @@
 from earwigbot.wiki.copyvios.markov import EMPTY, MarkovChain
 from earwigbot.wiki.copyvios.parsers import ArticleTextParser
 from earwigbot.wiki.copyvios.result import CopyvioSource, CopyvioCheckResult
 
-from .misc import Query, get_db, get_cursor
+from .misc import Query, get_db, get_cursor, sql_dialect
 from .sites import get_site
 from .turnitin import search_turnitin
@@ -129,8 +129,9 @@ def _get_page_by_revid(site, revid):
     return page
 
 def _get_cached_results(page, conn, mode, noskip):
-    query1 = """DELETE FROM cache
-                WHERE cache_time < DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 3 DAY)"""
+    expiry = sql_dialect(mysql="DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 3 DAY)",
+                         sqlite="STRFTIME('%s', 'now', '-3 days')")
+    query1 = "DELETE FROM cache WHERE cache_time < %s" % expiry
     query2 = """SELECT cache_time, cache_queries, cache_process_time,
                        cache_possible_miss
                 FROM cache
@@ -149,6 +150,8 @@ def _get_cached_results(page, conn, mode, noskip):
         cache_time, queries, check_time, possible_miss = results[0]
         if possible_miss and noskip:
             return None
+        if not isinstance(cache_time, datetime):
+            cache_time = datetime.utcfromtimestamp(cache_time)
         cursor.execute(query3, (cache_id,))
         data = cursor.fetchall()
 
@@ -196,8 +199,11 @@ def _format_date(cache_time):
 
 def _cache_result(page, result, conn, mode):
     query1 = "DELETE FROM cache WHERE cache_id = ?"
-    query2 = "INSERT INTO cache VALUES (?, DEFAULT, ?, ?, ?)"
-    query3 = "INSERT INTO cache_data VALUES (DEFAULT, ?, ?, ?, ?, ?)"
+    query2 = """INSERT INTO cache (cache_id, cache_queries, cache_process_time,
+                cache_possible_miss) VALUES (?, ?, ?, ?)"""
+    query3 = """INSERT INTO cache_data (cdata_cache_id, cdata_url,
+                cdata_confidence, cdata_skipped,
+                cdata_excluded) VALUES (?, ?, ?, ?, ?)"""
     cache_id = buffer(sha256(mode + page.get().encode("utf8")).digest())
     data = [(cache_id, source.url[:1024], source.confidence,
              source.skipped, source.excluded)
diff --git a/copyvios/misc.py b/copyvios/misc.py
index 36c9ce1..58e5dcd 100644
--- a/copyvios/misc.py
+++ b/copyvios/misc.py
@@ -51,7 +51,9 @@ def _connect_to_db(engine, args):
     if engine == "sqlite":
         import apsw
         dbpath = join(cache.bot.config.root_dir, "copyvios.db")
-        return apsw.Connection(dbpath)
+        conn = apsw.Connection(dbpath)
+        conn.cursor().execute("PRAGMA foreign_keys = ON")
+        return conn
     raise ValueError("Unknown engine: %s" % engine)
 
 def get_db():
@@ -72,6 +74,14 @@ def get_cursor(conn):
     else:
         raise ValueError("Unknown engine: %s" % g._engine)
 
+def sql_dialect(mysql, sqlite):
+    """Return *mysql* or *sqlite*, whichever matches the active engine."""
+    if g._engine == "mysql":
+        return mysql
+    if g._engine == "sqlite":
+        return sqlite
+    raise ValueError("Unknown engine: %s" % g._engine)
+
 def get_notice():
     try:
         with open(expanduser("~/copyvios_notice.html")) as fp:
diff --git a/schema.sql b/schema.sql
new file mode 100644
index 0000000..58516f2
--- /dev/null
+++ b/schema.sql
@@ -0,0 +1,22 @@
+CREATE TABLE cache (
+    cache_id BLOB NOT NULL,
+    cache_time INTEGER NOT NULL DEFAULT (STRFTIME('%s', 'now')),
+    cache_queries INTEGER NOT NULL DEFAULT 0,
+    cache_process_time REAL NOT NULL DEFAULT 0,
+    cache_possible_miss INTEGER NOT NULL DEFAULT 0,
+    PRIMARY KEY (cache_id)
+);
+CREATE INDEX cache_time_idx ON cache (cache_time);
+CREATE TABLE cache_data (
+    -- Declared INTEGER so that the PRIMARY KEY below aliases SQLite's rowid
+    -- and is assigned automatically; inserts never supply cdata_id.
+    cdata_id INTEGER,
+    cdata_cache_id BLOB NOT NULL,
+    cdata_url TEXT NOT NULL,
+    cdata_confidence REAL NOT NULL DEFAULT 0,
+    cdata_skipped INTEGER NOT NULL DEFAULT 0,
+    cdata_excluded INTEGER NOT NULL DEFAULT 0,
+    PRIMARY KEY (cdata_id),
+    FOREIGN KEY (cdata_cache_id) REFERENCES cache (cache_id)
+        ON DELETE CASCADE ON UPDATE CASCADE
+);