From 56f23e682a24c3b199cc7add1447cf4130ba2657 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 4 May 2014 01:18:30 -0400 Subject: [PATCH] Database to v6; flesh out a lot of Database.search(). --- bitshift/database/__init__.py | 65 ++++++++++++++++++++++++++++-------------- bitshift/database/migration.py | 30 ++++++++++++++++++- bitshift/database/schema.sql | 23 +++++++++------ 3 files changed, 86 insertions(+), 32 deletions(-) diff --git a/bitshift/database/__init__.py b/bitshift/database/__init__.py index 9b039ca..75f39da 100644 --- a/bitshift/database/__init__.py +++ b/bitshift/database/__init__.py @@ -51,10 +51,15 @@ class Database(object): "Run `python -m bitshift.database.migration`." raise RuntimeError(err) + def _get_codelets_from_ids(self, cursor, ids): + """Return a list of Codelet objects given a list of codelet IDs.""" + raise NotImplementedError() ## TODO + def _decompose_url(self, cursor, url): """Break up a URL into an origin (with a URL base) and a suffix.""" query = """SELECT origin_id, SUBSTR(?, LENGTH(origin_url_base)) - FROM origins WHERE origin_url_base IS NOT NULL + FROM origins + WHERE origin_url_base IS NOT NULL AND ? LIKE CONCAT(origin_url_base, "%")""" cursor.execute(query, (url, url)) @@ -88,19 +93,35 @@ class Database(object): :param page: The result page to display. :type page: int - :return: A list of search results. - :rtype: list of :py:class:`.Codelet`\ s + :return: The total number of results, and the *n*\ th page of results. + :rtype: 2-tuple of (long, list of :py:class:`.Codelet`\ s) """ - # search for cache_hash = mmh3.hash(query.serialize() + str(page)) - # cache HIT: - # update cache_last_used - # return codelets - # cache MISS: - # build complex search query - # fetch codelets - # cache results - # return codelets - pass + query1 = """SELECT cdata_codelet, cache_count_mnt, cache_count_exp + FROM cache + INNER JOIN cache_data ON cache_id = cdata_cache + WHERE cache_id = ?""" + query2 = "INSERT INTO cache VALUES (?, ?, ?, DEFAULT)" + query3 = "INSERT INTO cache_data VALUES (?, ?)" + + cache_id = mmh3.hash64(str(page) + ":" + query.serialize())[0] + + with self._conn.cursor() as cursor: + cursor.execute(query1, (cache_id,)) + results = cursor.fetchall() + if results: # Cache hit + num_results = results[0][1] * (10 ** results[0][2]) + ids = [res[0] for res in results] + else: # Cache miss + ## TODO: build and execute search query + results = cursor.fetchall() + ids = NotImplemented ## TODO: extract ids from results + num_results = NotImplemented ## TODO: num if results else 0 + num_exp = max(len(str(num_results)) - 3, 0) + num_results = int(round(num_results, -num_exp)) + num_mnt = num_results / (10 ** num_exp) + cursor.execute(query2, (cache_id, num_mnt, num_exp)) + cursor.executemany(query3, [(cache_id, c_id) for c_id in ids]) + return (num_results, self._get_codelets_from_ids(cursor, ids)) def insert(self, codelet): """ @@ -109,23 +130,23 @@ class Database(object): :param codelet: The codelet to insert. :type codelet: :py:class:`.Codelet` """ - query1 = """INSERT INTO code VALUES (?, ?) + query1 = """INSERT INTO code VALUES (?, ?, ?) ON DUPLICATE KEY UPDATE code_id=code_id""" query2 = """INSERT INTO codelets VALUES - (DEFAULT, ?, ?, ?, ?, ?, ?, ?, ?)""" + (DEFAULT, ?, ?, ?, ?, ?, ?, ?)""" query3 = "INSERT INTO authors VALUES (DEFAULT, ?, ?, ?)" - with self._conn.cursor() as cursor: - code_id = mmh3.hash64(codelet.code.encode("utf8"))[0] - origin, url = self._decompose_url(cursor, codelet.url) + hash_key = str(codelet.language) + ":" + codelet.code.encode("utf8") + code_id = mmh3.hash64(hash_key)[0] - cursor.execute(query1, (code_id, codelet.code)) + with self._conn.cursor() as cursor: + cursor.execute(query1, (code_id, codelet.language, codelet.code)) if cursor.rowcount == 1: for sym_type, symbols in codelet.symbols.iteritems(): self._insert_symbols(cursor, code_id, sym_type, symbols) - cursor.execute(query2, (codelet.name, code_id, codelet.language, - origin, url, codelet.rank, - codelet.date_created, + origin, url = self._decompose_url(cursor, codelet.url) + cursor.execute(query2, (codelet.name, code_id, origin, url, + codelet.rank, codelet.date_created, codelet.date_modified)) codelet_id = cursor.lastrowid authors = [(codelet_id, a[0], a[1]) for a in codelet.authors] diff --git a/bitshift/database/migration.py b/bitshift/database/migration.py index 743f906..24f744a 100644 --- a/bitshift/database/migration.py +++ b/bitshift/database/migration.py @@ -3,7 +3,7 @@ Contains information about database schema versions, and SQL queries to update between them. """ -VERSION = 5 +VERSION = 6 MIGRATIONS = [ # 1 -> 2 @@ -60,6 +60,34 @@ MIGRATIONS = [ MODIFY COLUMN `origin_name` VARCHAR(64) DEFAULT NULL, MODIFY COLUMN `origin_url` VARCHAR(512) DEFAULT NULL, MODIFY COLUMN `origin_url_base` VARCHAR(512) DEFAULT NULL""" + ], + # 5 -> 6 + [ + """ALTER TABLE `code` + ADD COLUMN `code_lang` SMALLINT UNSIGNED DEFAULT NULL + AFTER `code_id`, + ADD KEY (`code_lang`)""", + """ALTER TABLE `codelets` + DROP KEY `codelet_lang`, + DROP COLUMN `codelet_lang`""", + """ALTER TABLE `cache_data` + DROP FOREIGN KEY `cache_data_ibfk_1`""", + """ALTER TABLE `cache` + MODIFY COLUMN `cache_id` BIGINT NOT NULL, + DROP COLUMN `cache_hash`, + DROP COLUMN `cache_last_used`, + MODIFY COLUMN `cache_count_mnt` SMALLINT UNSIGNED NOT NULL""", + """ALTER TABLE `cache_data` + MODIFY COLUMN `cdata_cache` BIGINT NOT NULL, + ADD PRIMARY KEY (`cdata_cache`, `cdata_codelet`), + ADD CONSTRAINT `cache_data_ibfk_1` FOREIGN KEY (`cdata_codelet`) + REFERENCES `codelets` (`codelet_id`) + ON DELETE CASCADE ON UPDATE CASCADE""", + """CREATE EVENT `flush_cache` + ON SCHEDULE EVERY 1 HOUR + DO + DELETE FROM `cache` + WHERE `cache_created` < DATE_SUB(NOW(), INTERVAL 1 DAY);""" ] ] diff --git a/bitshift/database/schema.sql b/bitshift/database/schema.sql index 50b4f9e..8634416 100644 --- a/bitshift/database/schema.sql +++ b/bitshift/database/schema.sql @@ -1,4 +1,4 @@ --- Schema version 5 +-- Schema version 6 CREATE DATABASE `bitshift` DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci; USE `bitshift`; @@ -6,7 +6,7 @@ USE `bitshift`; CREATE TABLE `version` ( `version` INT UNSIGNED NOT NULL ) ENGINE=InnoDB; -INSERT INTO `version` VALUES (5); +INSERT INTO `version` VALUES (6); CREATE TABLE `origins` ( `origin_id` TINYINT UNSIGNED NOT NULL AUTO_INCREMENT, @@ -20,8 +20,10 @@ INSERT INTO `origins` VALUES (1, NULL, NULL, NULL, NULL); CREATE TABLE `code` ( `code_id` BIGINT NOT NULL, + `code_lang` SMALLINT UNSIGNED DEFAULT NULL, `code_code` MEDIUMTEXT NOT NULL, PRIMARY KEY (`code_id`), + KEY (`code_lang`), FULLTEXT KEY (`code_code`) ) ENGINE=InnoDB; @@ -29,7 +31,6 @@ CREATE TABLE `codelets` ( `codelet_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, `codelet_name` VARCHAR(300) NOT NULL, `codelet_code_id` BIGINT NOT NULL, - `codelet_lang` SMALLINT UNSIGNED DEFAULT NULL, `codelet_origin` TINYINT UNSIGNED NOT NULL, `codelet_url` VARCHAR(512) NOT NULL, `codelet_rank` FLOAT NOT NULL, @@ -37,7 +38,6 @@ CREATE TABLE `codelets` ( `codelet_date_modified` DATETIME DEFAULT NULL, PRIMARY KEY (`codelet_id`), FULLTEXT KEY (`codelet_name`), - KEY (`codelet_lang`), KEY (`codelet_rank`), KEY (`codelet_date_created`), KEY (`codelet_date_modified`), @@ -88,18 +88,17 @@ CREATE TABLE `symbol_locations` ( ) ENGINE=InnoDB; CREATE TABLE `cache` ( - `cache_id` INT UNSIGNED NOT NULL AUTO_INCREMENT, - `cache_hash` BIGINT NOT NULL, - `cache_count_mnt` TINYINT UNSIGNED NOT NULL, + `cache_id` BIGINT NOT NULL, + `cache_count_mnt` SMALLINT UNSIGNED NOT NULL, `cache_count_exp` TINYINT UNSIGNED NOT NULL, `cache_created` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, - `cache_last_used` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (`cache_id`) ) ENGINE=InnoDB; CREATE TABLE `cache_data` ( - `cdata_cache` INT UNSIGNED NOT NULL, + `cdata_cache` BIGINT NOT NULL, `cdata_codelet` BIGINT UNSIGNED NOT NULL, + PRIMARY KEY (`cdata_cache`, `cdata_codelet`), FOREIGN KEY (`cdata_cache`) REFERENCES `cache` (`cache_id`) ON DELETE CASCADE ON UPDATE CASCADE, @@ -107,3 +106,9 @@ CREATE TABLE `cache_data` ( REFERENCES `codelets` (`codelet_id`) ON DELETE CASCADE ON UPDATE CASCADE ) ENGINE=InnoDB; + +CREATE EVENT `flush_cache` + ON SCHEDULE EVERY 1 HOUR + DO + DELETE FROM `cache` + WHERE `cache_created` < DATE_SUB(NOW(), INTERVAL 1 DAY);