@@ -51,10 +51,15 @@ class Database(object): | |||
"Run `python -m bitshift.database.migration`." | |||
raise RuntimeError(err) | |||
def _get_codelets_from_ids(self, cursor, ids): | |||
"""Return a list of Codelet objects given a list of codelet IDs.""" | |||
raise NotImplementedError() ## TODO | |||
def _decompose_url(self, cursor, url): | |||
"""Break up a URL into an origin (with a URL base) and a suffix.""" | |||
query = """SELECT origin_id, SUBSTR(?, LENGTH(origin_url_base)) | |||
FROM origins WHERE origin_url_base IS NOT NULL | |||
FROM origins | |||
WHERE origin_url_base IS NOT NULL | |||
AND ? LIKE CONCAT(origin_url_base, "%")""" | |||
cursor.execute(query, (url, url)) | |||
@@ -88,19 +93,35 @@ class Database(object): | |||
:param page: The result page to display. | |||
:type page: int | |||
:return: A list of search results. | |||
:rtype: list of :py:class:`.Codelet`\ s | |||
:return: The total number of results, and the *n*\ th page of results. | |||
:rtype: 2-tuple of (long, list of :py:class:`.Codelet`\ s) | |||
""" | |||
# search for cache_hash = mmh3.hash(query.serialize() + str(page)) | |||
# cache HIT: | |||
# update cache_last_used | |||
# return codelets | |||
# cache MISS: | |||
# build complex search query | |||
# fetch codelets | |||
# cache results | |||
# return codelets | |||
pass | |||
query1 = """SELECT cdata_codelet, cache_count_mnt, cache_count_exp | |||
FROM cache | |||
INNER JOIN cache_data ON cache_id = cdata_cache | |||
WHERE cache_id = ?""" | |||
query2 = "INSERT INTO cache VALUES (?, ?, ?, DEFAULT)" | |||
query3 = "INSERT INTO cache_data VALUES (?, ?)" | |||
cache_id = mmh3.hash64(str(page) + ":" + query.serialize())[0] | |||
with self._conn.cursor() as cursor: | |||
cursor.execute(query1, (cache_id,)) | |||
results = cursor.fetchall() | |||
if results: # Cache hit | |||
num_results = results[0][1] * (10 ** results[0][2]) | |||
ids = [res[0] for res in results] | |||
else: # Cache miss | |||
## TODO: build and execute search query | |||
results = cursor.fetchall() | |||
ids = NotImplemented ## TODO: extract ids from results | |||
num_results = NotImplemented ## TODO: num if results else 0 | |||
num_exp = max(len(str(num_results)) - 3, 0) | |||
num_results = int(round(num_results, -num_exp)) | |||
num_mnt = num_results / (10 ** num_exp) | |||
cursor.execute(query2, (cache_id, num_mnt, num_exp)) | |||
cursor.executemany(query3, [(cache_id, c_id) for c_id in ids]) | |||
return (num_results, self._get_codelets_from_ids(cursor, ids)) | |||
def insert(self, codelet): | |||
""" | |||
@@ -109,23 +130,23 @@ class Database(object): | |||
:param codelet: The codelet to insert. | |||
:type codelet: :py:class:`.Codelet` | |||
""" | |||
query1 = """INSERT INTO code VALUES (?, ?) | |||
query1 = """INSERT INTO code VALUES (?, ?, ?) | |||
ON DUPLICATE KEY UPDATE code_id=code_id""" | |||
query2 = """INSERT INTO codelets VALUES | |||
(DEFAULT, ?, ?, ?, ?, ?, ?, ?, ?)""" | |||
(DEFAULT, ?, ?, ?, ?, ?, ?, ?)""" | |||
query3 = "INSERT INTO authors VALUES (DEFAULT, ?, ?, ?)" | |||
with self._conn.cursor() as cursor: | |||
code_id = mmh3.hash64(codelet.code.encode("utf8"))[0] | |||
origin, url = self._decompose_url(cursor, codelet.url) | |||
hash_key = str(codelet.language) + ":" + codelet.code.encode("utf8") | |||
code_id = mmh3.hash64(hash_key)[0] | |||
cursor.execute(query1, (code_id, codelet.code)) | |||
with self._conn.cursor() as cursor: | |||
cursor.execute(query1, (code_id, codelet.language, codelet.code)) | |||
if cursor.rowcount == 1: | |||
for sym_type, symbols in codelet.symbols.iteritems(): | |||
self._insert_symbols(cursor, code_id, sym_type, symbols) | |||
cursor.execute(query2, (codelet.name, code_id, codelet.language, | |||
origin, url, codelet.rank, | |||
codelet.date_created, | |||
origin, url = self._decompose_url(cursor, codelet.url) | |||
cursor.execute(query2, (codelet.name, code_id, origin, url, | |||
codelet.rank, codelet.date_created, | |||
codelet.date_modified)) | |||
codelet_id = cursor.lastrowid | |||
authors = [(codelet_id, a[0], a[1]) for a in codelet.authors] | |||
@@ -3,7 +3,7 @@ Contains information about database schema versions, and SQL queries to update | |||
between them. | |||
""" | |||
VERSION = 5 | |||
VERSION = 6 | |||
MIGRATIONS = [ | |||
# 1 -> 2 | |||
@@ -60,6 +60,34 @@ MIGRATIONS = [ | |||
MODIFY COLUMN `origin_name` VARCHAR(64) DEFAULT NULL, | |||
MODIFY COLUMN `origin_url` VARCHAR(512) DEFAULT NULL, | |||
MODIFY COLUMN `origin_url_base` VARCHAR(512) DEFAULT NULL""" | |||
], | |||
# 5 -> 6 | |||
[ | |||
"""ALTER TABLE `code` | |||
ADD COLUMN `code_lang` SMALLINT UNSIGNED DEFAULT NULL | |||
AFTER `code_id`, | |||
ADD KEY (`code_lang`)""", | |||
"""ALTER TABLE `codelets` | |||
DROP KEY `codelet_lang`, | |||
DROP COLUMN `codelet_lang`""", | |||
"""ALTER TABLE `cache_data` | |||
DROP FOREIGN KEY `cache_data_ibfk_1`""", | |||
"""ALTER TABLE `cache` | |||
MODIFY COLUMN `cache_id` BIGINT NOT NULL, | |||
DROP COLUMN `cache_hash`, | |||
DROP COLUMN `cache_last_used`, | |||
MODIFY COLUMN `cache_count_mnt` SMALLINT UNSIGNED NOT NULL""", | |||
"""ALTER TABLE `cache_data` | |||
MODIFY COLUMN `cdata_cache` BIGINT NOT NULL, | |||
ADD PRIMARY KEY (`cdata_cache`, `cdata_codelet`), | |||
ADD CONSTRAINT `cache_data_ibfk_1` FOREIGN KEY (`cdata_codelet`) | |||
REFERENCES `codelets` (`codelet_id`) | |||
ON DELETE CASCADE ON UPDATE CASCADE""", | |||
"""CREATE EVENT `flush_cache` | |||
ON SCHEDULE EVERY 1 HOUR | |||
DO | |||
DELETE FROM `cache` | |||
WHERE `cache_created` < DATE_SUB(NOW(), INTERVAL 1 DAY);""" | |||
] | |||
] | |||
@@ -1,4 +1,4 @@ | |||
-- Schema version 6 | |||
CREATE DATABASE `bitshift` DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci; | |||
USE `bitshift`; | |||
@@ -6,7 +6,7 @@ USE `bitshift`; | |||
CREATE TABLE `version` ( | |||
`version` INT UNSIGNED NOT NULL | |||
) ENGINE=InnoDB; | |||
INSERT INTO `version` VALUES (5); | |||
INSERT INTO `version` VALUES (6); | |||
CREATE TABLE `origins` ( | |||
`origin_id` TINYINT UNSIGNED NOT NULL AUTO_INCREMENT, | |||
@@ -20,8 +20,10 @@ INSERT INTO `origins` VALUES (1, NULL, NULL, NULL, NULL); | |||
CREATE TABLE `code` ( | |||
`code_id` BIGINT NOT NULL, | |||
`code_lang` SMALLINT UNSIGNED DEFAULT NULL, | |||
`code_code` MEDIUMTEXT NOT NULL, | |||
PRIMARY KEY (`code_id`), | |||
KEY (`code_lang`), | |||
FULLTEXT KEY (`code_code`) | |||
) ENGINE=InnoDB; | |||
@@ -29,7 +31,6 @@ CREATE TABLE `codelets` ( | |||
`codelet_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, | |||
`codelet_name` VARCHAR(300) NOT NULL, | |||
`codelet_code_id` BIGINT NOT NULL, | |||
`codelet_lang` SMALLINT UNSIGNED DEFAULT NULL, | |||
`codelet_origin` TINYINT UNSIGNED NOT NULL, | |||
`codelet_url` VARCHAR(512) NOT NULL, | |||
`codelet_rank` FLOAT NOT NULL, | |||
@@ -37,7 +38,6 @@ CREATE TABLE `codelets` ( | |||
`codelet_date_modified` DATETIME DEFAULT NULL, | |||
PRIMARY KEY (`codelet_id`), | |||
FULLTEXT KEY (`codelet_name`), | |||
KEY (`codelet_lang`), | |||
KEY (`codelet_rank`), | |||
KEY (`codelet_date_created`), | |||
KEY (`codelet_date_modified`), | |||
@@ -88,18 +88,17 @@ CREATE TABLE `symbol_locations` ( | |||
) ENGINE=InnoDB; | |||
CREATE TABLE `cache` ( | |||
`cache_id` INT UNSIGNED NOT NULL AUTO_INCREMENT, | |||
`cache_hash` BIGINT NOT NULL, | |||
`cache_count_mnt` TINYINT UNSIGNED NOT NULL, | |||
`cache_id` BIGINT NOT NULL, | |||
`cache_count_mnt` SMALLINT UNSIGNED NOT NULL, | |||
`cache_count_exp` TINYINT UNSIGNED NOT NULL, | |||
`cache_created` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, | |||
`cache_last_used` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, | |||
PRIMARY KEY (`cache_id`) | |||
) ENGINE=InnoDB; | |||
CREATE TABLE `cache_data` ( | |||
`cdata_cache` INT UNSIGNED NOT NULL, | |||
`cdata_cache` BIGINT NOT NULL, | |||
`cdata_codelet` BIGINT UNSIGNED NOT NULL, | |||
PRIMARY KEY (`cdata_cache`, `cdata_codelet`), | |||
FOREIGN KEY (`cdata_cache`) | |||
REFERENCES `cache` (`cache_id`) | |||
ON DELETE CASCADE ON UPDATE CASCADE, | |||
@@ -107,3 +106,9 @@ CREATE TABLE `cache_data` ( | |||
REFERENCES `codelets` (`codelet_id`) | |||
ON DELETE CASCADE ON UPDATE CASCADE | |||
) ENGINE=InnoDB; | |||
CREATE EVENT `flush_cache` | |||
ON SCHEDULE EVERY 1 HOUR | |||
DO | |||
DELETE FROM `cache` | |||
WHERE `cache_created` < DATE_SUB(NOW(), INTERVAL 1 DAY); |