Selaa lähdekoodia

Database to v6; flesh out a lot of Database.search().

tags/v1.0^2
Ben Kurtovic 10 vuotta sitten
vanhempi
commit
56f23e682a
3 muutettua tiedostoa jossa 86 lisäystä ja 31 poistoa
  1. +43
    -22
      bitshift/database/__init__.py
  2. +29
    -1
      bitshift/database/migration.py
  3. +14
    -8
      bitshift/database/schema.sql

+ 43
- 22
bitshift/database/__init__.py Näytä tiedosto

@@ -51,10 +51,15 @@ class Database(object):
"Run `python -m bitshift.database.migration`." "Run `python -m bitshift.database.migration`."
raise RuntimeError(err) raise RuntimeError(err)


def _get_codelets_from_ids(self, cursor, ids):
"""Return a list of Codelet objects given a list of codelet IDs."""
raise NotImplementedError() ## TODO

def _decompose_url(self, cursor, url): def _decompose_url(self, cursor, url):
"""Break up a URL into an origin (with a URL base) and a suffix.""" """Break up a URL into an origin (with a URL base) and a suffix."""
query = """SELECT origin_id, SUBSTR(?, LENGTH(origin_url_base)) query = """SELECT origin_id, SUBSTR(?, LENGTH(origin_url_base))
FROM origins WHERE origin_url_base IS NOT NULL
FROM origins
WHERE origin_url_base IS NOT NULL
AND ? LIKE CONCAT(origin_url_base, "%")""" AND ? LIKE CONCAT(origin_url_base, "%")"""


cursor.execute(query, (url, url)) cursor.execute(query, (url, url))
@@ -88,19 +93,35 @@ class Database(object):
:param page: The result page to display. :param page: The result page to display.
:type page: int :type page: int


:return: A list of search results.
:rtype: list of :py:class:`.Codelet`\ s
:return: The total number of results, and the *n*\ th page of results.
:rtype: 2-tuple of (long, list of :py:class:`.Codelet`\ s)
""" """
# search for cache_hash = mmh3.hash(query.serialize() + str(page))
# cache HIT:
# update cache_last_used
# return codelets
# cache MISS:
# build complex search query
# fetch codelets
# cache results
# return codelets
pass
query1 = """SELECT cdata_codelet, cache_count_mnt, cache_count_exp
FROM cache
INNER JOIN cache_data ON cache_id = cdata_cache
WHERE cache_id = ?"""
query2 = "INSERT INTO cache VALUES (?, ?, ?, DEFAULT)"
query3 = "INSERT INTO cache_data VALUES (?, ?)"

cache_id = mmh3.hash64(str(page) + ":" + query.serialize())[0]

with self._conn.cursor() as cursor:
cursor.execute(query1, (cache_id,))
results = cursor.fetchall()
if results: # Cache hit
num_results = results[0][1] * (10 ** results[0][2])
ids = [res[0] for res in results]
else: # Cache miss
## TODO: build and execute search query
results = cursor.fetchall()
ids = NotImplemented ## TODO: extract ids from results
num_results = NotImplemented ## TODO: num if results else 0
num_exp = max(len(str(num_results)) - 3, 0)
num_results = int(round(num_results, -num_exp))
num_mnt = num_results / (10 ** num_exp)
cursor.execute(query2, (cache_id, num_mnt, num_exp))
cursor.executemany(query3, [(cache_id, c_id) for c_id in ids])
return (num_results, self._get_codelets_from_ids(cursor, ids))


def insert(self, codelet): def insert(self, codelet):
""" """
@@ -109,23 +130,23 @@ class Database(object):
:param codelet: The codelet to insert. :param codelet: The codelet to insert.
:type codelet: :py:class:`.Codelet` :type codelet: :py:class:`.Codelet`
""" """
query1 = """INSERT INTO code VALUES (?, ?)
query1 = """INSERT INTO code VALUES (?, ?, ?)
ON DUPLICATE KEY UPDATE code_id=code_id""" ON DUPLICATE KEY UPDATE code_id=code_id"""
query2 = """INSERT INTO codelets VALUES query2 = """INSERT INTO codelets VALUES
(DEFAULT, ?, ?, ?, ?, ?, ?, ?, ?)"""
(DEFAULT, ?, ?, ?, ?, ?, ?, ?)"""
query3 = "INSERT INTO authors VALUES (DEFAULT, ?, ?, ?)" query3 = "INSERT INTO authors VALUES (DEFAULT, ?, ?, ?)"


with self._conn.cursor() as cursor:
code_id = mmh3.hash64(codelet.code.encode("utf8"))[0]
origin, url = self._decompose_url(cursor, codelet.url)
hash_key = str(codelet.language) + ":" + codelet.code.encode("utf8")
code_id = mmh3.hash64(hash_key)[0]


cursor.execute(query1, (code_id, codelet.code))
with self._conn.cursor() as cursor:
cursor.execute(query1, (code_id, codelet.language, codelet.code))
if cursor.rowcount == 1: if cursor.rowcount == 1:
for sym_type, symbols in codelet.symbols.iteritems(): for sym_type, symbols in codelet.symbols.iteritems():
self._insert_symbols(cursor, code_id, sym_type, symbols) self._insert_symbols(cursor, code_id, sym_type, symbols)
cursor.execute(query2, (codelet.name, code_id, codelet.language,
origin, url, codelet.rank,
codelet.date_created,
origin, url = self._decompose_url(cursor, codelet.url)
cursor.execute(query2, (codelet.name, code_id, origin, url,
codelet.rank, codelet.date_created,
codelet.date_modified)) codelet.date_modified))
codelet_id = cursor.lastrowid codelet_id = cursor.lastrowid
authors = [(codelet_id, a[0], a[1]) for a in codelet.authors] authors = [(codelet_id, a[0], a[1]) for a in codelet.authors]


+ 29
- 1
bitshift/database/migration.py Näytä tiedosto

@@ -3,7 +3,7 @@ Contains information about database schema versions, and SQL queries to update
between them. between them.
""" """


VERSION = 5
VERSION = 6


MIGRATIONS = [ MIGRATIONS = [
# 1 -> 2 # 1 -> 2
@@ -60,6 +60,34 @@ MIGRATIONS = [
MODIFY COLUMN `origin_name` VARCHAR(64) DEFAULT NULL, MODIFY COLUMN `origin_name` VARCHAR(64) DEFAULT NULL,
MODIFY COLUMN `origin_url` VARCHAR(512) DEFAULT NULL, MODIFY COLUMN `origin_url` VARCHAR(512) DEFAULT NULL,
MODIFY COLUMN `origin_url_base` VARCHAR(512) DEFAULT NULL""" MODIFY COLUMN `origin_url_base` VARCHAR(512) DEFAULT NULL"""
],
# 5 -> 6
[
"""ALTER TABLE `code`
ADD COLUMN `code_lang` SMALLINT UNSIGNED DEFAULT NULL
AFTER `code_id`,
ADD KEY (`code_lang`)""",
"""ALTER TABLE `codelets`
DROP KEY `codelet_lang`,
DROP COLUMN `codelet_lang`""",
"""ALTER TABLE `cache_data`
DROP FOREIGN KEY `cache_data_ibfk_1`""",
"""ALTER TABLE `cache`
MODIFY COLUMN `cache_id` BIGINT NOT NULL,
DROP COLUMN `cache_hash`,
DROP COLUMN `cache_last_used`,
MODIFY COLUMN `cache_count_mnt` SMALLINT UNSIGNED NOT NULL""",
"""ALTER TABLE `cache_data`
MODIFY COLUMN `cdata_cache` BIGINT NOT NULL,
ADD PRIMARY KEY (`cdata_cache`, `cdata_codelet`),
ADD CONSTRAINT `cache_data_ibfk_1` FOREIGN KEY (`cdata_codelet`)
REFERENCES `codelets` (`codelet_id`)
ON DELETE CASCADE ON UPDATE CASCADE""",
"""CREATE EVENT `flush_cache`
ON SCHEDULE EVERY 1 HOUR
DO
DELETE FROM `cache`
WHERE `cache_created` < DATE_SUB(NOW(), INTERVAL 1 DAY);"""
] ]
] ]




+ 14
- 8
bitshift/database/schema.sql Näytä tiedosto

@@ -1,4 +1,4 @@
-- Schema version 6


CREATE DATABASE `bitshift` DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci; CREATE DATABASE `bitshift` DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci;
USE `bitshift`; USE `bitshift`;
@@ -6,7 +6,7 @@ USE `bitshift`;
CREATE TABLE `version` ( CREATE TABLE `version` (
`version` INT UNSIGNED NOT NULL `version` INT UNSIGNED NOT NULL
) ENGINE=InnoDB; ) ENGINE=InnoDB;
INSERT INTO `version` VALUES (5);
INSERT INTO `version` VALUES (6);


CREATE TABLE `origins` ( CREATE TABLE `origins` (
`origin_id` TINYINT UNSIGNED NOT NULL AUTO_INCREMENT, `origin_id` TINYINT UNSIGNED NOT NULL AUTO_INCREMENT,
@@ -20,8 +20,10 @@ INSERT INTO `origins` VALUES (1, NULL, NULL, NULL, NULL);


CREATE TABLE `code` ( CREATE TABLE `code` (
`code_id` BIGINT NOT NULL, `code_id` BIGINT NOT NULL,
`code_lang` SMALLINT UNSIGNED DEFAULT NULL,
`code_code` MEDIUMTEXT NOT NULL, `code_code` MEDIUMTEXT NOT NULL,
PRIMARY KEY (`code_id`), PRIMARY KEY (`code_id`),
KEY (`code_lang`),
FULLTEXT KEY (`code_code`) FULLTEXT KEY (`code_code`)
) ENGINE=InnoDB; ) ENGINE=InnoDB;


@@ -29,7 +31,6 @@ CREATE TABLE `codelets` (
`codelet_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, `codelet_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
`codelet_name` VARCHAR(300) NOT NULL, `codelet_name` VARCHAR(300) NOT NULL,
`codelet_code_id` BIGINT NOT NULL, `codelet_code_id` BIGINT NOT NULL,
`codelet_lang` SMALLINT UNSIGNED DEFAULT NULL,
`codelet_origin` TINYINT UNSIGNED NOT NULL, `codelet_origin` TINYINT UNSIGNED NOT NULL,
`codelet_url` VARCHAR(512) NOT NULL, `codelet_url` VARCHAR(512) NOT NULL,
`codelet_rank` FLOAT NOT NULL, `codelet_rank` FLOAT NOT NULL,
@@ -37,7 +38,6 @@ CREATE TABLE `codelets` (
`codelet_date_modified` DATETIME DEFAULT NULL, `codelet_date_modified` DATETIME DEFAULT NULL,
PRIMARY KEY (`codelet_id`), PRIMARY KEY (`codelet_id`),
FULLTEXT KEY (`codelet_name`), FULLTEXT KEY (`codelet_name`),
KEY (`codelet_lang`),
KEY (`codelet_rank`), KEY (`codelet_rank`),
KEY (`codelet_date_created`), KEY (`codelet_date_created`),
KEY (`codelet_date_modified`), KEY (`codelet_date_modified`),
@@ -88,18 +88,17 @@ CREATE TABLE `symbol_locations` (
) ENGINE=InnoDB; ) ENGINE=InnoDB;


CREATE TABLE `cache` ( CREATE TABLE `cache` (
`cache_id` INT UNSIGNED NOT NULL AUTO_INCREMENT,
`cache_hash` BIGINT NOT NULL,
`cache_count_mnt` TINYINT UNSIGNED NOT NULL,
`cache_id` BIGINT NOT NULL,
`cache_count_mnt` SMALLINT UNSIGNED NOT NULL,
`cache_count_exp` TINYINT UNSIGNED NOT NULL, `cache_count_exp` TINYINT UNSIGNED NOT NULL,
`cache_created` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, `cache_created` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
`cache_last_used` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`cache_id`) PRIMARY KEY (`cache_id`)
) ENGINE=InnoDB; ) ENGINE=InnoDB;


CREATE TABLE `cache_data` ( CREATE TABLE `cache_data` (
`cdata_cache` INT UNSIGNED NOT NULL,
`cdata_cache` BIGINT NOT NULL,
`cdata_codelet` BIGINT UNSIGNED NOT NULL, `cdata_codelet` BIGINT UNSIGNED NOT NULL,
PRIMARY KEY (`cdata_cache`, `cdata_codelet`),
FOREIGN KEY (`cdata_cache`) FOREIGN KEY (`cdata_cache`)
REFERENCES `cache` (`cache_id`) REFERENCES `cache` (`cache_id`)
ON DELETE CASCADE ON UPDATE CASCADE, ON DELETE CASCADE ON UPDATE CASCADE,
@@ -107,3 +106,9 @@ CREATE TABLE `cache_data` (
REFERENCES `codelets` (`codelet_id`) REFERENCES `codelets` (`codelet_id`)
ON DELETE CASCADE ON UPDATE CASCADE ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB; ) ENGINE=InnoDB;

CREATE EVENT `flush_cache`
ON SCHEDULE EVERY 1 HOUR
DO
DELETE FROM `cache`
WHERE `cache_created` < DATE_SUB(NOW(), INTERVAL 1 DAY);

Ladataan…
Peruuta
Tallenna