Bladeren bron

Database to v6; flesh out a lot of Database.search().

tags/v1.0^2
Ben Kurtovic 10 jaren geleden
bovenliggende
commit
56f23e682a
3 gewijzigde bestanden met toevoegingen van 86 en 31 verwijderingen
  1. +43
    -22
      bitshift/database/__init__.py
  2. +29
    -1
      bitshift/database/migration.py
  3. +14
    -8
      bitshift/database/schema.sql

+ 43
- 22
bitshift/database/__init__.py Bestand weergeven

@@ -51,10 +51,15 @@ class Database(object):
"Run `python -m bitshift.database.migration`."
raise RuntimeError(err)

def _get_codelets_from_ids(self, cursor, ids):
"""Return a list of Codelet objects given a list of codelet IDs."""
raise NotImplementedError() ## TODO

def _decompose_url(self, cursor, url):
"""Break up a URL into an origin (with a URL base) and a suffix."""
query = """SELECT origin_id, SUBSTR(?, LENGTH(origin_url_base))
FROM origins WHERE origin_url_base IS NOT NULL
FROM origins
WHERE origin_url_base IS NOT NULL
AND ? LIKE CONCAT(origin_url_base, "%")"""

cursor.execute(query, (url, url))
@@ -88,19 +93,35 @@ class Database(object):
:param page: The result page to display.
:type page: int

:return: A list of search results.
:rtype: list of :py:class:`.Codelet`\ s
:return: The total number of results, and the *n*\ th page of results.
:rtype: 2-tuple of (long, list of :py:class:`.Codelet`\ s)
"""
# search for cache_hash = mmh3.hash(query.serialize() + str(page))
# cache HIT:
# update cache_last_used
# return codelets
# cache MISS:
# build complex search query
# fetch codelets
# cache results
# return codelets
pass
query1 = """SELECT cdata_codelet, cache_count_mnt, cache_count_exp
FROM cache
INNER JOIN cache_data ON cache_id = cdata_cache
WHERE cache_id = ?"""
query2 = "INSERT INTO cache VALUES (?, ?, ?, DEFAULT)"
query3 = "INSERT INTO cache_data VALUES (?, ?)"

cache_id = mmh3.hash64(str(page) + ":" + query.serialize())[0]

with self._conn.cursor() as cursor:
cursor.execute(query1, (cache_id,))
results = cursor.fetchall()
if results: # Cache hit
num_results = results[0][1] * (10 ** results[0][2])
ids = [res[0] for res in results]
else: # Cache miss
## TODO: build and execute search query
results = cursor.fetchall()
ids = NotImplemented ## TODO: extract ids from results
num_results = NotImplemented ## TODO: num if results else 0
num_exp = max(len(str(num_results)) - 3, 0)
num_results = int(round(num_results, -num_exp))
num_mnt = num_results / (10 ** num_exp)
cursor.execute(query2, (cache_id, num_mnt, num_exp))
cursor.executemany(query3, [(cache_id, c_id) for c_id in ids])
return (num_results, self._get_codelets_from_ids(cursor, ids))

def insert(self, codelet):
"""
@@ -109,23 +130,23 @@ class Database(object):
:param codelet: The codelet to insert.
:type codelet: :py:class:`.Codelet`
"""
query1 = """INSERT INTO code VALUES (?, ?)
query1 = """INSERT INTO code VALUES (?, ?, ?)
ON DUPLICATE KEY UPDATE code_id=code_id"""
query2 = """INSERT INTO codelets VALUES
(DEFAULT, ?, ?, ?, ?, ?, ?, ?, ?)"""
(DEFAULT, ?, ?, ?, ?, ?, ?, ?)"""
query3 = "INSERT INTO authors VALUES (DEFAULT, ?, ?, ?)"

with self._conn.cursor() as cursor:
code_id = mmh3.hash64(codelet.code.encode("utf8"))[0]
origin, url = self._decompose_url(cursor, codelet.url)
hash_key = str(codelet.language) + ":" + codelet.code.encode("utf8")
code_id = mmh3.hash64(hash_key)[0]

cursor.execute(query1, (code_id, codelet.code))
with self._conn.cursor() as cursor:
cursor.execute(query1, (code_id, codelet.language, codelet.code))
if cursor.rowcount == 1:
for sym_type, symbols in codelet.symbols.iteritems():
self._insert_symbols(cursor, code_id, sym_type, symbols)
cursor.execute(query2, (codelet.name, code_id, codelet.language,
origin, url, codelet.rank,
codelet.date_created,
origin, url = self._decompose_url(cursor, codelet.url)
cursor.execute(query2, (codelet.name, code_id, origin, url,
codelet.rank, codelet.date_created,
codelet.date_modified))
codelet_id = cursor.lastrowid
authors = [(codelet_id, a[0], a[1]) for a in codelet.authors]


+ 29
- 1
bitshift/database/migration.py Bestand weergeven

@@ -3,7 +3,7 @@ Contains information about database schema versions, and SQL queries to update
between them.
"""

VERSION = 5
VERSION = 6

MIGRATIONS = [
# 1 -> 2
@@ -60,6 +60,34 @@ MIGRATIONS = [
MODIFY COLUMN `origin_name` VARCHAR(64) DEFAULT NULL,
MODIFY COLUMN `origin_url` VARCHAR(512) DEFAULT NULL,
MODIFY COLUMN `origin_url_base` VARCHAR(512) DEFAULT NULL"""
],
# 5 -> 6
[
"""ALTER TABLE `code`
ADD COLUMN `code_lang` SMALLINT UNSIGNED DEFAULT NULL
AFTER `code_id`,
ADD KEY (`code_lang`)""",
"""ALTER TABLE `codelets`
DROP KEY `codelet_lang`,
DROP COLUMN `codelet_lang`""",
"""ALTER TABLE `cache_data`
DROP FOREIGN KEY `cache_data_ibfk_1`""",
"""ALTER TABLE `cache`
MODIFY COLUMN `cache_id` BIGINT NOT NULL,
DROP COLUMN `cache_hash`,
DROP COLUMN `cache_last_used`,
MODIFY COLUMN `cache_count_mnt` SMALLINT UNSIGNED NOT NULL""",
"""ALTER TABLE `cache_data`
MODIFY COLUMN `cdata_cache` BIGINT NOT NULL,
ADD PRIMARY KEY (`cdata_cache`, `cdata_codelet`),
ADD CONSTRAINT `cache_data_ibfk_1` FOREIGN KEY (`cdata_codelet`)
REFERENCES `codelets` (`codelet_id`)
ON DELETE CASCADE ON UPDATE CASCADE""",
"""CREATE EVENT `flush_cache`
ON SCHEDULE EVERY 1 HOUR
DO
DELETE FROM `cache`
WHERE `cache_created` < DATE_SUB(NOW(), INTERVAL 1 DAY);"""
]
]



+ 14
- 8
bitshift/database/schema.sql Bestand weergeven

@@ -1,4 +1,4 @@
-- Schema version 6

CREATE DATABASE `bitshift` DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci;
USE `bitshift`;
@@ -6,7 +6,7 @@ USE `bitshift`;
CREATE TABLE `version` (
`version` INT UNSIGNED NOT NULL
) ENGINE=InnoDB;
INSERT INTO `version` VALUES (5);
INSERT INTO `version` VALUES (6);

CREATE TABLE `origins` (
`origin_id` TINYINT UNSIGNED NOT NULL AUTO_INCREMENT,
@@ -20,8 +20,10 @@ INSERT INTO `origins` VALUES (1, NULL, NULL, NULL, NULL);

CREATE TABLE `code` (
`code_id` BIGINT NOT NULL,
`code_lang` SMALLINT UNSIGNED DEFAULT NULL,
`code_code` MEDIUMTEXT NOT NULL,
PRIMARY KEY (`code_id`),
KEY (`code_lang`),
FULLTEXT KEY (`code_code`)
) ENGINE=InnoDB;

@@ -29,7 +31,6 @@ CREATE TABLE `codelets` (
`codelet_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
`codelet_name` VARCHAR(300) NOT NULL,
`codelet_code_id` BIGINT NOT NULL,
`codelet_lang` SMALLINT UNSIGNED DEFAULT NULL,
`codelet_origin` TINYINT UNSIGNED NOT NULL,
`codelet_url` VARCHAR(512) NOT NULL,
`codelet_rank` FLOAT NOT NULL,
@@ -37,7 +38,6 @@ CREATE TABLE `codelets` (
`codelet_date_modified` DATETIME DEFAULT NULL,
PRIMARY KEY (`codelet_id`),
FULLTEXT KEY (`codelet_name`),
KEY (`codelet_lang`),
KEY (`codelet_rank`),
KEY (`codelet_date_created`),
KEY (`codelet_date_modified`),
@@ -88,18 +88,17 @@ CREATE TABLE `symbol_locations` (
) ENGINE=InnoDB;

CREATE TABLE `cache` (
`cache_id` INT UNSIGNED NOT NULL AUTO_INCREMENT,
`cache_hash` BIGINT NOT NULL,
`cache_count_mnt` TINYINT UNSIGNED NOT NULL,
`cache_id` BIGINT NOT NULL,
`cache_count_mnt` SMALLINT UNSIGNED NOT NULL,
`cache_count_exp` TINYINT UNSIGNED NOT NULL,
`cache_created` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
`cache_last_used` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`cache_id`)
) ENGINE=InnoDB;

CREATE TABLE `cache_data` (
`cdata_cache` INT UNSIGNED NOT NULL,
`cdata_cache` BIGINT NOT NULL,
`cdata_codelet` BIGINT UNSIGNED NOT NULL,
PRIMARY KEY (`cdata_cache`, `cdata_codelet`),
FOREIGN KEY (`cdata_cache`)
REFERENCES `cache` (`cache_id`)
ON DELETE CASCADE ON UPDATE CASCADE,
@@ -107,3 +106,9 @@ CREATE TABLE `cache_data` (
REFERENCES `codelets` (`codelet_id`)
ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB;

CREATE EVENT `flush_cache`
ON SCHEDULE EVERY 1 HOUR
DO
DELETE FROM `cache`
WHERE `cache_created` < DATE_SUB(NOW(), INTERVAL 1 DAY);

Laden…
Annuleren
Opslaan