From bc3b9e7587e40579bfceeb448c8260a554d87854 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Thu, 17 Apr 2014 17:33:14 -0400
Subject: [PATCH] Some more database design work.

---
 bitshift/database.py       | 13 ++++++++--
 bitshift/query/__init__.py |  2 ++
 schema.sql                 | 65 +++++++++++++++++++++++++++++++++++++++-------
 3 files changed, 68 insertions(+), 12 deletions(-)

diff --git a/bitshift/database.py b/bitshift/database.py
index 647fe55..07c71c2 100644
--- a/bitshift/database.py
+++ b/bitshift/database.py
@@ -16,16 +16,25 @@ class Database(object):
         """Establish a connection to the database."""
         self._conn = oursql.connect()
 
-    def search(self, query):
+    def search(self, query, page=1):
         """
-        Search the database.
+        Search the database for a query and return the *n*\ th page of results.
 
         :param query: The query to search for.
         :type query: :py:class:`~.query.tree.Tree`
+        :param page: The result page to display.
+        :type page: int
 
         :return: A list of search results.
         :rtype: list of :py:class:`.Codelet`\ s
         """
+        # query tree hash + page -> cached?
+        # cache HIT:
+        #     if qcache_created is too old: invalidate cache, goto cache MISS
+        #     update qcache_last_used
+        #     parse qcache_results, fetch codelets
+        # cache MISS:
+        #     build complex search query
         pass
 
     def insert(self, codelet):
diff --git a/bitshift/query/__init__.py b/bitshift/query/__init__.py
index 7d6e0d5..6971c04 100644
--- a/bitshift/query/__init__.py
+++ b/bitshift/query/__init__.py
@@ -6,4 +6,6 @@ __all__ = ["parse_query"]
 
 def parse_query(query):
     # gets a string, returns a Tree
+    # TODO: note: resultant Trees should be normalized so that "foo OR bar"
+    # and "bar OR foo" result in equivalent trees
     pass
diff --git a/schema.sql b/schema.sql
index 3cb915c..d49fc6e 100644
--- a/schema.sql
+++ b/schema.sql
@@ -1,23 +1,68 @@
-CREATE DATABASE bitshift DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci;
+CREATE DATABASE `bitshift` DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci;
 USE `bitshift`;
 
-CREATE TABLE codelets (
+CREATE TABLE `languages` (
+    `language_id` SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT UNIQUE,
+    `language_name` VARCHAR(64) NOT NULL,
+    PRIMARY KEY (`language_id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
+
+CREATE TABLE `origins` (
+    `origin_id` TINYINT UNSIGNED NOT NULL AUTO_INCREMENT UNIQUE,
+    `origin_name` VARCHAR(64) NOT NULL,
+    `origin_url` VARCHAR(512) NOT NULL,
+    `origin_url_base` VARCHAR(512) NOT NULL,
+    `origin_image` BLOB DEFAULT NULL, -- TODO: verify size (<64kB)
+    PRIMARY KEY (`origin_id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
+
+CREATE TABLE `codelets` (
     `codelet_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT UNIQUE,
     `codelet_name` VARCHAR(512) NOT NULL,
     `codelet_code_id` BIGINT UNSIGNED NOT NULL,
-    `codelet_lang` SMALLINT UNSIGNED DEFAULT NULL,
-    `codelet_origin` TINYINT UNSIGNED DEFAULT NULL,
+    `codelet_lang` SMALLINT UNSIGNED DEFAULT NULL, -- TODO: needs index
+    `codelet_origin` TINYINT UNSIGNED NOT NULL,
     `codelet_url` VARCHAR(512) NOT NULL,
-    `codelet_date_created` DATETIME DEFAULT NULL,
-    `codelet_date_modified` DATETIME DEFAULT NULL,
+    `codelet_date_created` DATETIME DEFAULT NULL, -- TODO: needs index
+    `codelet_date_modified` DATETIME DEFAULT NULL, -- TODO: needs index
     PRIMARY KEY (`codelet_id`)
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
 
-CREATE TABLE code (
+CREATE TABLE `code` (
     `code_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT UNIQUE,
-    `code_hash` BIGINT NOT NULL,
-    `code_code` MEDIUMTEXT NOT NULL,
+    `code_hash` BIGINT NOT NULL, -- TODO: needs index
+    `code_code` MEDIUMTEXT NOT NULL, -- TODO: verify size (16MB?)
     PRIMARY KEY (`code_id`)
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
 
--- separate tables: authors, symbols, caches, search indices
+CREATE TABLE `authors` (
+    `author_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT UNIQUE,
+    `author_codelet` BIGINT UNSIGNED NOT NULL, -- TODO: foreign index?
+    `author_name` VARCHAR(128) NOT NULL, -- TODO: needs index
+    `author_url` VARCHAR(512) DEFAULT NULL,
+    PRIMARY KEY (`author_id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
+
+CREATE TABLE `symbols` (
+    `symbol_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT UNIQUE,
+    `symbol_codelet` BIGINT UNSIGNED NOT NULL, -- TODO: foreign index?
+    `symbol_type` TINYINT UNSIGNED NOT NULL, -- TODO: multi-column index?
+    `symbol_name` VARCHAR(512) NOT NULL, -- TODO: needs index
+    `symbol_row` INT UNSIGNED NOT NULL,
+    `symbol_col` INT UNSIGNED NOT NULL,
+    PRIMARY KEY (`symbol_id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
+
+CREATE TABLE `query_cache` (
+    `qcache_id` INT NOT NULL UNIQUE,
+    `qcache_query` VARCHAR(512) NOT NULL,
+    `qcache_results` BLOB NOT NULL, -- TODO: verify; perhaps use some kind of array
+    `qcache_page` TINYINT UNSIGNED NOT NULL,
+    `qcache_count_mnt` TINYINT UNSIGNED NOT NULL,
+    `qcache_count_exp` TINYINT UNSIGNED NOT NULL,
+    `qcache_created` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, -- TODO: verify
+    `qcache_last_used` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, -- TODO: verify
+    PRIMARY KEY (`qcache_id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
+
+-- TODO: full-text search index table