From 962dd9aef55a50a5ffa395dc78e897158157b27d Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 14 Apr 2014 12:02:23 -0400 Subject: [PATCH 01/18] Docstrings for Database methods; oursql dependency. --- app.py | 9 ++++++--- bitshift/database.py | 21 +++++++++++++++++++++ setup.py | 2 +- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/app.py b/app.py index c4083c9..2e3b0c8 100644 --- a/app.py +++ b/app.py @@ -5,6 +5,7 @@ Module to contain all the project's Flask server plumbing. from flask import Flask from flask import render_template, session +from bitshift.database import Database from bitshift.query import parse_query app = Flask(__name__) @@ -12,7 +13,9 @@ app.config.from_object("bitshift.config") app_env = app.jinja_env app_env.line_statement_prefix = "=" -app_env.globals.update(assets = assets) +app_env.globals.update(assets=assets) + +database = Database() @app.route("/") def index(): @@ -20,8 +23,8 @@ def index(): @app.route("/search/") def search(query): - ## tree = parse_query(query) - ## database.search(tree) + tree = parse_query(query) + database.search(tree) pass if __name__ == "__main__": diff --git a/bitshift/database.py b/bitshift/database.py index b8995ee..36b984e 100644 --- a/bitshift/database.py +++ b/bitshift/database.py @@ -16,3 +16,24 @@ class Database(object): def _create(self): pass + + def search(self, query): + """ + Search the database. + + :param query: The query to search for. + :type query: :py:class:`~.query.tree.Tree` + + :return: A list of search results. + :rtype: list of :py:class:`.Codelet`\ s + """ + pass + + def insert(self, codelet): + """ + Insert a codelet into the database. + + :param codelet: The codelet to insert. 
+ :type codelet: :py:class:`.Codelet` + """ + pass diff --git a/setup.py b/setup.py index 1faa5b9..5fa1189 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( version = "0.1", packages = find_packages(), install_requires = ["Flask>=0.10.1", "pygments>=1.6", "requests>=2.2.0", - "BeautifulSoup>=3.2.1"], + "BeautifulSoup>=3.2.1", "oursql>=0.9.3.1"], author = "Benjamin Attal, Ben Kurtovic, Severyn Kozak", license = "MIT", url = "https://github.com/earwig/bitshift" From 085fd62704c1ee5d9b88daef4f5992082e9c56dc Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 15 Apr 2014 00:38:12 -0400 Subject: [PATCH 02/18] Database schema, hashing module, some other things. --- .gitignore | 1 + bitshift/database.py | 10 +++++----- schema.sql | 23 +++++++++++++++++++++++ setup.py | 5 +++-- 4 files changed, 32 insertions(+), 7 deletions(-) create mode 100644 schema.sql diff --git a/.gitignore b/.gitignore index 6a014f5..7e00121 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ .sass-cache .DS_Store +.my.cnf # github premade rules *.py[cod] diff --git a/bitshift/database.py b/bitshift/database.py index 36b984e..647fe55 100644 --- a/bitshift/database.py +++ b/bitshift/database.py @@ -3,19 +3,18 @@ Module with classes and functions to handle communication with the MySQL database backend, which manages the search index. """ +import mmh3 import oursql class Database(object): """Represents the MySQL database.""" def __init__(self): - pass + self._connect() def _connect(self): - pass - - def _create(self): - pass + """Establish a connection to the database.""" + self._conn = oursql.connect() def search(self, query): """ @@ -36,4 +35,5 @@ class Database(object): :param codelet: The codelet to insert. 
:type codelet: :py:class:`.Codelet` """ + # code_hash = mmh3.hash64(codelet.code)[0] pass diff --git a/schema.sql b/schema.sql new file mode 100644 index 0000000..3cb915c --- /dev/null +++ b/schema.sql @@ -0,0 +1,23 @@ +CREATE DATABASE bitshift DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci; +USE `bitshift`; + +CREATE TABLE codelets ( + `codelet_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT UNIQUE, + `codelet_name` VARCHAR(512) NOT NULL, + `codelet_code_id` BIGINT UNSIGNED NOT NULL, + `codelet_lang` SMALLINT UNSIGNED DEFAULT NULL, + `codelet_origin` TINYINT UNSIGNED DEFAULT NULL, + `codelet_url` VARCHAR(512) NOT NULL, + `codelet_date_created` DATETIME DEFAULT NULL, + `codelet_date_modified` DATETIME DEFAULT NULL, + PRIMARY KEY (`codelet_id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; + +CREATE TABLE code ( + `code_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT UNIQUE, + `code_hash` BIGINT NOT NULL, + `code_code` MEDIUMTEXT NOT NULL, + PRIMARY KEY (`code_id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; + +-- separate tables: authors, symbols, caches, search indices diff --git a/setup.py b/setup.py index 5fa1189..97441b7 100644 --- a/setup.py +++ b/setup.py @@ -4,8 +4,9 @@ setup( name = "bitshift", version = "0.1", packages = find_packages(), - install_requires = ["Flask>=0.10.1", "pygments>=1.6", "requests>=2.2.0", - "BeautifulSoup>=3.2.1", "oursql>=0.9.3.1"], + install_requires = [ + "Flask>=0.10.1", "pygments>=1.6", "requests>=2.2.0", + "BeautifulSoup>=3.2.1", "oursql>=0.9.3.1", "mmh3>=2.3"], author = "Benjamin Attal, Ben Kurtovic, Severyn Kozak", license = "MIT", url = "https://github.com/earwig/bitshift" From bc3b9e7587e40579bfceeb448c8260a554d87854 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 17 Apr 2014 17:33:14 -0400 Subject: [PATCH 03/18] Some more database design work. 
--- bitshift/database.py | 13 ++++++++-- bitshift/query/__init__.py | 2 ++ schema.sql | 65 +++++++++++++++++++++++++++++++++++++++------- 3 files changed, 68 insertions(+), 12 deletions(-) diff --git a/bitshift/database.py b/bitshift/database.py index 647fe55..07c71c2 100644 --- a/bitshift/database.py +++ b/bitshift/database.py @@ -16,16 +16,25 @@ class Database(object): """Establish a connection to the database.""" self._conn = oursql.connect() - def search(self, query): + def search(self, query, page=1): """ - Search the database. + Search the database for a query and return the *n*\ th page of results. :param query: The query to search for. :type query: :py:class:`~.query.tree.Tree` + :param page: The result page to display. + :type page: int :return: A list of search results. :rtype: list of :py:class:`.Codelet`\ s """ + # query tree hash + page -> cached? + # cache HIT: + # if qcache_created is too old: invalidate cache, goto cache MISS + # update qcache_last_used + # parse qcache_results, fetch codelets + # cache MISS: + # build complex search query pass def insert(self, codelet): diff --git a/bitshift/query/__init__.py b/bitshift/query/__init__.py index 7d6e0d5..6971c04 100644 --- a/bitshift/query/__init__.py +++ b/bitshift/query/__init__.py @@ -6,4 +6,6 @@ __all__ = ["parse_query"] def parse_query(query): # gets a string, returns a Tree + # TODO: note: resultant Trees should be normalized so that "foo OR bar" + # and "bar OR foo" result in equivalent trees pass diff --git a/schema.sql b/schema.sql index 3cb915c..d49fc6e 100644 --- a/schema.sql +++ b/schema.sql @@ -1,23 +1,68 @@ -CREATE DATABASE bitshift DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci; +CREATE DATABASE `bitshift` DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci; USE `bitshift`; -CREATE TABLE codelets ( +CREATE TABLE `languages` ( + `language_id` SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT UNIQUE, + `language_name` VARCHAR(64) NOT NULL, + PRIMARY KEY (`language_id`) +) ENGINE=InnoDB 
DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; + +CREATE TABLE `origins` ( + `origin_id` TINYINT UNSIGNED NOT NULL AUTO_INCREMENT UNIQUE, + `origin_name` VARCHAR(64) NOT NULL, + `origin_url` VARCHAR(512) NOT NULL, + `origin_url_base` VARCHAR(512) NOT NULL, + `origin_image` TINYBLOB DEFAULT NULL, -- TODO: verify size (<64kB) + PRIMARY KEY (`origin_id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; + +CREATE TABLE `codelets` ( `codelet_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT UNIQUE, `codelet_name` VARCHAR(512) NOT NULL, `codelet_code_id` BIGINT UNSIGNED NOT NULL, - `codelet_lang` SMALLINT UNSIGNED DEFAULT NULL, - `codelet_origin` TINYINT UNSIGNED DEFAULT NULL, + `codelet_lang` SMALLINT UNSIGNED DEFAULT NULL, -- TODO: needs index + `codelet_origin` TINYINT UNSIGNED NOT NULL, `codelet_url` VARCHAR(512) NOT NULL, - `codelet_date_created` DATETIME DEFAULT NULL, - `codelet_date_modified` DATETIME DEFAULT NULL, + `codelet_date_created` DATETIME DEFAULT NULL, -- TODO: needs index + `codelet_date_modified` DATETIME DEFAULT NULL, -- TODO: needs index PRIMARY KEY (`codelet_id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; -CREATE TABLE code ( +CREATE TABLE `code` ( `code_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT UNIQUE, - `code_hash` BIGINT NOT NULL, - `code_code` MEDIUMTEXT NOT NULL, + `code_hash` BIGINT NOT NULL, -- TODO: needs index + `code_code` MEDIUMTEXT NOT NULL, -- TODO: verify size (16mB?) PRIMARY KEY (`code_id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; --- separate tables: authors, symbols, caches, search indices +CREATE TABLE `authors` ( + `author_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT UNIQUE, + `author_codelet` BIGINT UNSIGNED NOT NULL, -- TODO: foreign index? 
+ `author_name` VARCHAR(128) NOT NULL, -- TODO: needs index + `author_url` VARCHAR(512) DEFAULT NULL, + PRIMARY KEY (`author_id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; + +CREATE TABLE `symbols` ( + `symbol_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT UNIQUE, + `symbol_codelet` BIGINT UNSIGNED NOT NULL, -- TODO: foreign index? + `symbol_type` TINYINT UNSIGNED NOT NULL, -- TODO: multi-column index? + `symbol_name` VARCHAR(512) NOT NULL, -- TODO: needs index + `symbol_row` INT UNSIGNED NOT NULL, + `symbol_col` INT UNSIGNED NOT NULL, + PRIMARY KEY (`symbol_id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; + +CREATE TABLE `query_cache` ( + `qcache_id` INT NOT NULL UNIQUE, + `qcache_query` VARCHAR(512) NOT NULL, + `qcache_results` BLOB NOT NULL, -- TODO: verify; perhaps use some kind of array + `qcache_page` TINYINT UNSIGNED NOT NULL, + `qcache_count_mnt` TINYINT UNSIGNED NOT NULL, + `qcache_count_exp` TINYINT UNSIGNED NOT NULL, + `qcache_created` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, -- TODO: verify + `qcache_last_used` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, -- TODO: verify + PRIMARY KEY (`cache_id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; + +-- TODO: full-text search index table From 1cbe669c0247446fba178c07d3f8daf86e73e5ca Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 17 Apr 2014 19:25:42 -0400 Subject: [PATCH 04/18] More work on db schema; all except FTS indices. --- bitshift/database.py | 12 +++--- schema.sql | 108 ++++++++++++++++++++++++++++++++------------------- 2 files changed, 74 insertions(+), 46 deletions(-) diff --git a/bitshift/database.py b/bitshift/database.py index 07c71c2..b86b05a 100644 --- a/bitshift/database.py +++ b/bitshift/database.py @@ -28,13 +28,15 @@ class Database(object): :return: A list of search results. :rtype: list of :py:class:`.Codelet`\ s """ - # query tree hash + page -> cached? 
+ # search for cache_hash = mmh3.hash(query.serialize() + str(page)) # cache HIT: - # if qcache_created is too old: invalidate cache, goto cache MISS - # update qcache_last_used - # parse qcache_results, fetch codelets + # update cache_last_used + # return codelets # cache MISS: # build complex search query + # fetch codelets + # cache results + # return codelets pass def insert(self, codelet): @@ -44,5 +46,5 @@ class Database(object): :param codelet: The codelet to insert. :type codelet: :py:class:`.Codelet` """ - # code_hash = mmh3.hash64(codelet.code)[0] + # code_hash = mmh3.hash64(codelet.code.encode("utf8"))[0] pass diff --git a/schema.sql b/schema.sql index d49fc6e..21c9c07 100644 --- a/schema.sql +++ b/schema.sql @@ -2,67 +2,93 @@ CREATE DATABASE `bitshift` DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci; USE `bitshift`; CREATE TABLE `languages` ( - `language_id` SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT UNIQUE, + `language_id` SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, `language_name` VARCHAR(64) NOT NULL, PRIMARY KEY (`language_id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +) ENGINE=InnoDB; CREATE TABLE `origins` ( - `origin_id` TINYINT UNSIGNED NOT NULL AUTO_INCREMENT UNIQUE, + `origin_id` TINYINT UNSIGNED NOT NULL AUTO_INCREMENT, `origin_name` VARCHAR(64) NOT NULL, `origin_url` VARCHAR(512) NOT NULL, `origin_url_base` VARCHAR(512) NOT NULL, - `origin_image` TINYBLOB DEFAULT NULL, -- TODO: verify size (<64kB) + `origin_image` BLOB DEFAULT NULL, PRIMARY KEY (`origin_id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +) ENGINE=InnoDB; + +CREATE TABLE `code` ( + `code_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, + `code_hash` BIGINT NOT NULL, + `code_code` MEDIUMTEXT NOT NULL, -- TODO: full-text search index + PRIMARY KEY (`code_id`), + KEY (`code_hash`) +) ENGINE=InnoDB; CREATE TABLE `codelets` ( - `codelet_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT UNIQUE, - `codelet_name` VARCHAR(512) NOT NULL, + `codelet_id` 
BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, + `codelet_name` VARCHAR(300) NOT NULL, -- TODO: full-text search index `codelet_code_id` BIGINT UNSIGNED NOT NULL, - `codelet_lang` SMALLINT UNSIGNED DEFAULT NULL, -- TODO: needs index + `codelet_lang` SMALLINT UNSIGNED DEFAULT NULL, `codelet_origin` TINYINT UNSIGNED NOT NULL, `codelet_url` VARCHAR(512) NOT NULL, - `codelet_date_created` DATETIME DEFAULT NULL, -- TODO: needs index - `codelet_date_modified` DATETIME DEFAULT NULL, -- TODO: needs index - PRIMARY KEY (`codelet_id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; - -CREATE TABLE `code` ( - `code_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT UNIQUE, - `code_hash` BIGINT NOT NULL, -- TODO: needs index - `code_code` MEDIUMTEXT NOT NULL, -- TODO: verify size (16mB?) - PRIMARY KEY (`code_id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; + `codelet_date_created` DATETIME DEFAULT NULL, + `codelet_date_modified` DATETIME DEFAULT NULL, + PRIMARY KEY (`codelet_id`), + KEY (`codelet_date_created`), + KEY (`codelet_date_modified`), + FOREIGN KEY (`codelet_code_id`) + REFERENCES `code` (`code_id`) + ON DELETE RESTRICT ON UPDATE CASCADE, + FOREIGN KEY (`codelet_lang`) + REFERENCES `languages` (`language_id`) + ON DELETE RESTRICT ON UPDATE CASCADE, + FOREIGN KEY (`codelet_origin`) + REFERENCES `origins` (`origin_id`) + ON DELETE RESTRICT ON UPDATE CASCADE +) ENGINE=InnoDB; CREATE TABLE `authors` ( - `author_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT UNIQUE, - `author_codelet` BIGINT UNSIGNED NOT NULL, -- TODO: foreign index? 
- `author_name` VARCHAR(128) NOT NULL, -- TODO: needs index + `author_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, + `author_codelet` BIGINT UNSIGNED NOT NULL, + `author_name` VARCHAR(128) NOT NULL, -- TODO: full-text search index `author_url` VARCHAR(512) DEFAULT NULL, - PRIMARY KEY (`author_id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; + PRIMARY KEY (`author_id`), + FOREIGN KEY (`author_codelet`) + REFERENCES `codelet` (`codelet_id`) + ON DELETE CASCADE ON UPDATE CASCADE +) ENGINE=InnoDB; CREATE TABLE `symbols` ( - `symbol_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT UNIQUE, - `symbol_codelet` BIGINT UNSIGNED NOT NULL, -- TODO: foreign index? - `symbol_type` TINYINT UNSIGNED NOT NULL, -- TODO: multi-column index? - `symbol_name` VARCHAR(512) NOT NULL, -- TODO: needs index + `symbol_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, + `symbol_codelet` BIGINT UNSIGNED NOT NULL, + `symbol_type` TINYINT UNSIGNED NOT NULL, + `symbol_name` VARCHAR(512) NOT NULL, `symbol_row` INT UNSIGNED NOT NULL, `symbol_col` INT UNSIGNED NOT NULL, - PRIMARY KEY (`symbol_id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; + PRIMARY KEY (`symbol_id`), + KEY (`symbol_type`, `symbol_name`(32)), + FOREIGN KEY (`symbol_codelet`) + REFERENCES `codelet` (`codelet_id`) + ON DELETE CASCADE ON UPDATE CASCADE +) ENGINE=InnoDB; -CREATE TABLE `query_cache` ( - `qcache_id` INT NOT NULL UNIQUE, - `qcache_query` VARCHAR(512) NOT NULL, - `qcache_results` BLOB NOT NULL, -- TODO: verify; perhaps use some kind of array - `qcache_page` TINYINT UNSIGNED NOT NULL, - `qcache_count_mnt` TINYINT UNSIGNED NOT NULL, - `qcache_count_exp` TINYINT UNSIGNED NOT NULL, - `qcache_created` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, -- TODO: verify - `qcache_last_used` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, -- TODO: verify +CREATE TABLE `cache` ( + `cache_id` INT UNSIGNED NOT NULL AUTO_INCREMENT, + `cache_hash` BIGINT NOT NULL, + `cache_count_mnt` TINYINT UNSIGNED NOT NULL, + 
`cache_count_exp` TINYINT UNSIGNED NOT NULL, + `cache_created` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + `cache_last_used` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (`cache_id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +) ENGINE=InnoDB; --- TODO: full-text search index table +CREATE TABLE `cache_data` ( + `cdata_cache` INT UNSIGNED NOT NULL, + `cdata_codelet` BIGINT UNSIGNED NOT NULL, + FOREIGN KEY (`cdata_cache`) + REFERENCES `cache` (`cache_id`) + ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY (`cdata_codelet`) + REFERENCES `codelet` (`codelet_id`) + ON DELETE CASCADE ON UPDATE CASCADE +) ENGINE=InnoDB; From 75b243f6853f224593c6aff1153ea9a74f768ba4 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 17 Apr 2014 20:33:14 -0400 Subject: [PATCH 05/18] Remove languages table; add indexed field for codelet rank. --- bitshift/database.py | 2 ++ schema.sql | 11 ++--------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/bitshift/database.py b/bitshift/database.py index b86b05a..02aa38e 100644 --- a/bitshift/database.py +++ b/bitshift/database.py @@ -6,6 +6,8 @@ database backend, which manages the search index. import mmh3 import oursql +# from .languages import ... 
+ class Database(object): """Represents the MySQL database.""" diff --git a/schema.sql b/schema.sql index 21c9c07..a76f8f8 100644 --- a/schema.sql +++ b/schema.sql @@ -1,12 +1,6 @@ CREATE DATABASE `bitshift` DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci; USE `bitshift`; -CREATE TABLE `languages` ( - `language_id` SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, - `language_name` VARCHAR(64) NOT NULL, - PRIMARY KEY (`language_id`) -) ENGINE=InnoDB; - CREATE TABLE `origins` ( `origin_id` TINYINT UNSIGNED NOT NULL AUTO_INCREMENT, `origin_name` VARCHAR(64) NOT NULL, @@ -31,17 +25,16 @@ CREATE TABLE `codelets` ( `codelet_lang` SMALLINT UNSIGNED DEFAULT NULL, `codelet_origin` TINYINT UNSIGNED NOT NULL, `codelet_url` VARCHAR(512) NOT NULL, + `codelet_rank` FLOAT NOT NULL, `codelet_date_created` DATETIME DEFAULT NULL, `codelet_date_modified` DATETIME DEFAULT NULL, PRIMARY KEY (`codelet_id`), + KEY (`codelet_rank`), KEY (`codelet_date_created`), KEY (`codelet_date_modified`), FOREIGN KEY (`codelet_code_id`) REFERENCES `code` (`code_id`) ON DELETE RESTRICT ON UPDATE CASCADE, - FOREIGN KEY (`codelet_lang`) - REFERENCES `languages` (`language_id`) - ON DELETE RESTRICT ON UPDATE CASCADE, FOREIGN KEY (`codelet_origin`) REFERENCES `origins` (`origin_id`) ON DELETE RESTRICT ON UPDATE CASCADE From fb4e0d5916d6e6edcae9e5c6ef6cedb55ed9725f Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 18 Apr 2014 02:16:42 -0400 Subject: [PATCH 06/18] FULLTEXT KEYs where appropriate. 
--- schema.sql | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/schema.sql b/schema.sql index a76f8f8..df77720 100644 --- a/schema.sql +++ b/schema.sql @@ -13,14 +13,15 @@ CREATE TABLE `origins` ( CREATE TABLE `code` ( `code_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, `code_hash` BIGINT NOT NULL, - `code_code` MEDIUMTEXT NOT NULL, -- TODO: full-text search index + `code_code` MEDIUMTEXT NOT NULL, PRIMARY KEY (`code_id`), - KEY (`code_hash`) + KEY (`code_hash`), + FULLTEXT KEY (`codelet_code`) ) ENGINE=InnoDB; CREATE TABLE `codelets` ( `codelet_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, - `codelet_name` VARCHAR(300) NOT NULL, -- TODO: full-text search index + `codelet_name` VARCHAR(300) NOT NULL, `codelet_code_id` BIGINT UNSIGNED NOT NULL, `codelet_lang` SMALLINT UNSIGNED DEFAULT NULL, `codelet_origin` TINYINT UNSIGNED NOT NULL, @@ -29,6 +30,7 @@ CREATE TABLE `codelets` ( `codelet_date_created` DATETIME DEFAULT NULL, `codelet_date_modified` DATETIME DEFAULT NULL, PRIMARY KEY (`codelet_id`), + FULLTEXT KEY (`codelet_name`), KEY (`codelet_rank`), KEY (`codelet_date_created`), KEY (`codelet_date_modified`), @@ -43,9 +45,10 @@ CREATE TABLE `codelets` ( CREATE TABLE `authors` ( `author_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, `author_codelet` BIGINT UNSIGNED NOT NULL, - `author_name` VARCHAR(128) NOT NULL, -- TODO: full-text search index + `author_name` VARCHAR(128) NOT NULL, `author_url` VARCHAR(512) DEFAULT NULL, PRIMARY KEY (`author_id`), + FULLTEXT KEY (`author_name`), FOREIGN KEY (`author_codelet`) REFERENCES `codelet` (`codelet_id`) ON DELETE CASCADE ON UPDATE CASCADE From ad3de0615fdd0fbf5310dd4354abb6daa162e0dc Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 24 Apr 2014 14:38:33 -0400 Subject: [PATCH 07/18] Fix some typos in the schema. 
--- schema.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/schema.sql b/schema.sql index df77720..15979be 100644 --- a/schema.sql +++ b/schema.sql @@ -16,7 +16,7 @@ CREATE TABLE `code` ( `code_code` MEDIUMTEXT NOT NULL, PRIMARY KEY (`code_id`), KEY (`code_hash`), - FULLTEXT KEY (`codelet_code`) + FULLTEXT KEY (`code_code`) ) ENGINE=InnoDB; CREATE TABLE `codelets` ( @@ -50,7 +50,7 @@ CREATE TABLE `authors` ( PRIMARY KEY (`author_id`), FULLTEXT KEY (`author_name`), FOREIGN KEY (`author_codelet`) - REFERENCES `codelet` (`codelet_id`) + REFERENCES `codelets` (`codelet_id`) ON DELETE CASCADE ON UPDATE CASCADE ) ENGINE=InnoDB; @@ -64,7 +64,7 @@ CREATE TABLE `symbols` ( PRIMARY KEY (`symbol_id`), KEY (`symbol_type`, `symbol_name`(32)), FOREIGN KEY (`symbol_codelet`) - REFERENCES `codelet` (`codelet_id`) + REFERENCES `codelets` (`codelet_id`) ON DELETE CASCADE ON UPDATE CASCADE ) ENGINE=InnoDB; @@ -85,6 +85,6 @@ CREATE TABLE `cache_data` ( REFERENCES `cache` (`cache_id`) ON DELETE CASCADE ON UPDATE CASCADE, FOREIGN KEY (`cdata_codelet`) - REFERENCES `codelet` (`codelet_id`) + REFERENCES `codelets` (`codelet_id`) ON DELETE CASCADE ON UPDATE CASCADE ) ENGINE=InnoDB; From 54bca5894f9f0866538292f40593f99e61eeae97 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 27 Apr 2014 00:47:13 -0400 Subject: [PATCH 08/18] Move database stuff to a subpackage; updates. 
--- bitshift/{database.py => database/__init__.py} | 39 ++++++++++++++++++++++---- schema.sql => bitshift/database/schema.sql | 0 2 files changed, 34 insertions(+), 5 deletions(-) rename bitshift/{database.py => database/__init__.py} (55%) rename schema.sql => bitshift/database/schema.sql (100%) diff --git a/bitshift/database.py b/bitshift/database/__init__.py similarity index 55% rename from bitshift/database.py rename to bitshift/database/__init__.py index 02aa38e..4ed7a02 100644 --- a/bitshift/database.py +++ b/bitshift/database/__init__.py @@ -1,12 +1,16 @@ """ -Module with classes and functions to handle communication with the MySQL +Subpackage with classes and functions to handle communication with the MySQL database backend, which manages the search index. """ +import os + import mmh3 import oursql -# from .languages import ... +# from ..languages import ... + +__all__ = ["Database"] class Database(object): """Represents the MySQL database.""" @@ -16,7 +20,9 @@ class Database(object): def _connect(self): """Establish a connection to the database.""" - self._conn = oursql.connect() + default_file = os.path.join(os.path.dirname(__file__), ".my.cnf") + self._conn = oursql.connect(read_default_file=default_file, + autoping=True, autoreconnect=True) def search(self, query, page=1): """ @@ -48,5 +54,28 @@ class Database(object): :param codelet: The codelet to insert. 
:type codelet: :py:class:`.Codelet` """ - # code_hash = mmh3.hash64(codelet.code.encode("utf8"))[0] - pass + query = "INSERT INTO codelets VALUES (?, ?, ?, ?, ?, ?, ?, ?)" + + cursor.execute(query, ()) + + # codelet_id -- auto_increment used here + codelet_name + codelet_code_id + codelet_lang + codelet_origin + codelet_url + codelet_rank + codelet_date_created + codelet_date_modified + + # codelet fields + codelet.name + codelet.code + codelet.filename + codelet.language + codelet.authors + codelet.code_url + codelet.date_created + codelet.date_modified + + code_hash = mmh3.hash64(codelet.code.encode("utf8"))[0] diff --git a/schema.sql b/bitshift/database/schema.sql similarity index 100% rename from schema.sql rename to bitshift/database/schema.sql From 0d0a74f9dfd7fa382f2dcdb02256246e062d0450 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 27 Apr 2014 23:43:32 -0400 Subject: [PATCH 09/18] Some more work on db stuff. --- bitshift/database/__init__.py | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/bitshift/database/__init__.py b/bitshift/database/__init__.py index 4ed7a02..9a54ef2 100644 --- a/bitshift/database/__init__.py +++ b/bitshift/database/__init__.py @@ -20,7 +20,8 @@ class Database(object): def _connect(self): """Establish a connection to the database.""" - default_file = os.path.join(os.path.dirname(__file__), ".my.cnf") + root = os.path.dirname(os.path.abspath(__file__)) + default_file = os.path.join(root, ".my.cnf") self._conn = oursql.connect(read_default_file=default_file, autoping=True, autoreconnect=True) @@ -54,9 +55,18 @@ class Database(object): :param codelet: The codelet to insert. 
:type codelet: :py:class:`.Codelet` """ - query = "INSERT INTO codelets VALUES (?, ?, ?, ?, ?, ?, ?, ?)" + frag_size = 16384 # 16 kB + query_slt1 = """SELECT code_id, LEFT(code_code, {0}) + FROM code WHERE code_hash = ?""".format(frag_size) + query_ins1 = "INSERT INTO code VALUES (?, ?)" + query_ins2 = "INSERT INTO codelets VALUES (?, ?, ?, ?, ?, ?, ?, ?)" + query_ins3 = "INSERT INTO authors VALUES", " (?, ?, ?)" + query_ins4 = "INSERT INTO symbols VALUES", " (?, ?, ?, ?, ?)" - cursor.execute(query, ()) + # LAST_INSERT_ID() + + code_id = None + code_hash = mmh3.hash64(codelet.code.encode("utf8"))[0] # codelet_id -- auto_increment used here codelet_name @@ -78,4 +88,14 @@ class Database(object): codelet.date_created codelet.date_modified - code_hash = mmh3.hash64(codelet.code.encode("utf8"))[0] + with self._conn.cursor() as cursor: + # Retrieve the ID of the source code if it's already in the DB: + cursor.execute(query_slt1, (code_hash,)) + for c_id, c_code_frag in cursor.fetchall(): + if c_code_frag == codelet.code[:frag_size]: + code_id = c_id + break + + # If the source code isn't already in the DB, add it: + if not code_id: + cursor.execute() From 22d6b625474f535d53adef652bd4d6e3397af04e Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 28 Apr 2014 14:05:45 -0400 Subject: [PATCH 10/18] Update schema to v2; database updates. --- bitshift/database/__init__.py | 62 +++++++++++++++++++++++++++--------------- bitshift/database/migration.py | 23 ++++++++++++++++ bitshift/database/schema.sql | 13 +++++++-- 3 files changed, 73 insertions(+), 25 deletions(-) create mode 100644 bitshift/database/migration.py diff --git a/bitshift/database/__init__.py b/bitshift/database/__init__.py index 9a54ef2..50486b6 100644 --- a/bitshift/database/__init__.py +++ b/bitshift/database/__init__.py @@ -8,15 +8,16 @@ import os import mmh3 import oursql -# from ..languages import ... 
+from .migration import VERSION, MIGRATIONS __all__ = ["Database"] class Database(object): """Represents the MySQL database.""" - def __init__(self): + def __init__(self, migrate=False): self._connect() + self._check_version(migrate) def _connect(self): """Establish a connection to the database.""" @@ -25,6 +26,33 @@ class Database(object): self._conn = oursql.connect(read_default_file=default_file, autoping=True, autoreconnect=True) + def _migrate(self, cursor, current): + """Migrate the database to the latest schema version.""" + for version in xrange(current, VERSION): + for query in MIGRATIONS[version - 1]: + cursor.execute(query) + + def _check_version(self, migrate): + """Check the database schema version and respond accordingly. + + If the schema is out of date, migrate if *migrate* is True, else raise + an exception. + """ + with self._conn.cursor() as cursor: + cursor.execute("SELECT version FROM version") + version = cursor.fetchone()[0] + if version < VERSION: + if migrate: + self._migrate(cursor, version) + else: + err = "Database schema out of date. " \ + "Run `python -m bitshift.database.migration`." + raise RuntimeError(err) + + def close(self): + """Disconnect from the database.""" + self._conn.close() + def search(self, query, page=1): """ Search the database for a query and return the *n*\ th page of results. @@ -55,19 +83,14 @@ class Database(object): :param codelet: The codelet to insert. :type codelet: :py:class:`.Codelet` """ - frag_size = 16384 # 16 kB - query_slt1 = """SELECT code_id, LEFT(code_code, {0}) - FROM code WHERE code_hash = ?""".format(frag_size) - query_ins1 = "INSERT INTO code VALUES (?, ?)" - query_ins2 = "INSERT INTO codelets VALUES (?, ?, ?, ?, ?, ?, ?, ?)" - query_ins3 = "INSERT INTO authors VALUES", " (?, ?, ?)" - query_ins4 = "INSERT INTO symbols VALUES", " (?, ?, ?, ?, ?)" + query1 = """INSERT INTO code VALUES (?, ?) 
+ ON DUPLICATE KEY UPDATE code_id=code_id""" + query2 = "INSERT INTO codelets VALUES (?, ?, ?, ?, ?, ?, ?, ?)" + query3 = "INSERT INTO authors VALUES", " (?, ?, ?)" + query4 = "INSERT INTO symbols VALUES", " (?, ?, ?, ?, ?)" # LAST_INSERT_ID() - code_id = None - code_hash = mmh3.hash64(codelet.code.encode("utf8"))[0] - # codelet_id -- auto_increment used here codelet_name codelet_code_id @@ -88,14 +111,9 @@ class Database(object): codelet.date_created codelet.date_modified + ####################################################################### + + code_id = mmh3.hash64(codelet.code.encode("utf8"))[0] + with self._conn.cursor() as cursor: - # Retrieve the ID of the source code if it's already in the DB: - cursor.execute(query_slt1, (code_hash,)) - for c_id, c_code_frag in cursor.fetchall(): - if c_code_frag == codelet.code[:frag_size]: - code_id = c_id - break - - # If the source code isn't already in the DB, add it: - if not code_id: - cursor.execute() + cursor.execute(query1, (code_id, codelet.code)) diff --git a/bitshift/database/migration.py b/bitshift/database/migration.py new file mode 100644 index 0000000..c9fdd39 --- /dev/null +++ b/bitshift/database/migration.py @@ -0,0 +1,23 @@ +""" +Contains information about database schema versions, and SQL queries to update +between them. +""" + +VERSION = 2 + +MIGRATIONS = [ + # 1 -> 2 + [ + # drop index on code_hash + "ALTER TABLE code DROP COLUMN code_hash", + # change code_id to BIGINT NOT NULL, + # add key on codelets to codelet_lang + # add symbol_end_row INT UNSIGNED NOT NULL + # add symbol_end_col INT UNSIGNED NOT NULL + ] +] + +if __name__ == "__main__": + from . 
import Database + + Database(migrate=True).close() diff --git a/bitshift/database/schema.sql b/bitshift/database/schema.sql index 15979be..159f85a 100644 --- a/bitshift/database/schema.sql +++ b/bitshift/database/schema.sql @@ -1,6 +1,12 @@ +-- Schema version 2 + CREATE DATABASE `bitshift` DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci; USE `bitshift`; +CREATE TABLE `version` ( + `version` INT UNSIGNED NOT NULL +) ENGINE=InnoDB; + CREATE TABLE `origins` ( `origin_id` TINYINT UNSIGNED NOT NULL AUTO_INCREMENT, `origin_name` VARCHAR(64) NOT NULL, @@ -11,11 +17,9 @@ CREATE TABLE `origins` ( ) ENGINE=InnoDB; CREATE TABLE `code` ( - `code_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, - `code_hash` BIGINT NOT NULL, + `code_id` BIGINT NOT NULL, `code_code` MEDIUMTEXT NOT NULL, PRIMARY KEY (`code_id`), - KEY (`code_hash`), FULLTEXT KEY (`code_code`) ) ENGINE=InnoDB; @@ -31,6 +35,7 @@ CREATE TABLE `codelets` ( `codelet_date_modified` DATETIME DEFAULT NULL, PRIMARY KEY (`codelet_id`), FULLTEXT KEY (`codelet_name`), + KEY (`codelet_lang`), KEY (`codelet_rank`), KEY (`codelet_date_created`), KEY (`codelet_date_modified`), @@ -61,6 +66,8 @@ CREATE TABLE `symbols` ( `symbol_name` VARCHAR(512) NOT NULL, `symbol_row` INT UNSIGNED NOT NULL, `symbol_col` INT UNSIGNED NOT NULL, + `symbol_end_row` INT UNSIGNED NOT NULL, + `symbol_end_col` INT UNSIGNED NOT NULL, PRIMARY KEY (`symbol_id`), KEY (`symbol_type`, `symbol_name`(32)), FOREIGN KEY (`symbol_codelet`) From 0b655daaff3cdd41f48b96fe34f786f10deed56a Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 29 Apr 2014 13:19:02 -0400 Subject: [PATCH 11/18] Finish migration to v2. 
--- bitshift/database/__init__.py | 2 ++ bitshift/database/migration.py | 21 +++++++++++++++------ bitshift/database/schema.sql | 3 ++- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/bitshift/database/__init__.py b/bitshift/database/__init__.py index 50486b6..14f7575 100644 --- a/bitshift/database/__init__.py +++ b/bitshift/database/__init__.py @@ -29,8 +29,10 @@ class Database(object): def _migrate(self, cursor, current): """Migrate the database to the latest schema version.""" for version in xrange(current, VERSION): + print "Migrating to %d..." % version + 1 for query in MIGRATIONS[version - 1]: cursor.execute(query) + cursor.execute("UPDATE version SET version = ?", (version + 1,)) def _check_version(self, migrate): """Check the database schema version and respond accordingly. diff --git a/bitshift/database/migration.py b/bitshift/database/migration.py index c9fdd39..2ea9666 100644 --- a/bitshift/database/migration.py +++ b/bitshift/database/migration.py @@ -8,12 +8,21 @@ VERSION = 2 MIGRATIONS = [ # 1 -> 2 [ - # drop index on code_hash - "ALTER TABLE code DROP COLUMN code_hash", - # change code_id to BIGINT NOT NULL, - # add key on codelets to codelet_lang - # add symbol_end_row INT UNSIGNED NOT NULL - # add symbol_end_col INT UNSIGNED NOT NULL + """ALTER TABLE `codelets` + DROP FOREIGN KEY `codelets_ibfk_1`""", + """ALTER TABLE `code` + DROP KEY `code_hash`, + DROP COLUMN `code_hash`, + MODIFY COLUMN `code_id` BIGINT NOT NULL""", + """ALTER TABLE `codelets` + MODIFY COLUMN `codelet_code_id` BIGINT NOT NULL, + ADD KEY (`codelet_lang`), + ADD FOREIGN KEY (`codelet_code_id`) + REFERENCES `code` (`code_id`) + ON DELETE RESTRICT ON UPDATE CASCADE""", + """ALTER TABLE `symbols` + ADD COLUMN `symbol_end_row` INT UNSIGNED NOT NULL, + ADD COLUMN `symbol_end_col` INT UNSIGNED NOT NULL""" ] ] diff --git a/bitshift/database/schema.sql b/bitshift/database/schema.sql index 159f85a..56a2d85 100644 --- a/bitshift/database/schema.sql +++ 
b/bitshift/database/schema.sql @@ -6,6 +6,7 @@ USE `bitshift`; CREATE TABLE `version` ( `version` INT UNSIGNED NOT NULL ) ENGINE=InnoDB; +INSERT INTO `version` VALUES (2); CREATE TABLE `origins` ( `origin_id` TINYINT UNSIGNED NOT NULL AUTO_INCREMENT, @@ -26,7 +27,7 @@ CREATE TABLE `code` ( CREATE TABLE `codelets` ( `codelet_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, `codelet_name` VARCHAR(300) NOT NULL, - `codelet_code_id` BIGINT UNSIGNED NOT NULL, + `codelet_code_id` BIGINT NOT NULL, `codelet_lang` SMALLINT UNSIGNED DEFAULT NULL, `codelet_origin` TINYINT UNSIGNED NOT NULL, `codelet_url` VARCHAR(512) NOT NULL, From 821a6ae4f1a30c2b8b4575c408145f8b34877206 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 30 Apr 2014 14:44:31 -0400 Subject: [PATCH 12/18] DB -> v3 for symbol->code assoc vs. ->codelet (fixes #13) --- bitshift/database/migration.py | 13 +++++++++++-- bitshift/database/schema.sql | 10 +++++----- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/bitshift/database/migration.py b/bitshift/database/migration.py index 2ea9666..caf3020 100644 --- a/bitshift/database/migration.py +++ b/bitshift/database/migration.py @@ -3,7 +3,7 @@ Contains information about database schema versions, and SQL queries to update between them. 
""" -VERSION = 2 +VERSION = 3 MIGRATIONS = [ # 1 -> 2 @@ -17,12 +17,21 @@ MIGRATIONS = [ """ALTER TABLE `codelets` MODIFY COLUMN `codelet_code_id` BIGINT NOT NULL, ADD KEY (`codelet_lang`), - ADD FOREIGN KEY (`codelet_code_id`) + ADD CONSTRAINT `codelets_ibfk_1` FOREIGN KEY (`codelet_code_id`) REFERENCES `code` (`code_id`) ON DELETE RESTRICT ON UPDATE CASCADE""", """ALTER TABLE `symbols` ADD COLUMN `symbol_end_row` INT UNSIGNED NOT NULL, ADD COLUMN `symbol_end_col` INT UNSIGNED NOT NULL""" + ], + # 2 -> 3 + [ + """ALTER TABLE `symbols` + DROP FOREIGN KEY `symbols_ibfk_1`, + CHANGE COLUMN `symbol_codelet` `symbol_code` BIGINT NOT NULL, + ADD CONSTRAINT `symbols_ibfk_1` FOREIGN KEY (`symbol_code`) + REFERENCES `code` (`code_id`) + ON DELETE CASCADE ON UPDATE CASCADE""" ] ] diff --git a/bitshift/database/schema.sql b/bitshift/database/schema.sql index 56a2d85..99b9e42 100644 --- a/bitshift/database/schema.sql +++ b/bitshift/database/schema.sql @@ -1,4 +1,4 @@ --- Schema version 2 +-- Schema version 3 CREATE DATABASE `bitshift` DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci; USE `bitshift`; @@ -6,7 +6,7 @@ USE `bitshift`; CREATE TABLE `version` ( `version` INT UNSIGNED NOT NULL ) ENGINE=InnoDB; -INSERT INTO `version` VALUES (2); +INSERT INTO `version` VALUES (3); CREATE TABLE `origins` ( `origin_id` TINYINT UNSIGNED NOT NULL AUTO_INCREMENT, @@ -62,7 +62,7 @@ CREATE TABLE `authors` ( CREATE TABLE `symbols` ( `symbol_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, - `symbol_codelet` BIGINT UNSIGNED NOT NULL, + `symbol_code` BIGINT NOT NULL, `symbol_type` TINYINT UNSIGNED NOT NULL, `symbol_name` VARCHAR(512) NOT NULL, `symbol_row` INT UNSIGNED NOT NULL, @@ -71,8 +71,8 @@ CREATE TABLE `symbols` ( `symbol_end_col` INT UNSIGNED NOT NULL, PRIMARY KEY (`symbol_id`), KEY (`symbol_type`, `symbol_name`(32)), - FOREIGN KEY (`symbol_codelet`) - REFERENCES `codelets` (`codelet_id`) + FOREIGN KEY (`symbol_code`) + REFERENCES `code` (`code_id`) ON DELETE CASCADE ON UPDATE CASCADE ) 
ENGINE=InnoDB; From e3a838220c7394e0985e627a4d7c090ba09e6bb2 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 30 Apr 2014 14:44:45 -0400 Subject: [PATCH 13/18] Flesh out most of Database.insert(). --- bitshift/database/__init__.py | 44 +++++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/bitshift/database/__init__.py b/bitshift/database/__init__.py index 14f7575..03a5c2c 100644 --- a/bitshift/database/__init__.py +++ b/bitshift/database/__init__.py @@ -87,35 +87,25 @@ class Database(object): """ query1 = """INSERT INTO code VALUES (?, ?) ON DUPLICATE KEY UPDATE code_id=code_id""" - query2 = "INSERT INTO codelets VALUES (?, ?, ?, ?, ?, ?, ?, ?)" - query3 = "INSERT INTO authors VALUES", " (?, ?, ?)" - query4 = "INSERT INTO symbols VALUES", " (?, ?, ?, ?, ?)" - - # LAST_INSERT_ID() - - # codelet_id -- auto_increment used here - codelet_name - codelet_code_id - codelet_lang - codelet_origin - codelet_url - codelet_rank - codelet_date_created - codelet_date_modified - - # codelet fields - codelet.name - codelet.code - codelet.filename - codelet.language - codelet.authors - codelet.code_url - codelet.date_created - codelet.date_modified - - ####################################################################### + query2 = """INSERT INTO codelets VALUES + (?, ?, ?, ?, ?, ?, ?, ?)""" + query3 = "SELECT LAST_INSERT_ID()" + query4 = "INSERT INTO authors VALUES (?, ?, ?)" + query5 = "INSERT INTO symbols VALUES (?, ?, ?, ?, ?, ?, ?)" code_id = mmh3.hash64(codelet.code.encode("utf8"))[0] + origin, url = decompose(codelet.url) ## TODO: create decompose() function with self._conn.cursor() as cursor: cursor.execute(query1, (code_id, codelet.code)) + cursor.execute(query2, (codelet.name, code_id, codelet.language, + origin, url, codelet.rank, + codelet.date_created, + codelet.date_modified)) + cursor.execute(query3) + codelet_id = cursor.fetchone()[0] + authors = [(codelet_id, a.name, a.url) for a in codelet.authors] ## 
TODO: check author fields (is it a tuple?) + cursor.executemany(query4, authors) + if code_id is new: ## TODO: check for this properly + symbols = [(code_id, sym.type, sym.name, sym.row, sym.col, sym.end_row, sym.end_col) for sym in codelet.symbols] # TODO: check symbol fields (dict?) + cursor.executemany(query5, symbols) From 97b0644bf01932ba32863999226ae1ade7cd8fee Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 2 May 2014 14:40:00 -0400 Subject: [PATCH 14/18] Database to v4: split off symbol_locations table. --- bitshift/database/migration.py | 23 ++++++++++++++++++++++- bitshift/database/schema.sql | 22 ++++++++++++++++------ 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/bitshift/database/migration.py b/bitshift/database/migration.py index caf3020..e0ec762 100644 --- a/bitshift/database/migration.py +++ b/bitshift/database/migration.py @@ -3,7 +3,7 @@ Contains information about database schema versions, and SQL queries to update between them. """ -VERSION = 3 +VERSION = 4 MIGRATIONS = [ # 1 -> 2 @@ -32,6 +32,27 @@ MIGRATIONS = [ ADD CONSTRAINT `symbols_ibfk_1` FOREIGN KEY (`symbol_code`) REFERENCES `code` (`code_id`) ON DELETE CASCADE ON UPDATE CASCADE""" + ], + # 3 -> 4 + [ + """ALTER TABLE `symbols` + DROP COLUMN `symbol_row`, + DROP COLUMN `symbol_col`, + DROP COLUMN `symbol_end_row`, + DROP COLUMN `symbol_end_col`""", + """CREATE TABLE `symbol_locations` ( + `sloc_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, + `sloc_symbol` BIGINT UNSIGNED NOT NULL, + `sloc_type` TINYINT UNSIGNED NOT NULL, + `sloc_row` INT UNSIGNED NOT NULL, + `sloc_col` INT UNSIGNED NOT NULL, + `sloc_end_row` INT UNSIGNED NOT NULL, + `sloc_end_col` INT UNSIGNED NOT NULL, + PRIMARY KEY (`sloc_id`), + FOREIGN KEY (`sloc_symbol`) + REFERENCES `symbols` (`symbol_id`) + ON DELETE CASCADE ON UPDATE CASCADE + ) ENGINE=InnoDB""" ] ] diff --git a/bitshift/database/schema.sql b/bitshift/database/schema.sql index 99b9e42..79dad45 100644 --- a/bitshift/database/schema.sql +++ 
b/bitshift/database/schema.sql @@ -1,4 +1,4 @@ --- Schema version 3 +-- Schema version 4 CREATE DATABASE `bitshift` DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci; USE `bitshift`; @@ -6,7 +6,7 @@ USE `bitshift`; CREATE TABLE `version` ( `version` INT UNSIGNED NOT NULL ) ENGINE=InnoDB; -INSERT INTO `version` VALUES (3); +INSERT INTO `version` VALUES (4); CREATE TABLE `origins` ( `origin_id` TINYINT UNSIGNED NOT NULL AUTO_INCREMENT, @@ -65,10 +65,6 @@ CREATE TABLE `symbols` ( `symbol_code` BIGINT NOT NULL, `symbol_type` TINYINT UNSIGNED NOT NULL, `symbol_name` VARCHAR(512) NOT NULL, - `symbol_row` INT UNSIGNED NOT NULL, - `symbol_col` INT UNSIGNED NOT NULL, - `symbol_end_row` INT UNSIGNED NOT NULL, - `symbol_end_col` INT UNSIGNED NOT NULL, PRIMARY KEY (`symbol_id`), KEY (`symbol_type`, `symbol_name`(32)), FOREIGN KEY (`symbol_code`) @@ -76,6 +72,20 @@ CREATE TABLE `symbols` ( ON DELETE CASCADE ON UPDATE CASCADE ) ENGINE=InnoDB; +CREATE TABLE `symbol_locations` ( + `sloc_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, + `sloc_symbol` BIGINT UNSIGNED NOT NULL, + `sloc_type` TINYINT UNSIGNED NOT NULL, + `sloc_row` INT UNSIGNED NOT NULL, + `sloc_col` INT UNSIGNED NOT NULL, + `sloc_end_row` INT UNSIGNED NOT NULL, + `sloc_end_col` INT UNSIGNED NOT NULL, + PRIMARY KEY (`sloc_id`), + FOREIGN KEY (`sloc_symbol`) + REFERENCES `symbols` (`symbol_id`) + ON DELETE CASCADE ON UPDATE CASCADE +) ENGINE=InnoDB; + CREATE TABLE `cache` ( `cache_id` INT UNSIGNED NOT NULL AUTO_INCREMENT, `cache_hash` BIGINT NOT NULL, From d2aef2829e5edf11c2e392ce14436c5e452af42f Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 2 May 2014 14:40:52 -0400 Subject: [PATCH 15/18] Finish database insertion, except for origins. 
--- bitshift/database/__init__.py | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/bitshift/database/__init__.py b/bitshift/database/__init__.py index 03a5c2c..bc4b451 100644 --- a/bitshift/database/__init__.py +++ b/bitshift/database/__init__.py @@ -51,6 +51,23 @@ class Database(object): "Run `python -m bitshift.database.migration`." raise RuntimeError(err) + def _decompose_url(self, url): + """Break up a URL into an origin (with a URL base) and a suffix.""" + pass ## TODO + + def _insert_symbols(self, cursor, code_id, sym_type, symbols): + """Insert a list of symbols of a given type into the database.""" + sym_types = ["functions", "classes", "variables"] + query1 = "INSERT INTO symbols VALUES (?, ?, ?)" + query2 = "INSERT INTO symbol_locations VALUES (?, ?, ?, ?, ?, ?)" + + for (name, decls, uses) in symbols: + cursor.execute(query1, (code_id, sym_types.index(sym_type), name)) + sym_id = cursor.lastrowid + params = ([tuple([sym_id, 0] + list(loc)) for loc in decls] + + [tuple([sym_id, 1] + list(loc)) for loc in uses]) + cursor.executemany(query2, params) + def close(self): """Disconnect from the database.""" self._conn.close() @@ -89,23 +106,21 @@ class Database(object): ON DUPLICATE KEY UPDATE code_id=code_id""" query2 = """INSERT INTO codelets VALUES (?, ?, ?, ?, ?, ?, ?, ?)""" - query3 = "SELECT LAST_INSERT_ID()" - query4 = "INSERT INTO authors VALUES (?, ?, ?)" - query5 = "INSERT INTO symbols VALUES (?, ?, ?, ?, ?, ?, ?)" + query3 = "INSERT INTO authors VALUES (?, ?, ?)" code_id = mmh3.hash64(codelet.code.encode("utf8"))[0] - origin, url = decompose(codelet.url) ## TODO: create decompose() function + origin, url = self._decompose_url(codelet.url) with self._conn.cursor() as cursor: cursor.execute(query1, (code_id, codelet.code)) + new_code = cursor.rowcount == 1 cursor.execute(query2, (codelet.name, code_id, codelet.language, origin, url, codelet.rank, codelet.date_created, codelet.date_modified)) - 
cursor.execute(query3) - codelet_id = cursor.fetchone()[0] - authors = [(codelet_id, a.name, a.url) for a in codelet.authors] ## TODO: check author fields (is it a tuple?) - cursor.executemany(query4, authors) - if code_id is new: ## TODO: check for this properly - symbols = [(code_id, sym.type, sym.name, sym.row, sym.col, sym.end_row, sym.end_col) for sym in codelet.symbols] # TODO: check symbol fields (dict?) - cursor.executemany(query5, symbols) + codelet_id = cursor.lastrowid + authors = [(codelet_id, a[0], a[1]) for a in codelet.authors] + cursor.executemany(query3, authors) + if new_code: + for sym_type, symbols in codelet.symbols.iteritems(): + self._insert_symbols(cursor, code_id, sym_type, symbols) From d6ccdbd16d1db369801ebd7a12ba1bf90df5225a Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 2 May 2014 22:43:16 -0400 Subject: [PATCH 16/18] Fix a couple Database bugs. --- bitshift/database/__init__.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/bitshift/database/__init__.py b/bitshift/database/__init__.py index bc4b451..1a2b373 100644 --- a/bitshift/database/__init__.py +++ b/bitshift/database/__init__.py @@ -16,15 +16,15 @@ class Database(object): """Represents the MySQL database.""" def __init__(self, migrate=False): - self._connect() + self._conn = self._connect() self._check_version(migrate) def _connect(self): """Establish a connection to the database.""" root = os.path.dirname(os.path.abspath(__file__)) default_file = os.path.join(root, ".my.cnf") - self._conn = oursql.connect(read_default_file=default_file, - autoping=True, autoreconnect=True) + return oursql.connect(db="bitshift", read_default_file=default_file, + autoping=True, autoreconnect=True) def _migrate(self, cursor, current): """Migrate the database to the latest schema version.""" @@ -58,8 +58,9 @@ class Database(object): def _insert_symbols(self, cursor, code_id, sym_type, symbols): """Insert a list of symbols of a given type into the
database.""" sym_types = ["functions", "classes", "variables"] - query1 = "INSERT INTO symbols VALUES (?, ?, ?)" - query2 = "INSERT INTO symbol_locations VALUES (?, ?, ?, ?, ?, ?)" + query1 = "INSERT INTO symbols VALUES (DEFAULT, ?, ?, ?)" + query2 = """INSERT INTO symbol_locations VALUES + (DEFAULT, ?, ?, ?, ?, ?, ?)""" for (name, decls, uses) in symbols: cursor.execute(query1, (code_id, sym_types.index(sym_type), name)) @@ -105,8 +106,8 @@ class Database(object): query1 = """INSERT INTO code VALUES (?, ?) ON DUPLICATE KEY UPDATE code_id=code_id""" query2 = """INSERT INTO codelets VALUES - (?, ?, ?, ?, ?, ?, ?, ?)""" - query3 = "INSERT INTO authors VALUES (?, ?, ?)" + (DEFAULT, ?, ?, ?, ?, ?, ?, ?, ?)""" + query3 = "INSERT INTO authors VALUES (DEFAULT, ?, ?, ?)" code_id = mmh3.hash64(codelet.code.encode("utf8"))[0] origin, url = self._decompose_url(codelet.url) From 950b6994f0abb83192065cedaeeef07bd1b5dd99 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 3 May 2014 17:50:16 -0400 Subject: [PATCH 17/18] Database to v5; finish Database.insert(). --- bitshift/database/__init__.py | 23 ++++++++++++++--------- bitshift/database/migration.py | 9 ++++++++- bitshift/database/schema.sql | 11 ++++++----- 3 files changed, 28 insertions(+), 15 deletions(-) diff --git a/bitshift/database/__init__.py b/bitshift/database/__init__.py index 1a2b373..9b039ca 100644 --- a/bitshift/database/__init__.py +++ b/bitshift/database/__init__.py @@ -51,9 +51,15 @@ class Database(object): "Run `python -m bitshift.database.migration`." raise RuntimeError(err) - def _decompose_url(self, url): + def _decompose_url(self, cursor, url): """Break up a URL into an origin (with a URL base) and a suffix.""" - pass ## TODO + query = """SELECT origin_id, SUBSTR(?, LENGTH(origin_url_base)) + FROM origins WHERE origin_url_base IS NOT NULL + AND ? 
LIKE CONCAT(origin_url_base, "%")""" + + cursor.execute(query, (url, url)) + result = cursor.fetchone() + return result if result else (1, url) def _insert_symbols(self, cursor, code_id, sym_type, symbols): """Insert a list of symbols of a given type into the database.""" @@ -109,12 +115,14 @@ class Database(object): (DEFAULT, ?, ?, ?, ?, ?, ?, ?, ?)""" query3 = "INSERT INTO authors VALUES (DEFAULT, ?, ?, ?)" - code_id = mmh3.hash64(codelet.code.encode("utf8"))[0] - origin, url = self._decompose_url(codelet.url) - with self._conn.cursor() as cursor: + code_id = mmh3.hash64(codelet.code.encode("utf8"))[0] + origin, url = self._decompose_url(cursor, codelet.url) + cursor.execute(query1, (code_id, codelet.code)) - new_code = cursor.rowcount == 1 + if cursor.rowcount == 1: + for sym_type, symbols in codelet.symbols.iteritems(): + self._insert_symbols(cursor, code_id, sym_type, symbols) cursor.execute(query2, (codelet.name, code_id, codelet.language, origin, url, codelet.rank, codelet.date_created, @@ -122,6 +130,3 @@ class Database(object): codelet_id = cursor.lastrowid authors = [(codelet_id, a[0], a[1]) for a in codelet.authors] cursor.executemany(query3, authors) - if new_code: - for sym_type, symbols in codelet.symbols.iteritems(): - self._insert_symbols(cursor, code_id, sym_type, symbols) diff --git a/bitshift/database/migration.py b/bitshift/database/migration.py index e0ec762..743f906 100644 --- a/bitshift/database/migration.py +++ b/bitshift/database/migration.py @@ -3,7 +3,7 @@ Contains information about database schema versions, and SQL queries to update between them. 
""" -VERSION = 4 +VERSION = 5 MIGRATIONS = [ # 1 -> 2 @@ -53,6 +53,13 @@ MIGRATIONS = [ REFERENCES `symbols` (`symbol_id`) ON DELETE CASCADE ON UPDATE CASCADE ) ENGINE=InnoDB""" + ], + # 4 -> 5 + [ + """ALTER TABLE `origins` + MODIFY COLUMN `origin_name` VARCHAR(64) DEFAULT NULL, + MODIFY COLUMN `origin_url` VARCHAR(512) DEFAULT NULL, + MODIFY COLUMN `origin_url_base` VARCHAR(512) DEFAULT NULL""" ] ] diff --git a/bitshift/database/schema.sql b/bitshift/database/schema.sql index 79dad45..50b4f9e 100644 --- a/bitshift/database/schema.sql +++ b/bitshift/database/schema.sql @@ -1,4 +1,4 @@ --- Schema version 4 +-- Schema version 5 CREATE DATABASE `bitshift` DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci; USE `bitshift`; @@ -6,16 +6,17 @@ USE `bitshift`; CREATE TABLE `version` ( `version` INT UNSIGNED NOT NULL ) ENGINE=InnoDB; -INSERT INTO `version` VALUES (4); +INSERT INTO `version` VALUES (5); CREATE TABLE `origins` ( `origin_id` TINYINT UNSIGNED NOT NULL AUTO_INCREMENT, - `origin_name` VARCHAR(64) NOT NULL, - `origin_url` VARCHAR(512) NOT NULL, - `origin_url_base` VARCHAR(512) NOT NULL, + `origin_name` VARCHAR(64) DEFAULT NULL, + `origin_url` VARCHAR(512) DEFAULT NULL, + `origin_url_base` VARCHAR(512) DEFAULT NULL, `origin_image` BLOB DEFAULT NULL, PRIMARY KEY (`origin_id`) ) ENGINE=InnoDB; +INSERT INTO `origins` VALUES (1, NULL, NULL, NULL, NULL); CREATE TABLE `code` ( `code_id` BIGINT NOT NULL, From 56f23e682a24c3b199cc7add1447cf4130ba2657 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 4 May 2014 01:18:30 -0400 Subject: [PATCH 18/18] Database to v6; flesh out a lot of Database.search(). 
--- bitshift/database/__init__.py | 65 ++++++++++++++++++++++++++++-------------- bitshift/database/migration.py | 30 ++++++++++++++++++- bitshift/database/schema.sql | 23 +++++++++------ 3 files changed, 86 insertions(+), 32 deletions(-) diff --git a/bitshift/database/__init__.py b/bitshift/database/__init__.py index 9b039ca..75f39da 100644 --- a/bitshift/database/__init__.py +++ b/bitshift/database/__init__.py @@ -51,10 +51,15 @@ class Database(object): "Run `python -m bitshift.database.migration`." raise RuntimeError(err) + def _get_codelets_from_ids(self, cursor, ids): + """Return a list of Codelet objects given a list of codelet IDs.""" + raise NotImplementedError() ## TODO + def _decompose_url(self, cursor, url): """Break up a URL into an origin (with a URL base) and a suffix.""" query = """SELECT origin_id, SUBSTR(?, LENGTH(origin_url_base)) - FROM origins WHERE origin_url_base IS NOT NULL + FROM origins + WHERE origin_url_base IS NOT NULL AND ? LIKE CONCAT(origin_url_base, "%")""" cursor.execute(query, (url, url)) @@ -88,19 +93,35 @@ class Database(object): :param page: The result page to display. :type page: int - :return: A list of search results. - :rtype: list of :py:class:`.Codelet`\ s + :return: The total number of results, and the *n*\ th page of results. 
+ :rtype: 2-tuple of (long, list of :py:class:`.Codelet`\ s) """ - # search for cache_hash = mmh3.hash(query.serialize() + str(page)) - # cache HIT: - # update cache_last_used - # return codelets - # cache MISS: - # build complex search query - # fetch codelets - # cache results - # return codelets - pass + query1 = """SELECT cdata_codelet, cache_count_mnt, cache_count_exp + FROM cache + INNER JOIN cache_data ON cache_id = cdata_cache + WHERE cache_id = ?""" + query2 = "INSERT INTO cache VALUES (?, ?, ?, DEFAULT)" + query3 = "INSERT INTO cache_data VALUES (?, ?)" + + cache_id = mmh3.hash64(str(page) + ":" + query.serialize())[0] + + with self._conn.cursor() as cursor: + cursor.execute(query1, (cache_id,)) + results = cursor.fetchall() + if results: # Cache hit + num_results = results[0][1] * (10 ** results[0][2]) + ids = [res[0] for res in results] + else: # Cache miss + ## TODO: build and execute search query + results = cursor.fetchall() + ids = NotImplemented ## TODO: extract ids from results + num_results = NotImplemented ## TODO: num if results else 0 + num_exp = max(len(str(num_results)) - 3, 0) + num_results = int(round(num_results, -num_exp)) + num_mnt = num_results / (10 ** num_exp) + cursor.execute(query2, (cache_id, num_mnt, num_exp)) + cursor.executemany(query3, [(cache_id, c_id) for c_id in ids]) + return (num_results, self._get_codelets_from_ids(cursor, ids)) def insert(self, codelet): """ @@ -109,23 +130,23 @@ class Database(object): :param codelet: The codelet to insert. :type codelet: :py:class:`.Codelet` """ - query1 = """INSERT INTO code VALUES (?, ?) + query1 = """INSERT INTO code VALUES (?, ?, ?) 
ON DUPLICATE KEY UPDATE code_id=code_id""" query2 = """INSERT INTO codelets VALUES - (DEFAULT, ?, ?, ?, ?, ?, ?, ?, ?)""" + (DEFAULT, ?, ?, ?, ?, ?, ?, ?)""" query3 = "INSERT INTO authors VALUES (DEFAULT, ?, ?, ?)" - with self._conn.cursor() as cursor: - code_id = mmh3.hash64(codelet.code.encode("utf8"))[0] - origin, url = self._decompose_url(cursor, codelet.url) + hash_key = str(codelet.language) + ":" + codelet.code.encode("utf8") + code_id = mmh3.hash64(hash_key)[0] - cursor.execute(query1, (code_id, codelet.code)) + with self._conn.cursor() as cursor: + cursor.execute(query1, (code_id, codelet.language, codelet.code)) if cursor.rowcount == 1: for sym_type, symbols in codelet.symbols.iteritems(): self._insert_symbols(cursor, code_id, sym_type, symbols) - cursor.execute(query2, (codelet.name, code_id, codelet.language, - origin, url, codelet.rank, - codelet.date_created, + origin, url = self._decompose_url(cursor, codelet.url) + cursor.execute(query2, (codelet.name, code_id, origin, url, + codelet.rank, codelet.date_created, codelet.date_modified)) codelet_id = cursor.lastrowid authors = [(codelet_id, a[0], a[1]) for a in codelet.authors] diff --git a/bitshift/database/migration.py b/bitshift/database/migration.py index 743f906..24f744a 100644 --- a/bitshift/database/migration.py +++ b/bitshift/database/migration.py @@ -3,7 +3,7 @@ Contains information about database schema versions, and SQL queries to update between them. 
""" -VERSION = 5 +VERSION = 6 MIGRATIONS = [ # 1 -> 2 @@ -60,6 +60,34 @@ MIGRATIONS = [ MODIFY COLUMN `origin_name` VARCHAR(64) DEFAULT NULL, MODIFY COLUMN `origin_url` VARCHAR(512) DEFAULT NULL, MODIFY COLUMN `origin_url_base` VARCHAR(512) DEFAULT NULL""" + ], + # 5 -> 6 + [ + """ALTER TABLE `code` + ADD COLUMN `code_lang` SMALLINT UNSIGNED DEFAULT NULL + AFTER `code_id`, + ADD KEY (`code_lang`)""", + """ALTER TABLE `codelets` + DROP KEY `codelet_lang`, + DROP COLUMN `codelet_lang`""", + """ALTER TABLE `cache_data` + DROP FOREIGN KEY `cache_data_ibfk_1`""", + """ALTER TABLE `cache` + MODIFY COLUMN `cache_id` BIGINT NOT NULL, + DROP COLUMN `cache_hash`, + DROP COLUMN `cache_last_used`, + MODIFY COLUMN `cache_count_mnt` SMALLINT UNSIGNED NOT NULL""", + """ALTER TABLE `cache_data` + MODIFY COLUMN `cdata_cache` BIGINT NOT NULL, + ADD PRIMARY KEY (`cdata_cache`, `cdata_codelet`), + ADD CONSTRAINT `cache_data_ibfk_1` FOREIGN KEY (`cdata_codelet`) + REFERENCES `codelets` (`codelet_id`) + ON DELETE CASCADE ON UPDATE CASCADE""", + """CREATE EVENT `flush_cache` + ON SCHEDULE EVERY 1 HOUR + DO + DELETE FROM `cache` + WHERE `cache_created` < DATE_SUB(NOW(), INTERVAL 1 DAY);""" ] ] diff --git a/bitshift/database/schema.sql b/bitshift/database/schema.sql index 50b4f9e..8634416 100644 --- a/bitshift/database/schema.sql +++ b/bitshift/database/schema.sql @@ -1,4 +1,4 @@ --- Schema version 5 +-- Schema version 6 CREATE DATABASE `bitshift` DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci; USE `bitshift`; @@ -6,7 +6,7 @@ USE `bitshift`; CREATE TABLE `version` ( `version` INT UNSIGNED NOT NULL ) ENGINE=InnoDB; -INSERT INTO `version` VALUES (5); +INSERT INTO `version` VALUES (6); CREATE TABLE `origins` ( `origin_id` TINYINT UNSIGNED NOT NULL AUTO_INCREMENT, @@ -20,8 +20,10 @@ INSERT INTO `origins` VALUES (1, NULL, NULL, NULL, NULL); CREATE TABLE `code` ( `code_id` BIGINT NOT NULL, + `code_lang` SMALLINT UNSIGNED DEFAULT NULL, `code_code` MEDIUMTEXT NOT NULL, PRIMARY KEY (`code_id`), 
+ KEY (`code_lang`), FULLTEXT KEY (`code_code`) ) ENGINE=InnoDB; @@ -29,7 +31,6 @@ CREATE TABLE `codelets` ( `codelet_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, `codelet_name` VARCHAR(300) NOT NULL, `codelet_code_id` BIGINT NOT NULL, - `codelet_lang` SMALLINT UNSIGNED DEFAULT NULL, `codelet_origin` TINYINT UNSIGNED NOT NULL, `codelet_url` VARCHAR(512) NOT NULL, `codelet_rank` FLOAT NOT NULL, @@ -37,7 +38,6 @@ CREATE TABLE `codelets` ( `codelet_date_modified` DATETIME DEFAULT NULL, PRIMARY KEY (`codelet_id`), FULLTEXT KEY (`codelet_name`), - KEY (`codelet_lang`), KEY (`codelet_rank`), KEY (`codelet_date_created`), KEY (`codelet_date_modified`), @@ -88,18 +88,17 @@ CREATE TABLE `symbol_locations` ( ) ENGINE=InnoDB; CREATE TABLE `cache` ( - `cache_id` INT UNSIGNED NOT NULL AUTO_INCREMENT, - `cache_hash` BIGINT NOT NULL, - `cache_count_mnt` TINYINT UNSIGNED NOT NULL, + `cache_id` BIGINT NOT NULL, + `cache_count_mnt` SMALLINT UNSIGNED NOT NULL, `cache_count_exp` TINYINT UNSIGNED NOT NULL, `cache_created` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, - `cache_last_used` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (`cache_id`) ) ENGINE=InnoDB; CREATE TABLE `cache_data` ( - `cdata_cache` INT UNSIGNED NOT NULL, + `cdata_cache` BIGINT NOT NULL, `cdata_codelet` BIGINT UNSIGNED NOT NULL, + PRIMARY KEY (`cdata_cache`, `cdata_codelet`), FOREIGN KEY (`cdata_cache`) REFERENCES `cache` (`cache_id`) ON DELETE CASCADE ON UPDATE CASCADE, @@ -107,3 +106,9 @@ CREATE TABLE `cache_data` ( REFERENCES `codelets` (`codelet_id`) ON DELETE CASCADE ON UPDATE CASCADE ) ENGINE=InnoDB; + +CREATE EVENT `flush_cache` + ON SCHEDULE EVERY 1 HOUR + DO + DELETE FROM `cache` + WHERE `cache_created` < DATE_SUB(NOW(), INTERVAL 1 DAY);