diff --git a/bitshift/database/__init__.py b/bitshift/database/__init__.py
index 9a54ef2..50486b6 100644
--- a/bitshift/database/__init__.py
+++ b/bitshift/database/__init__.py
@@ -8,15 +8,16 @@ import os
 import mmh3
 import oursql
 
-# from ..languages import ...
+from .migration import VERSION, MIGRATIONS
 
 __all__ = ["Database"]
 
 class Database(object):
     """Represents the MySQL database."""
 
-    def __init__(self):
+    def __init__(self, migrate=False):
         self._connect()
+        self._check_version(migrate)
 
     def _connect(self):
         """Establish a connection to the database."""
@@ -25,6 +26,53 @@ class Database(object):
         self._conn = oursql.connect(read_default_file=default_file,
                                     autoping=True, autoreconnect=True)
 
+    def _migrate(self, cursor, current):
+        """Migrate the database to the latest schema version.
+
+        :param cursor: The cursor on which the migration queries are run.
+        :param current: The schema version currently stored in the database.
+        :type current: int
+        """
+        for version in xrange(current, VERSION):
+            # MIGRATIONS[version - 1] upgrades schema *version* by one step.
+            for query in MIGRATIONS[version - 1]:
+                cursor.execute(query)
+            # Record the completed step; otherwise the stored version never
+            # changes and the same queries are re-run on the next start.
+            cursor.execute("UPDATE version SET version = ?", (version + 1,))
+
+    def _check_version(self, migrate):
+        """Check the database schema version and respond accordingly.
+
+        If the schema is out of date, migrate if *migrate* is True, else raise
+        an exception.
+
+        :param migrate: Whether to migrate an out-of-date schema.
+        :type migrate: bool
+
+        :raises RuntimeError: If the ``version`` table has no row, or if the
+            schema is out of date and *migrate* is False.
+        """
+        with self._conn.cursor() as cursor:
+            cursor.execute("SELECT version FROM version")
+            row = cursor.fetchone()
+            if row is None:
+                # fetchone() returns None for an empty result set; fail with
+                # a clear message instead of a TypeError on row[0].
+                raise RuntimeError("Database schema version is missing.")
+            version = row[0]
+            if version < VERSION:
+                if migrate:
+                    self._migrate(cursor, version)
+                else:
+                    err = "Database schema out of date. " \
+                          "Run `python -m bitshift.database.migration`."
+                    raise RuntimeError(err)
+
+    def close(self):
+        """Disconnect from the database."""
+        self._conn.close()
+
     def search(self, query, page=1):
         """
         Search the database for a query and return the *n*\ th page of results.
@@ -55,19 +103,14 @@ class Database(object):
         :param codelet: The codelet to insert.
         :type codelet: :py:class:`.Codelet`
         """
-        frag_size = 16384  # 16 kB
-        query_slt1 = """SELECT code_id, LEFT(code_code, {0})
-                        FROM code WHERE code_hash = ?""".format(frag_size)
-        query_ins1 = "INSERT INTO code VALUES (?, ?)"
-        query_ins2 = "INSERT INTO codelets VALUES (?, ?, ?, ?, ?, ?, ?, ?)"
-        query_ins3 = "INSERT INTO authors VALUES", " (?, ?, ?)"
-        query_ins4 = "INSERT INTO symbols VALUES", " (?, ?, ?, ?, ?)"
+        query1 = """INSERT INTO code VALUES (?, ?)
+                    ON DUPLICATE KEY UPDATE code_id=code_id"""
+        query2 = "INSERT INTO codelets VALUES (?, ?, ?, ?, ?, ?, ?, ?)"
+        query3 = "INSERT INTO authors VALUES", " (?, ?, ?)"
+        query4 = "INSERT INTO symbols VALUES", " (?, ?, ?, ?, ?)"
 
         # LAST_INSERT_ID()
 
-        code_id = None
-        code_hash = mmh3.hash64(codelet.code.encode("utf8"))[0]
-
         # codelet_id -- auto_increment used here
         codelet_name
         codelet_code_id
@@ -88,14 +131,9 @@ class Database(object):
         codelet.date_created
         codelet.date_modified
 
+        #######################################################################
+
+        code_id = mmh3.hash64(codelet.code.encode("utf8"))[0]
+
         with self._conn.cursor() as cursor:
-            # Retrieve the ID of the source code if it's already in the DB:
-            cursor.execute(query_slt1, (code_hash,))
-            for c_id, c_code_frag in cursor.fetchall():
-                if c_code_frag == codelet.code[:frag_size]:
-                    code_id = c_id
-                    break
-
-            # If the source code isn't already in the DB, add it:
-            if not code_id:
-                cursor.execute()
+            cursor.execute(query1, (code_id, codelet.code))
diff --git a/bitshift/database/migration.py b/bitshift/database/migration.py
new file mode 100644
index 0000000..c9fdd39
--- /dev/null
+++ b/bitshift/database/migration.py
@@ -0,0 +1,25 @@
+"""
+Contains information about database schema versions, and SQL queries to update
+between them.
+"""
+
+VERSION = 2
+
+# MIGRATIONS[n - 1] lists the queries that take schema version n to n + 1.
+MIGRATIONS = [
+    # 1 -> 2
+    [
+        # drop index on code_hash
+        "ALTER TABLE code DROP COLUMN code_hash",
+        # TODO: the remaining 1 -> 2 steps are not written yet:
+        # change code_id to BIGINT NOT NULL,
+        # add key on codelets to codelet_lang
+        # add symbol_end_row INT UNSIGNED NOT NULL
+        # add symbol_end_col INT UNSIGNED NOT NULL
+    ]
+]
+
+if __name__ == "__main__":
+    from . import Database
+
+    Database(migrate=True).close()
diff --git a/bitshift/database/schema.sql b/bitshift/database/schema.sql
index 15979be..159f85a 100644
--- a/bitshift/database/schema.sql
+++ b/bitshift/database/schema.sql
@@ -1,6 +1,14 @@
+-- Schema version 2
+
 CREATE DATABASE `bitshift` DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci;
 USE `bitshift`;
 
+CREATE TABLE `version` (
+    `version` INT UNSIGNED NOT NULL
+) ENGINE=InnoDB;
+
+INSERT INTO `version` VALUES (2);
+
 CREATE TABLE `origins` (
     `origin_id` TINYINT UNSIGNED NOT NULL AUTO_INCREMENT,
     `origin_name` VARCHAR(64) NOT NULL,
@@ -11,11 +19,9 @@ CREATE TABLE `origins` (
 ) ENGINE=InnoDB;
 
 CREATE TABLE `code` (
-    `code_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
-    `code_hash` BIGINT NOT NULL,
+    `code_id` BIGINT NOT NULL,
     `code_code` MEDIUMTEXT NOT NULL,
     PRIMARY KEY (`code_id`),
-    KEY (`code_hash`),
     FULLTEXT KEY (`code_code`)
 ) ENGINE=InnoDB;
 
@@ -31,6 +37,7 @@ CREATE TABLE `codelets` (
     `codelet_date_modified` DATETIME DEFAULT NULL,
     PRIMARY KEY (`codelet_id`),
     FULLTEXT KEY (`codelet_name`),
+    KEY (`codelet_lang`),
     KEY (`codelet_rank`),
     KEY (`codelet_date_created`),
     KEY (`codelet_date_modified`),
@@ -61,6 +68,8 @@ CREATE TABLE `symbols` (
     `symbol_name` VARCHAR(512) NOT NULL,
     `symbol_row` INT UNSIGNED NOT NULL,
     `symbol_col` INT UNSIGNED NOT NULL,
+    `symbol_end_row` INT UNSIGNED NOT NULL,
+    `symbol_end_col` INT UNSIGNED NOT NULL,
     PRIMARY KEY (`symbol_id`),
     KEY (`symbol_type`, `symbol_name`(32)),
     FOREIGN KEY (`symbol_codelet`)