diff --git a/bitshift/database/__init__.py b/bitshift/database/__init__.py
index 1a2b373..9b039ca 100644
--- a/bitshift/database/__init__.py
+++ b/bitshift/database/__init__.py
@@ -51,9 +51,17 @@ class Database(object):
                       "Run `python -m bitshift.database.migration`."
                 raise RuntimeError(err)
 
-    def _decompose_url(self, url):
+    def _decompose_url(self, cursor, url):
         """Break up a URL into an origin (with a URL base) and a suffix."""
-        pass ## TODO
+        # SUBSTR() is 1-indexed, so the suffix starts at LENGTH(base) + 1.
+        query = """SELECT origin_id, SUBSTR(?, LENGTH(origin_url_base) + 1)
+                   FROM origins WHERE origin_url_base IS NOT NULL
+                   AND ? LIKE CONCAT(origin_url_base, "%")"""
+
+        cursor.execute(query, (url, url))
+        result = cursor.fetchone()
+        # Origin 1 is the catch-all row with a NULL URL base (see schema.sql).
+        return result if result else (1, url)
 
     def _insert_symbols(self, cursor, code_id, sym_type, symbols):
         """Insert a list of symbols of a given type into the database."""
@@ -109,12 +117,14 @@ class Database(object):
                     (DEFAULT, ?, ?, ?, ?, ?, ?, ?, ?)"""
         query3 = "INSERT INTO authors VALUES (DEFAULT, ?, ?, ?)"
 
-        code_id = mmh3.hash64(codelet.code.encode("utf8"))[0]
-        origin, url = self._decompose_url(codelet.url)
-
         with self._conn.cursor() as cursor:
+            code_id = mmh3.hash64(codelet.code.encode("utf8"))[0]
+            origin, url = self._decompose_url(cursor, codelet.url)
+
             cursor.execute(query1, (code_id, codelet.code))
-            new_code = cursor.rowcount == 1
+            if cursor.rowcount == 1:
+                for sym_type, symbols in codelet.symbols.iteritems():
+                    self._insert_symbols(cursor, code_id, sym_type, symbols)
             cursor.execute(query2, (codelet.name, code_id, codelet.language,
                                     origin, url, codelet.rank,
                                     codelet.date_created,
@@ -122,6 +132,3 @@ class Database(object):
             codelet_id = cursor.lastrowid
             authors = [(codelet_id, a[0], a[1]) for a in codelet.authors]
             cursor.executemany(query3, authors)
-            if new_code:
-                for sym_type, symbols in codelet.symbols.iteritems():
-                    self._insert_symbols(cursor, code_id, sym_type, symbols)
diff --git a/bitshift/database/migration.py b/bitshift/database/migration.py
index e0ec762..743f906 100644
--- a/bitshift/database/migration.py
+++ b/bitshift/database/migration.py
@@ -3,7 +3,7 @@ Contains information about database schema versions, and SQL queries to update
 between them.
 """
 
-VERSION = 4
+VERSION = 5
 
 MIGRATIONS = [
     # 1 -> 2
@@ -53,6 +53,13 @@ MIGRATIONS = [
                 REFERENCES `symbols` (`symbol_id`)
                 ON DELETE CASCADE ON UPDATE CASCADE
         ) ENGINE=InnoDB"""
+    ],
+    # 4 -> 5
+    [
+        """ALTER TABLE `origins`
+            MODIFY COLUMN `origin_name` VARCHAR(64) DEFAULT NULL,
+            MODIFY COLUMN `origin_url` VARCHAR(512) DEFAULT NULL,
+            MODIFY COLUMN `origin_url_base` VARCHAR(512) DEFAULT NULL"""
     ]
 ]
 
diff --git a/bitshift/database/schema.sql b/bitshift/database/schema.sql
index 79dad45..50b4f9e 100644
--- a/bitshift/database/schema.sql
+++ b/bitshift/database/schema.sql
@@ -1,4 +1,4 @@
--- Schema version 4
+-- Schema version 5
 
 CREATE DATABASE `bitshift` DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci;
 USE `bitshift`;
@@ -6,16 +6,17 @@ USE `bitshift`;
 CREATE TABLE `version` (
     `version` INT UNSIGNED NOT NULL
 ) ENGINE=InnoDB;
-INSERT INTO `version` VALUES (4);
+INSERT INTO `version` VALUES (5);
 
 CREATE TABLE `origins` (
     `origin_id` TINYINT UNSIGNED NOT NULL AUTO_INCREMENT,
-    `origin_name` VARCHAR(64) NOT NULL,
-    `origin_url` VARCHAR(512) NOT NULL,
-    `origin_url_base` VARCHAR(512) NOT NULL,
+    `origin_name` VARCHAR(64) DEFAULT NULL,
+    `origin_url` VARCHAR(512) DEFAULT NULL,
+    `origin_url_base` VARCHAR(512) DEFAULT NULL,
     `origin_image` BLOB DEFAULT NULL,
     PRIMARY KEY (`origin_id`)
 ) ENGINE=InnoDB;
+INSERT INTO `origins` VALUES (1, NULL, NULL, NULL, NULL);
 
 CREATE TABLE `code` (
     `code_id` BIGINT NOT NULL,