@@ -1,5 +1,6 @@
.sass-cache
.DS_Store
.my.cnf
# github premade rules
*.py[cod]
@@ -5,6 +5,7 @@ Module to contain all the project's Flask server plumbing.
from flask import Flask
from flask import render_template, session
from bitshift.database import Database
from bitshift.query import parse_query

app = Flask(__name__)
@@ -12,7 +13,9 @@ app.config.from_object("bitshift.config")
app_env = app.jinja_env
app_env.line_statement_prefix = "="
app_env.globals.update(assets = assets)
app_env.globals.update(assets=assets)
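# Shared handle to the MySQL-backed search index (see bitshift.database).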
database = Database()

@app.route("/")
def index():
@@ -20,8 +23,8 @@ def index():
@app.route("/search/<query>")
def search(query):
    ## tree = parse_query(query)
    ## database.search(tree)
    tree = parse_query(query)
    database.search(tree)
    pass

if __name__ == "__main__":
@@ -1,18 +0,0 @@
"""
Module with classes and functions to handle communication with the MySQL
database backend, which manages the search index.
"""

import oursql

class Database(object):
    """Represents the MySQL database."""

    def __init__(self):
        pass

    def _connect(self):
        pass

    def _create(self):
        pass
@@ -0,0 +1,153 @@
"""
Subpackage with classes and functions to handle communication with the MySQL
database backend, which manages the search index.
"""

import os

import mmh3
import oursql

from .migration import VERSION, MIGRATIONS

__all__ = ["Database"]

class Database(object):
    """Represents the MySQL database."""

    def __init__(self, migrate=False):
        self._conn = self._connect()
        self._check_version(migrate)

    def _connect(self):
        """Establish a connection to the database."""
        root = os.path.dirname(os.path.abspath(__file__))
        default_file = os.path.join(root, ".my.cnf")
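        # Connection options come from a .my.cnf placed next to this package
        # (kept out of version control; see .gitignore).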
        return oursql.connect(db="bitshift", read_default_file=default_file,
                              autoping=True, autoreconnect=True)

    def _migrate(self, cursor, current):
        """Migrate the database to the latest schema version."""
        for version in xrange(current, VERSION):
            print "Migrating to %d..." % (version + 1)
            for query in MIGRATIONS[version - 1]:
                cursor.execute(query)
            cursor.execute("UPDATE version SET version = ?", (version + 1,))

    def _check_version(self, migrate):
        """Check the database schema version and respond accordingly.

        If the schema is out of date, migrate if *migrate* is True, else raise
        an exception.
        """
        with self._conn.cursor() as cursor:
            cursor.execute("SELECT version FROM version")
            version = cursor.fetchone()[0]
            if version < VERSION:
                if migrate:
                    self._migrate(cursor, version)
                else:
                    err = "Database schema out of date. " \
                          "Run `python -m bitshift.database.migration`."
                    raise RuntimeError(err)

    def _get_codelets_from_ids(self, cursor, ids):
        """Return a list of Codelet objects given a list of codelet IDs."""
        raise NotImplementedError()  ## TODO

    def _decompose_url(self, cursor, url):
        """Break up a URL into an origin (with a URL base) and a suffix."""
        query = """SELECT origin_id, SUBSTR(?, LENGTH(origin_url_base))
                   FROM origins
                   WHERE origin_url_base IS NOT NULL
                   AND ? LIKE CONCAT(origin_url_base, "%")"""

        cursor.execute(query, (url, url))
        result = cursor.fetchone()
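        # Fall back to origin 1, the catch-all row created by schema.sql, when
        # no known origin's URL base matches.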
        return result if result else (1, url)

    def _insert_symbols(self, cursor, code_id, sym_type, symbols):
        """Insert a list of symbols of a given type into the database."""
        sym_types = ["functions", "classes", "variables"]
        query1 = "INSERT INTO symbols VALUES (DEFAULT, ?, ?, ?)"
        query2 = """INSERT INTO symbol_locations VALUES
                    (DEFAULT, ?, ?, ?, ?, ?, ?)"""

        for (name, decls, uses) in symbols:
            cursor.execute(query1, (code_id, sym_types.index(sym_type), name))
            sym_id = cursor.lastrowid
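            # sloc_type is 0 for a declaration site and 1 for a use site.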
            params = ([tuple([sym_id, 0] + list(loc)) for loc in decls] +
                      [tuple([sym_id, 1] + list(loc)) for loc in uses])
            cursor.executemany(query2, params)

    def close(self):
        """Disconnect from the database."""
        self._conn.close()

    def search(self, query, page=1):
        """
        Search the database for a query and return the *n*\ th page of results.

        :param query: The query to search for.
        :type query: :py:class:`~.query.tree.Tree`
        :param page: The result page to display.
        :type page: int

        :return: The total number of results, and the *n*\ th page of results.
        :rtype: 2-tuple of (long, list of :py:class:`.Codelet`\ s)
        """
        query1 = """SELECT cdata_codelet, cache_count_mnt, cache_count_exp
                    FROM cache
                    INNER JOIN cache_data ON cache_id = cdata_cache
                    WHERE cache_id = ?"""
        query2 = "INSERT INTO cache VALUES (?, ?, ?, DEFAULT)"
        query3 = "INSERT INTO cache_data VALUES (?, ?)"
        cache_id = mmh3.hash64(str(page) + ":" + query.serialize())[0]

        with self._conn.cursor() as cursor:
            cursor.execute(query1, (cache_id,))
            results = cursor.fetchall()
            if results:  # Cache hit
                num_results = results[0][1] * (10 ** results[0][2])
                ids = [res[0] for res in results]
            else:  # Cache miss
                ## TODO: build and execute search query
                results = cursor.fetchall()
                ids = NotImplemented  ## TODO: extract ids from results
                num_results = NotImplemented  ## TODO: num if results else 0
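                # Round the count to three significant figures and store it as
                # a mantissa/exponent pair, e.g. 1,234,567 -> (123, 4).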
                num_exp = max(len(str(num_results)) - 3, 0)
                num_results = int(round(num_results, -num_exp))
                num_mnt = num_results / (10 ** num_exp)
                cursor.execute(query2, (cache_id, num_mnt, num_exp))
                cursor.executemany(query3, [(cache_id, c_id) for c_id in ids])
            return (num_results, self._get_codelets_from_ids(cursor, ids))

    def insert(self, codelet):
        """
        Insert a codelet into the database.

        :param codelet: The codelet to insert.
        :type codelet: :py:class:`.Codelet`
        """
        query1 = """INSERT INTO code VALUES (?, ?, ?)
                    ON DUPLICATE KEY UPDATE code_id=code_id"""
        query2 = """INSERT INTO codelets VALUES
                    (DEFAULT, ?, ?, ?, ?, ?, ?, ?)"""
        query3 = "INSERT INTO authors VALUES (DEFAULT, ?, ?, ?)"
        hash_key = str(codelet.language) + ":" + codelet.code.encode("utf8")
        code_id = mmh3.hash64(hash_key)[0]

        with self._conn.cursor() as cursor:
            cursor.execute(query1, (code_id, codelet.language, codelet.code))
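            # rowcount is 1 only when a new code row was actually inserted, so
            # symbols for previously indexed code are not stored twice.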
            if cursor.rowcount == 1:
                for sym_type, symbols in codelet.symbols.iteritems():
                    self._insert_symbols(cursor, code_id, sym_type, symbols)
            origin, url = self._decompose_url(cursor, codelet.url)

            cursor.execute(query2, (codelet.name, code_id, origin, url,
                                    codelet.rank, codelet.date_created,
                                    codelet.date_modified))
            codelet_id = cursor.lastrowid
            authors = [(codelet_id, a[0], a[1]) for a in codelet.authors]
            cursor.executemany(query3, authors)
@@ -0,0 +1,97 @@
"""
Contains information about database schema versions, and SQL queries to update
between them.
"""

VERSION = 6

MIGRATIONS = [
    # 1 -> 2
    [
        """ALTER TABLE `codelets`
           DROP FOREIGN KEY `codelets_ibfk_1`""",
        """ALTER TABLE `code`
           DROP KEY `code_hash`,
           DROP COLUMN `code_hash`,
           MODIFY COLUMN `code_id` BIGINT NOT NULL""",
        """ALTER TABLE `codelets`
           MODIFY COLUMN `codelet_code_id` BIGINT NOT NULL,
           ADD KEY (`codelet_lang`),
           ADD CONSTRAINT `codelets_ibfk_1` FOREIGN KEY (`codelet_code_id`)
               REFERENCES `code` (`code_id`)
               ON DELETE RESTRICT ON UPDATE CASCADE""",
        """ALTER TABLE `symbols`
           ADD COLUMN `symbol_end_row` INT UNSIGNED NOT NULL,
           ADD COLUMN `symbol_end_col` INT UNSIGNED NOT NULL"""
    ],
    # 2 -> 3
    [
        """ALTER TABLE `symbols`
           DROP FOREIGN KEY `symbols_ibfk_1`,
           CHANGE COLUMN `symbol_codelet` `symbol_code` BIGINT NOT NULL,
           ADD CONSTRAINT `symbols_ibfk_1` FOREIGN KEY (`symbol_code`)
               REFERENCES `code` (`code_id`)
               ON DELETE CASCADE ON UPDATE CASCADE"""
    ],
    # 3 -> 4
    [
        """ALTER TABLE `symbols`
           DROP COLUMN `symbol_row`,
           DROP COLUMN `symbol_col`,
           DROP COLUMN `symbol_end_row`,
           DROP COLUMN `symbol_end_col`""",
        """CREATE TABLE `symbol_locations` (
           `sloc_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
           `sloc_symbol` BIGINT UNSIGNED NOT NULL,
           `sloc_type` TINYINT UNSIGNED NOT NULL,
           `sloc_row` INT UNSIGNED NOT NULL,
           `sloc_col` INT UNSIGNED NOT NULL,
           `sloc_end_row` INT UNSIGNED NOT NULL,
           `sloc_end_col` INT UNSIGNED NOT NULL,
           PRIMARY KEY (`sloc_id`),
           FOREIGN KEY (`sloc_symbol`)
               REFERENCES `symbols` (`symbol_id`)
               ON DELETE CASCADE ON UPDATE CASCADE
           ) ENGINE=InnoDB"""
    ],
    # 4 -> 5
    [
        """ALTER TABLE `origins`
           MODIFY COLUMN `origin_name` VARCHAR(64) DEFAULT NULL,
           MODIFY COLUMN `origin_url` VARCHAR(512) DEFAULT NULL,
           MODIFY COLUMN `origin_url_base` VARCHAR(512) DEFAULT NULL"""
    ],
    # 5 -> 6
    [
        """ALTER TABLE `code`
           ADD COLUMN `code_lang` SMALLINT UNSIGNED DEFAULT NULL
               AFTER `code_id`,
           ADD KEY (`code_lang`)""",
        """ALTER TABLE `codelets`
           DROP KEY `codelet_lang`,
           DROP COLUMN `codelet_lang`""",
        """ALTER TABLE `cache_data`
           DROP FOREIGN KEY `cache_data_ibfk_1`""",
        """ALTER TABLE `cache`
           MODIFY COLUMN `cache_id` BIGINT NOT NULL,
           DROP COLUMN `cache_hash`,
           DROP COLUMN `cache_last_used`,
           MODIFY COLUMN `cache_count_mnt` SMALLINT UNSIGNED NOT NULL""",
        """ALTER TABLE `cache_data`
           MODIFY COLUMN `cdata_cache` BIGINT NOT NULL,
           ADD PRIMARY KEY (`cdata_cache`, `cdata_codelet`),
           ADD CONSTRAINT `cache_data_ibfk_1` FOREIGN KEY (`cdata_codelet`)
               REFERENCES `codelets` (`codelet_id`)
               ON DELETE CASCADE ON UPDATE CASCADE""",
        """CREATE EVENT `flush_cache`
           ON SCHEDULE EVERY 1 HOUR
           DO
               DELETE FROM `cache`
               WHERE `cache_created` < DATE_SUB(NOW(), INTERVAL 1 DAY);"""
    ]
]
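
# Running this module directly migrates an existing database to VERSION.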
if __name__ == "__main__":
    from . import Database
    Database(migrate=True).close()
@@ -0,0 +1,114 @@
-- Schema version 6

CREATE DATABASE `bitshift` DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci;
USE `bitshift`;

CREATE TABLE `version` (
    `version` INT UNSIGNED NOT NULL
) ENGINE=InnoDB;
INSERT INTO `version` VALUES (6);

CREATE TABLE `origins` (
    `origin_id` TINYINT UNSIGNED NOT NULL AUTO_INCREMENT,
    `origin_name` VARCHAR(64) DEFAULT NULL,
    `origin_url` VARCHAR(512) DEFAULT NULL,
    `origin_url_base` VARCHAR(512) DEFAULT NULL,
    `origin_image` BLOB DEFAULT NULL,
    PRIMARY KEY (`origin_id`)
) ENGINE=InnoDB;
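-- Origin 1 is the catch-all row used when a codelet's URL matches no known
-- origin (see Database._decompose_url).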
INSERT INTO `origins` VALUES (1, NULL, NULL, NULL, NULL);

CREATE TABLE `code` (
    `code_id` BIGINT NOT NULL,
    `code_lang` SMALLINT UNSIGNED DEFAULT NULL,
    `code_code` MEDIUMTEXT NOT NULL,
    PRIMARY KEY (`code_id`),
    KEY (`code_lang`),
    FULLTEXT KEY (`code_code`)
) ENGINE=InnoDB;

CREATE TABLE `codelets` (
    `codelet_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    `codelet_name` VARCHAR(300) NOT NULL,
    `codelet_code_id` BIGINT NOT NULL,
    `codelet_origin` TINYINT UNSIGNED NOT NULL,
    `codelet_url` VARCHAR(512) NOT NULL,
    `codelet_rank` FLOAT NOT NULL,
    `codelet_date_created` DATETIME DEFAULT NULL,
    `codelet_date_modified` DATETIME DEFAULT NULL,
    PRIMARY KEY (`codelet_id`),
    FULLTEXT KEY (`codelet_name`),
    KEY (`codelet_rank`),
    KEY (`codelet_date_created`),
    KEY (`codelet_date_modified`),
    FOREIGN KEY (`codelet_code_id`)
        REFERENCES `code` (`code_id`)
        ON DELETE RESTRICT ON UPDATE CASCADE,
    FOREIGN KEY (`codelet_origin`)
        REFERENCES `origins` (`origin_id`)
        ON DELETE RESTRICT ON UPDATE CASCADE
) ENGINE=InnoDB;

CREATE TABLE `authors` (
    `author_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    `author_codelet` BIGINT UNSIGNED NOT NULL,
    `author_name` VARCHAR(128) NOT NULL,
    `author_url` VARCHAR(512) DEFAULT NULL,
    PRIMARY KEY (`author_id`),
    FULLTEXT KEY (`author_name`),
    FOREIGN KEY (`author_codelet`)
        REFERENCES `codelets` (`codelet_id`)
        ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB;

CREATE TABLE `symbols` (
    `symbol_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    `symbol_code` BIGINT NOT NULL,
    `symbol_type` TINYINT UNSIGNED NOT NULL,
    `symbol_name` VARCHAR(512) NOT NULL,
    PRIMARY KEY (`symbol_id`),
    KEY (`symbol_type`, `symbol_name`(32)),
    FOREIGN KEY (`symbol_code`)
        REFERENCES `code` (`code_id`)
        ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB;

CREATE TABLE `symbol_locations` (
    `sloc_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    `sloc_symbol` BIGINT UNSIGNED NOT NULL,
    `sloc_type` TINYINT UNSIGNED NOT NULL,
    `sloc_row` INT UNSIGNED NOT NULL,
    `sloc_col` INT UNSIGNED NOT NULL,
    `sloc_end_row` INT UNSIGNED NOT NULL,
    `sloc_end_col` INT UNSIGNED NOT NULL,
    PRIMARY KEY (`sloc_id`),
    FOREIGN KEY (`sloc_symbol`)
        REFERENCES `symbols` (`symbol_id`)
        ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB;

CREATE TABLE `cache` (
    `cache_id` BIGINT NOT NULL,
    `cache_count_mnt` SMALLINT UNSIGNED NOT NULL,
    `cache_count_exp` TINYINT UNSIGNED NOT NULL,
    `cache_created` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (`cache_id`)
) ENGINE=InnoDB;

CREATE TABLE `cache_data` (
    `cdata_cache` BIGINT NOT NULL,
    `cdata_codelet` BIGINT UNSIGNED NOT NULL,
    PRIMARY KEY (`cdata_cache`, `cdata_codelet`),
    FOREIGN KEY (`cdata_cache`)
        REFERENCES `cache` (`cache_id`)
        ON DELETE CASCADE ON UPDATE CASCADE,
    FOREIGN KEY (`cdata_codelet`)
        REFERENCES `codelets` (`codelet_id`)
        ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB;

CREATE EVENT `flush_cache`
    ON SCHEDULE EVERY 1 HOUR
    DO
        DELETE FROM `cache`
        WHERE `cache_created` < DATE_SUB(NOW(), INTERVAL 1 DAY);
@@ -6,4 +6,6 @@ __all__ = ["parse_query"]
def parse_query(query):
    # gets a string, returns a Tree
    # TODO: note: resultant Trees should be normalized so that "foo OR bar"
    # and "bar OR foo" result in equivalent trees
    pass
@@ -4,8 +4,9 @@ setup(
    name = "bitshift",
    version = "0.1",
    packages = find_packages(),
    install_requires = ["Flask>=0.10.1", "pygments>=1.6", "requests>=2.2.0",
                        "BeautifulSoup>=3.2.1"],
    install_requires = [
        "Flask>=0.10.1", "pygments>=1.6", "requests>=2.2.0",
        "BeautifulSoup>=3.2.1", "oursql>=0.9.3.1", "mmh3>=2.3"],
    author = "Benjamin Attal, Ben Kurtovic, Severyn Kozak",
    license = "MIT",
    url = "https://github.com/earwig/bitshift"