@@ -1,5 +1,6 @@ | |||
.sass-cache | |||
.DS_Store | |||
.my.cnf | |||
# github premade rules | |||
*.py[cod] | |||
@@ -5,6 +5,7 @@ Module to contain all the project's Flask server plumbing. | |||
from flask import Flask | |||
from flask import render_template, session | |||
from bitshift.database import Database | |||
from bitshift.query import parse_query | |||
app = Flask(__name__) | |||
@@ -12,7 +13,9 @@ app.config.from_object("bitshift.config") | |||
app_env = app.jinja_env | |||
app_env.line_statement_prefix = "=" | |||
app_env.globals.update(assets = assets) | |||
app_env.globals.update(assets=assets) | |||
database = Database() | |||
@app.route("/") | |||
def index(): | |||
@@ -20,8 +23,8 @@ def index(): | |||
@app.route("/search/<query>") | |||
def search(query): | |||
## tree = parse_query(query) | |||
## database.search(tree) | |||
tree = parse_query(query) | |||
database.search(tree) | |||
pass | |||
if __name__ == "__main__": | |||
@@ -1,18 +0,0 @@ | |||
""" | |||
Module with classes and functions to handle communication with the MySQL | |||
database backend, which manages the search index. | |||
""" | |||
import oursql | |||
class Database(object): | |||
"""Represents the MySQL database.""" | |||
def __init__(self): | |||
pass | |||
def _connect(self): | |||
pass | |||
def _create(self): | |||
pass |
@@ -0,0 +1,153 @@ | |||
""" | |||
Subpackage with classes and functions to handle communication with the MySQL | |||
database backend, which manages the search index. | |||
""" | |||
import os | |||
import mmh3 | |||
import oursql | |||
from .migration import VERSION, MIGRATIONS | |||
__all__ = ["Database"] | |||
class Database(object): | |||
"""Represents the MySQL database.""" | |||
def __init__(self, migrate=False): | |||
self._conn = self._connect() | |||
self._check_version(migrate) | |||
def _connect(self): | |||
"""Establish a connection to the database.""" | |||
root = os.path.dirname(os.path.abspath(__file__)) | |||
default_file = os.path.join(root, ".my.cnf") | |||
return oursql.connect(db="bitshift", read_default_file=default_file, | |||
autoping=True, autoreconnect=True) | |||
def _migrate(self, cursor, current): | |||
"""Migrate the database to the latest schema version.""" | |||
for version in xrange(current, VERSION): | |||
print "Migrating to %d..." % version + 1 | |||
for query in MIGRATIONS[version - 1]: | |||
cursor.execute(query) | |||
cursor.execute("UPDATE version SET version = ?", (version + 1,)) | |||
def _check_version(self, migrate): | |||
"""Check the database schema version and respond accordingly. | |||
If the schema is out of date, migrate if *migrate* is True, else raise | |||
an exception. | |||
""" | |||
with self._conn.cursor() as cursor: | |||
cursor.execute("SELECT version FROM version") | |||
version = cursor.fetchone()[0] | |||
if version < VERSION: | |||
if migrate: | |||
self._migrate(cursor, version) | |||
else: | |||
err = "Database schema out of date. " \ | |||
"Run `python -m bitshift.database.migration`." | |||
raise RuntimeError(err) | |||
def _get_codelets_from_ids(self, cursor, ids): | |||
"""Return a list of Codelet objects given a list of codelet IDs.""" | |||
raise NotImplementedError() ## TODO | |||
def _decompose_url(self, cursor, url): | |||
"""Break up a URL into an origin (with a URL base) and a suffix.""" | |||
query = """SELECT origin_id, SUBSTR(?, LENGTH(origin_url_base)) | |||
FROM origins | |||
WHERE origin_url_base IS NOT NULL | |||
AND ? LIKE CONCAT(origin_url_base, "%")""" | |||
cursor.execute(query, (url, url)) | |||
result = cursor.fetchone() | |||
return result if result else (1, url) | |||
def _insert_symbols(self, cursor, code_id, sym_type, symbols): | |||
"""Insert a list of symbols of a given type into the database.""" | |||
sym_types = ["functions", "classes", "variables"] | |||
query1 = "INSERT INTO symbols VALUES (DEFAULT, ?, ?, ?)" | |||
query2 = """INSERT INTO symbol_locations VALUES | |||
(DEFAULT, ?, ?, ?, ?, ?, ?)""" | |||
for (name, decls, uses) in symbols: | |||
cursor.execute(query1, (code_id, sym_types.index(sym_type), name)) | |||
sym_id = cursor.lastrowid | |||
params = ([tuple([sym_id, 0] + list(loc)) for loc in decls] + | |||
[tuple([sym_id, 1] + list(loc)) for loc in uses]) | |||
cursor.executemany(query2, params) | |||
def close(self): | |||
"""Disconnect from the database.""" | |||
self._conn.close() | |||
def search(self, query, page=1): | |||
""" | |||
Search the database for a query and return the *n*\ th page of results. | |||
:param query: The query to search for. | |||
:type query: :py:class:`~.query.tree.Tree` | |||
:param page: The result page to display. | |||
:type page: int | |||
:return: The total number of results, and the *n*\ th page of results. | |||
:rtype: 2-tuple of (long, list of :py:class:`.Codelet`\ s) | |||
""" | |||
query1 = """SELECT cdata_codelet, cache_count_mnt, cache_count_exp | |||
FROM cache | |||
INNER JOIN cache_data ON cache_id = cdata_cache | |||
WHERE cache_id = ?""" | |||
query2 = "INSERT INTO cache VALUES (?, ?, ?, DEFAULT)" | |||
query3 = "INSERT INTO cache_data VALUES (?, ?)" | |||
cache_id = mmh3.hash64(str(page) + ":" + query.serialize())[0] | |||
with self._conn.cursor() as cursor: | |||
cursor.execute(query1, (cache_id,)) | |||
results = cursor.fetchall() | |||
if results: # Cache hit | |||
num_results = results[0][1] * (10 ** results[0][2]) | |||
ids = [res[0] for res in results] | |||
else: # Cache miss | |||
## TODO: build and execute search query | |||
results = cursor.fetchall() | |||
ids = NotImplemented ## TODO: extract ids from results | |||
num_results = NotImplemented ## TODO: num if results else 0 | |||
num_exp = max(len(str(num_results)) - 3, 0) | |||
num_results = int(round(num_results, -num_exp)) | |||
num_mnt = num_results / (10 ** num_exp) | |||
cursor.execute(query2, (cache_id, num_mnt, num_exp)) | |||
cursor.executemany(query3, [(cache_id, c_id) for c_id in ids]) | |||
return (num_results, self._get_codelets_from_ids(cursor, ids)) | |||
def insert(self, codelet): | |||
""" | |||
Insert a codelet into the database. | |||
:param codelet: The codelet to insert. | |||
:type codelet: :py:class:`.Codelet` | |||
""" | |||
query1 = """INSERT INTO code VALUES (?, ?, ?) | |||
ON DUPLICATE KEY UPDATE code_id=code_id""" | |||
query2 = """INSERT INTO codelets VALUES | |||
(DEFAULT, ?, ?, ?, ?, ?, ?, ?)""" | |||
query3 = "INSERT INTO authors VALUES (DEFAULT, ?, ?, ?)" | |||
hash_key = str(codelet.language) + ":" + codelet.code.encode("utf8") | |||
code_id = mmh3.hash64(hash_key)[0] | |||
with self._conn.cursor() as cursor: | |||
cursor.execute(query1, (code_id, codelet.language, codelet.code)) | |||
if cursor.rowcount == 1: | |||
for sym_type, symbols in codelet.symbols.iteritems(): | |||
self._insert_symbols(cursor, code_id, sym_type, symbols) | |||
origin, url = self._decompose_url(cursor, codelet.url) | |||
cursor.execute(query2, (codelet.name, code_id, origin, url, | |||
codelet.rank, codelet.date_created, | |||
codelet.date_modified)) | |||
codelet_id = cursor.lastrowid | |||
authors = [(codelet_id, a[0], a[1]) for a in codelet.authors] | |||
cursor.executemany(query3, authors) |
@@ -0,0 +1,97 @@ | |||
""" | |||
Contains information about database schema versions, and SQL queries to update | |||
between them. | |||
""" | |||
VERSION = 6 | |||
MIGRATIONS = [ | |||
# 1 -> 2 | |||
[ | |||
"""ALTER TABLE `codelets` | |||
DROP FOREIGN KEY `codelets_ibfk_1`""", | |||
"""ALTER TABLE `code` | |||
DROP KEY `code_hash`, | |||
DROP COLUMN `code_hash`, | |||
MODIFY COLUMN `code_id` BIGINT NOT NULL""", | |||
"""ALTER TABLE `codelets` | |||
MODIFY COLUMN `codelet_code_id` BIGINT NOT NULL, | |||
ADD KEY (`codelet_lang`), | |||
ADD CONSTRAINT `codelets_ibfk_1` FOREIGN KEY (`codelet_code_id`) | |||
REFERENCES `code` (`code_id`) | |||
ON DELETE RESTRICT ON UPDATE CASCADE""", | |||
"""ALTER TABLE `symbols` | |||
ADD COLUMN `symbol_end_row` INT UNSIGNED NOT NULL, | |||
ADD COLUMN `symbol_end_col` INT UNSIGNED NOT NULL""" | |||
], | |||
# 2 -> 3 | |||
[ | |||
"""ALTER TABLE `symbols` | |||
DROP FOREIGN KEY `symbols_ibfk_1`, | |||
CHANGE COLUMN `symbol_codelet` `symbol_code` BIGINT NOT NULL, | |||
ADD CONSTRAINT `symbols_ibfk_1` FOREIGN KEY (`symbol_code`) | |||
REFERENCES `code` (`code_id`) | |||
ON DELETE CASCADE ON UPDATE CASCADE""" | |||
], | |||
# 3 -> 4 | |||
[ | |||
"""ALTER TABLE `symbols` | |||
DROP COLUMN `symbol_row`, | |||
DROP COLUMN `symbol_col`, | |||
DROP COLUMN `symbol_end_row`, | |||
DROP COLUMN `symbol_end_col`""", | |||
"""CREATE TABLE `symbol_locations` ( | |||
`sloc_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, | |||
`sloc_symbol` BIGINT UNSIGNED NOT NULL, | |||
`sloc_type` TINYINT UNSIGNED NOT NULL, | |||
`sloc_row` INT UNSIGNED NOT NULL, | |||
`sloc_col` INT UNSIGNED NOT NULL, | |||
`sloc_end_row` INT UNSIGNED NOT NULL, | |||
`sloc_end_col` INT UNSIGNED NOT NULL, | |||
PRIMARY KEY (`sloc_id`), | |||
FOREIGN KEY (`sloc_symbol`) | |||
REFERENCES `symbols` (`symbol_id`) | |||
ON DELETE CASCADE ON UPDATE CASCADE | |||
) ENGINE=InnoDB""" | |||
], | |||
# 4 -> 5 | |||
[ | |||
"""ALTER TABLE `origins` | |||
MODIFY COLUMN `origin_name` VARCHAR(64) DEFAULT NULL, | |||
MODIFY COLUMN `origin_url` VARCHAR(512) DEFAULT NULL, | |||
MODIFY COLUMN `origin_url_base` VARCHAR(512) DEFAULT NULL""" | |||
], | |||
# 5 -> 6 | |||
[ | |||
"""ALTER TABLE `code` | |||
ADD COLUMN `code_lang` SMALLINT UNSIGNED DEFAULT NULL | |||
AFTER `code_id`, | |||
ADD KEY (`code_lang`)""", | |||
"""ALTER TABLE `codelets` | |||
DROP KEY `codelet_lang`, | |||
DROP COLUMN `codelet_lang`""", | |||
"""ALTER TABLE `cache_data` | |||
DROP FOREIGN KEY `cache_data_ibfk_1`""", | |||
"""ALTER TABLE `cache` | |||
MODIFY COLUMN `cache_id` BIGINT NOT NULL, | |||
DROP COLUMN `cache_hash`, | |||
DROP COLUMN `cache_last_used`, | |||
MODIFY COLUMN `cache_count_mnt` SMALLINT UNSIGNED NOT NULL""", | |||
"""ALTER TABLE `cache_data` | |||
MODIFY COLUMN `cdata_cache` BIGINT NOT NULL, | |||
ADD PRIMARY KEY (`cdata_cache`, `cdata_codelet`), | |||
ADD CONSTRAINT `cache_data_ibfk_1` FOREIGN KEY (`cdata_codelet`) | |||
REFERENCES `codelets` (`codelet_id`) | |||
ON DELETE CASCADE ON UPDATE CASCADE""", | |||
"""CREATE EVENT `flush_cache` | |||
ON SCHEDULE EVERY 1 HOUR | |||
DO | |||
DELETE FROM `cache` | |||
WHERE `cache_created` < DATE_SUB(NOW(), INTERVAL 1 DAY);""" | |||
] | |||
] | |||
if __name__ == "__main__": | |||
from . import Database | |||
Database(migrate=True).close() |
@@ -0,0 +1,114 @@ | |||
-- Schema version 6
-- Full bootstrap script for a fresh bitshift database; existing databases
-- are upgraded incrementally via bitshift/database/migration.py instead.

CREATE DATABASE `bitshift` DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci;
USE `bitshift`;

-- Single-row table recording the schema version, checked by the application
-- at startup.
CREATE TABLE `version` (
    `version` INT UNSIGNED NOT NULL
) ENGINE=InnoDB;
INSERT INTO `version` VALUES (6);

-- Sources that codelets are crawled from; row 1 (all NULLs) is the
-- catch-all "unknown" origin used when a URL matches no known base.
CREATE TABLE `origins` (
    `origin_id` TINYINT UNSIGNED NOT NULL AUTO_INCREMENT,
    `origin_name` VARCHAR(64) DEFAULT NULL,
    `origin_url` VARCHAR(512) DEFAULT NULL,
    `origin_url_base` VARCHAR(512) DEFAULT NULL,
    `origin_image` BLOB DEFAULT NULL,
    PRIMARY KEY (`origin_id`)
) ENGINE=InnoDB;
INSERT INTO `origins` VALUES (1, NULL, NULL, NULL, NULL);

-- Deduplicated source text; `code_id` is a 64-bit hash of language + code
-- computed by the application (not auto-incremented).
CREATE TABLE `code` (
    `code_id` BIGINT NOT NULL,
    `code_lang` SMALLINT UNSIGNED DEFAULT NULL,
    `code_code` MEDIUMTEXT NOT NULL,
    PRIMARY KEY (`code_id`),
    KEY (`code_lang`),
    FULLTEXT KEY (`code_code`)
) ENGINE=InnoDB;

-- A codelet: one named occurrence of a code row at a particular origin/URL.
CREATE TABLE `codelets` (
    `codelet_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    `codelet_name` VARCHAR(300) NOT NULL,
    `codelet_code_id` BIGINT NOT NULL,
    `codelet_origin` TINYINT UNSIGNED NOT NULL,
    `codelet_url` VARCHAR(512) NOT NULL,
    `codelet_rank` FLOAT NOT NULL,
    `codelet_date_created` DATETIME DEFAULT NULL,
    `codelet_date_modified` DATETIME DEFAULT NULL,
    PRIMARY KEY (`codelet_id`),
    FULLTEXT KEY (`codelet_name`),
    KEY (`codelet_rank`),
    KEY (`codelet_date_created`),
    KEY (`codelet_date_modified`),
    FOREIGN KEY (`codelet_code_id`)
        REFERENCES `code` (`code_id`)
        ON DELETE RESTRICT ON UPDATE CASCADE,
    FOREIGN KEY (`codelet_origin`)
        REFERENCES `origins` (`origin_id`)
        ON DELETE RESTRICT ON UPDATE CASCADE
) ENGINE=InnoDB;

-- Authors attributed to a codelet (name plus optional profile URL).
CREATE TABLE `authors` (
    `author_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    `author_codelet` BIGINT UNSIGNED NOT NULL,
    `author_name` VARCHAR(128) NOT NULL,
    `author_url` VARCHAR(512) DEFAULT NULL,
    PRIMARY KEY (`author_id`),
    FULLTEXT KEY (`author_name`),
    FOREIGN KEY (`author_codelet`)
        REFERENCES `codelets` (`codelet_id`)
        ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB;

-- Named symbols extracted from a code row; `symbol_type` is an index into
-- the application's ["functions", "classes", "variables"] list.
CREATE TABLE `symbols` (
    `symbol_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    `symbol_code` BIGINT NOT NULL,
    `symbol_type` TINYINT UNSIGNED NOT NULL,
    `symbol_name` VARCHAR(512) NOT NULL,
    PRIMARY KEY (`symbol_id`),
    KEY (`symbol_type`, `symbol_name`(32)),
    FOREIGN KEY (`symbol_code`)
        REFERENCES `code` (`code_id`)
        ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB;

-- Row/column spans where a symbol appears; `sloc_type` 0 = declaration,
-- 1 = use (as written by Database._insert_symbols).
CREATE TABLE `symbol_locations` (
    `sloc_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    `sloc_symbol` BIGINT UNSIGNED NOT NULL,
    `sloc_type` TINYINT UNSIGNED NOT NULL,
    `sloc_row` INT UNSIGNED NOT NULL,
    `sloc_col` INT UNSIGNED NOT NULL,
    `sloc_end_row` INT UNSIGNED NOT NULL,
    `sloc_end_col` INT UNSIGNED NOT NULL,
    PRIMARY KEY (`sloc_id`),
    FOREIGN KEY (`sloc_symbol`)
        REFERENCES `symbols` (`symbol_id`)
        ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB;

-- Search-result cache keyed by a 64-bit hash of (page, query); the result
-- count is stored as mantissa * 10^exponent.
CREATE TABLE `cache` (
    `cache_id` BIGINT NOT NULL,
    `cache_count_mnt` SMALLINT UNSIGNED NOT NULL,
    `cache_count_exp` TINYINT UNSIGNED NOT NULL,
    `cache_created` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (`cache_id`)
) ENGINE=InnoDB;

-- Codelet IDs belonging to a cached result page.
CREATE TABLE `cache_data` (
    `cdata_cache` BIGINT NOT NULL,
    `cdata_codelet` BIGINT UNSIGNED NOT NULL,
    PRIMARY KEY (`cdata_cache`, `cdata_codelet`),
    FOREIGN KEY (`cdata_cache`)
        REFERENCES `cache` (`cache_id`)
        ON DELETE CASCADE ON UPDATE CASCADE,
    FOREIGN KEY (`cdata_codelet`)
        REFERENCES `codelets` (`codelet_id`)
        ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB;

-- Hourly event that expires cache entries older than one day.
CREATE EVENT `flush_cache`
    ON SCHEDULE EVERY 1 HOUR
    DO
        DELETE FROM `cache`
        WHERE `cache_created` < DATE_SUB(NOW(), INTERVAL 1 DAY);
@@ -6,4 +6,6 @@ __all__ = ["parse_query"] | |||
def parse_query(query):
    """Parse a search query string into a query Tree. Currently a stub.

    :param query: The raw query string entered by the user.
    :type query: str
    :return: A parse tree — not yet implemented, so this returns None.
    """
    # TODO: note: resultant Trees should be normalized so that "foo OR bar"
    # and "bar OR foo" result in equivalent trees
    pass
@@ -4,8 +4,9 @@ setup( | |||
name = "bitshift", | |||
version = "0.1", | |||
packages = find_packages(), | |||
install_requires = ["Flask>=0.10.1", "pygments>=1.6", "requests>=2.2.0", | |||
"BeautifulSoup>=3.2.1"], | |||
install_requires = [ | |||
"Flask>=0.10.1", "pygments>=1.6", "requests>=2.2.0", | |||
"BeautifulSoup>=3.2.1", "oursql>=0.9.3.1", "mmh3>=2.3"], | |||
author = "Benjamin Attal, Ben Kurtovic, Severyn Kozak", | |||
license = "MIT", | |||
url = "https://github.com/earwig/bitshift" | |||