Przeglądaj źródła

Merge branch 'feature/database' into develop

tags/v1.0^2
Ben Kurtovic 10 lat temu
rodzic
commit
a3b1f6d0c3
8 zmienionych plików z 376 dodań i 23 usunięć
  1. +1
    -0
      .gitignore
  2. +6
    -3
      app.py
  3. +0
    -18
      bitshift/database.py
  4. +153
    -0
      bitshift/database/__init__.py
  5. +97
    -0
      bitshift/database/migration.py
  6. +114
    -0
      bitshift/database/schema.sql
  7. +2
    -0
      bitshift/query/__init__.py
  8. +3
    -2
      setup.py

+ 1
- 0
.gitignore Wyświetl plik

@@ -1,5 +1,6 @@
.sass-cache
.DS_Store
.my.cnf

# github premade rules
*.py[cod]


+ 6
- 3
app.py Wyświetl plik

@@ -5,6 +5,7 @@ Module to contain all the project's Flask server plumbing.
from flask import Flask
from flask import render_template, session

from bitshift.database import Database
from bitshift.query import parse_query

app = Flask(__name__)
@@ -12,7 +13,9 @@ app.config.from_object("bitshift.config")

app_env = app.jinja_env
app_env.line_statement_prefix = "="
app_env.globals.update(assets = assets)
app_env.globals.update(assets=assets)

database = Database()

@app.route("/")
def index():
@@ -20,8 +23,8 @@ def index():

@app.route("/search/<query>")
def search(query):
## tree = parse_query(query)
## database.search(tree)
tree = parse_query(query)
database.search(tree)
pass

if __name__ == "__main__":


+ 0
- 18
bitshift/database.py Wyświetl plik

@@ -1,18 +0,0 @@
"""
Module with classes and functions to handle communication with the MySQL
database backend, which manages the search index.
"""

import oursql

class Database(object):
"""Represents the MySQL database."""

def __init__(self):
pass

def _connect(self):
pass

def _create(self):
pass

+ 153
- 0
bitshift/database/__init__.py Wyświetl plik

@@ -0,0 +1,153 @@
"""
Subpackage with classes and functions to handle communication with the MySQL
database backend, which manages the search index.
"""

import os

import mmh3
import oursql

from .migration import VERSION, MIGRATIONS

__all__ = ["Database"]

class Database(object):
"""Represents the MySQL database."""

def __init__(self, migrate=False):
self._conn = self._connect()
self._check_version(migrate)

def _connect(self):
"""Establish a connection to the database."""
root = os.path.dirname(os.path.abspath(__file__))
default_file = os.path.join(root, ".my.cnf")
return oursql.connect(db="bitshift", read_default_file=default_file,
autoping=True, autoreconnect=True)

def _migrate(self, cursor, current):
"""Migrate the database to the latest schema version."""
for version in xrange(current, VERSION):
print "Migrating to %d..." % version + 1
for query in MIGRATIONS[version - 1]:
cursor.execute(query)
cursor.execute("UPDATE version SET version = ?", (version + 1,))

def _check_version(self, migrate):
"""Check the database schema version and respond accordingly.

If the schema is out of date, migrate if *migrate* is True, else raise
an exception.
"""
with self._conn.cursor() as cursor:
cursor.execute("SELECT version FROM version")
version = cursor.fetchone()[0]
if version < VERSION:
if migrate:
self._migrate(cursor, version)
else:
err = "Database schema out of date. " \
"Run `python -m bitshift.database.migration`."
raise RuntimeError(err)

def _get_codelets_from_ids(self, cursor, ids):
"""Return a list of Codelet objects given a list of codelet IDs."""
raise NotImplementedError() ## TODO

def _decompose_url(self, cursor, url):
"""Break up a URL into an origin (with a URL base) and a suffix."""
query = """SELECT origin_id, SUBSTR(?, LENGTH(origin_url_base))
FROM origins
WHERE origin_url_base IS NOT NULL
AND ? LIKE CONCAT(origin_url_base, "%")"""

cursor.execute(query, (url, url))
result = cursor.fetchone()
return result if result else (1, url)

def _insert_symbols(self, cursor, code_id, sym_type, symbols):
"""Insert a list of symbols of a given type into the database."""
sym_types = ["functions", "classes", "variables"]
query1 = "INSERT INTO symbols VALUES (DEFAULT, ?, ?, ?)"
query2 = """INSERT INTO symbol_locations VALUES
(DEFAULT, ?, ?, ?, ?, ?, ?)"""

for (name, decls, uses) in symbols:
cursor.execute(query1, (code_id, sym_types.index(sym_type), name))
sym_id = cursor.lastrowid
params = ([tuple([sym_id, 0] + list(loc)) for loc in decls] +
[tuple([sym_id, 1] + list(loc)) for loc in uses])
cursor.executemany(query2, params)

def close(self):
"""Disconnect from the database."""
self._conn.close()

def search(self, query, page=1):
"""
Search the database for a query and return the *n*\ th page of results.

:param query: The query to search for.
:type query: :py:class:`~.query.tree.Tree`
:param page: The result page to display.
:type page: int

:return: The total number of results, and the *n*\ th page of results.
:rtype: 2-tuple of (long, list of :py:class:`.Codelet`\ s)
"""
query1 = """SELECT cdata_codelet, cache_count_mnt, cache_count_exp
FROM cache
INNER JOIN cache_data ON cache_id = cdata_cache
WHERE cache_id = ?"""
query2 = "INSERT INTO cache VALUES (?, ?, ?, DEFAULT)"
query3 = "INSERT INTO cache_data VALUES (?, ?)"

cache_id = mmh3.hash64(str(page) + ":" + query.serialize())[0]

with self._conn.cursor() as cursor:
cursor.execute(query1, (cache_id,))
results = cursor.fetchall()
if results: # Cache hit
num_results = results[0][1] * (10 ** results[0][2])
ids = [res[0] for res in results]
else: # Cache miss
## TODO: build and execute search query
results = cursor.fetchall()
ids = NotImplemented ## TODO: extract ids from results
num_results = NotImplemented ## TODO: num if results else 0
num_exp = max(len(str(num_results)) - 3, 0)
num_results = int(round(num_results, -num_exp))
num_mnt = num_results / (10 ** num_exp)
cursor.execute(query2, (cache_id, num_mnt, num_exp))
cursor.executemany(query3, [(cache_id, c_id) for c_id in ids])
return (num_results, self._get_codelets_from_ids(cursor, ids))

def insert(self, codelet):
"""
Insert a codelet into the database.

:param codelet: The codelet to insert.
:type codelet: :py:class:`.Codelet`
"""
query1 = """INSERT INTO code VALUES (?, ?, ?)
ON DUPLICATE KEY UPDATE code_id=code_id"""
query2 = """INSERT INTO codelets VALUES
(DEFAULT, ?, ?, ?, ?, ?, ?, ?)"""
query3 = "INSERT INTO authors VALUES (DEFAULT, ?, ?, ?)"

hash_key = str(codelet.language) + ":" + codelet.code.encode("utf8")
code_id = mmh3.hash64(hash_key)[0]

with self._conn.cursor() as cursor:
cursor.execute(query1, (code_id, codelet.language, codelet.code))
if cursor.rowcount == 1:
for sym_type, symbols in codelet.symbols.iteritems():
self._insert_symbols(cursor, code_id, sym_type, symbols)
origin, url = self._decompose_url(cursor, codelet.url)
cursor.execute(query2, (codelet.name, code_id, origin, url,
codelet.rank, codelet.date_created,
codelet.date_modified))
codelet_id = cursor.lastrowid
authors = [(codelet_id, a[0], a[1]) for a in codelet.authors]
cursor.executemany(query3, authors)

+ 97
- 0
bitshift/database/migration.py Wyświetl plik

@@ -0,0 +1,97 @@
"""
Contains information about database schema versions, and SQL queries to update
between them.
"""

VERSION = 6

MIGRATIONS = [
# 1 -> 2
[
"""ALTER TABLE `codelets`
DROP FOREIGN KEY `codelets_ibfk_1`""",
"""ALTER TABLE `code`
DROP KEY `code_hash`,
DROP COLUMN `code_hash`,
MODIFY COLUMN `code_id` BIGINT NOT NULL""",
"""ALTER TABLE `codelets`
MODIFY COLUMN `codelet_code_id` BIGINT NOT NULL,
ADD KEY (`codelet_lang`),
ADD CONSTRAINT `codelets_ibfk_1` FOREIGN KEY (`codelet_code_id`)
REFERENCES `code` (`code_id`)
ON DELETE RESTRICT ON UPDATE CASCADE""",
"""ALTER TABLE `symbols`
ADD COLUMN `symbol_end_row` INT UNSIGNED NOT NULL,
ADD COLUMN `symbol_end_col` INT UNSIGNED NOT NULL"""
],
# 2 -> 3
[
"""ALTER TABLE `symbols`
DROP FOREIGN KEY `symbols_ibfk_1`,
CHANGE COLUMN `symbol_codelet` `symbol_code` BIGINT NOT NULL,
ADD CONSTRAINT `symbols_ibfk_1` FOREIGN KEY (`symbol_code`)
REFERENCES `code` (`code_id`)
ON DELETE CASCADE ON UPDATE CASCADE"""
],
# 3 -> 4
[
"""ALTER TABLE `symbols`
DROP COLUMN `symbol_row`,
DROP COLUMN `symbol_col`,
DROP COLUMN `symbol_end_row`,
DROP COLUMN `symbol_end_col`""",
"""CREATE TABLE `symbol_locations` (
`sloc_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
`sloc_symbol` BIGINT UNSIGNED NOT NULL,
`sloc_type` TINYINT UNSIGNED NOT NULL,
`sloc_row` INT UNSIGNED NOT NULL,
`sloc_col` INT UNSIGNED NOT NULL,
`sloc_end_row` INT UNSIGNED NOT NULL,
`sloc_end_col` INT UNSIGNED NOT NULL,
PRIMARY KEY (`sloc_id`),
FOREIGN KEY (`sloc_symbol`)
REFERENCES `symbols` (`symbol_id`)
ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB"""
],
# 4 -> 5
[
"""ALTER TABLE `origins`
MODIFY COLUMN `origin_name` VARCHAR(64) DEFAULT NULL,
MODIFY COLUMN `origin_url` VARCHAR(512) DEFAULT NULL,
MODIFY COLUMN `origin_url_base` VARCHAR(512) DEFAULT NULL"""
],
# 5 -> 6
[
"""ALTER TABLE `code`
ADD COLUMN `code_lang` SMALLINT UNSIGNED DEFAULT NULL
AFTER `code_id`,
ADD KEY (`code_lang`)""",
"""ALTER TABLE `codelets`
DROP KEY `codelet_lang`,
DROP COLUMN `codelet_lang`""",
"""ALTER TABLE `cache_data`
DROP FOREIGN KEY `cache_data_ibfk_1`""",
"""ALTER TABLE `cache`
MODIFY COLUMN `cache_id` BIGINT NOT NULL,
DROP COLUMN `cache_hash`,
DROP COLUMN `cache_last_used`,
MODIFY COLUMN `cache_count_mnt` SMALLINT UNSIGNED NOT NULL""",
"""ALTER TABLE `cache_data`
MODIFY COLUMN `cdata_cache` BIGINT NOT NULL,
ADD PRIMARY KEY (`cdata_cache`, `cdata_codelet`),
ADD CONSTRAINT `cache_data_ibfk_1` FOREIGN KEY (`cdata_codelet`)
REFERENCES `codelets` (`codelet_id`)
ON DELETE CASCADE ON UPDATE CASCADE""",
"""CREATE EVENT `flush_cache`
ON SCHEDULE EVERY 1 HOUR
DO
DELETE FROM `cache`
WHERE `cache_created` < DATE_SUB(NOW(), INTERVAL 1 DAY);"""
]
]

if __name__ == "__main__":
from . import Database

Database(migrate=True).close()

+ 114
- 0
bitshift/database/schema.sql Wyświetl plik

@@ -0,0 +1,114 @@
-- Schema version 6

CREATE DATABASE `bitshift` DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci;
USE `bitshift`;

CREATE TABLE `version` (
`version` INT UNSIGNED NOT NULL
) ENGINE=InnoDB;
INSERT INTO `version` VALUES (6);

CREATE TABLE `origins` (
`origin_id` TINYINT UNSIGNED NOT NULL AUTO_INCREMENT,
`origin_name` VARCHAR(64) DEFAULT NULL,
`origin_url` VARCHAR(512) DEFAULT NULL,
`origin_url_base` VARCHAR(512) DEFAULT NULL,
`origin_image` BLOB DEFAULT NULL,
PRIMARY KEY (`origin_id`)
) ENGINE=InnoDB;
INSERT INTO `origins` VALUES (1, NULL, NULL, NULL, NULL);

CREATE TABLE `code` (
`code_id` BIGINT NOT NULL,
`code_lang` SMALLINT UNSIGNED DEFAULT NULL,
`code_code` MEDIUMTEXT NOT NULL,
PRIMARY KEY (`code_id`),
KEY (`code_lang`),
FULLTEXT KEY (`code_code`)
) ENGINE=InnoDB;

CREATE TABLE `codelets` (
`codelet_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
`codelet_name` VARCHAR(300) NOT NULL,
`codelet_code_id` BIGINT NOT NULL,
`codelet_origin` TINYINT UNSIGNED NOT NULL,
`codelet_url` VARCHAR(512) NOT NULL,
`codelet_rank` FLOAT NOT NULL,
`codelet_date_created` DATETIME DEFAULT NULL,
`codelet_date_modified` DATETIME DEFAULT NULL,
PRIMARY KEY (`codelet_id`),
FULLTEXT KEY (`codelet_name`),
KEY (`codelet_rank`),
KEY (`codelet_date_created`),
KEY (`codelet_date_modified`),
FOREIGN KEY (`codelet_code_id`)
REFERENCES `code` (`code_id`)
ON DELETE RESTRICT ON UPDATE CASCADE,
FOREIGN KEY (`codelet_origin`)
REFERENCES `origins` (`origin_id`)
ON DELETE RESTRICT ON UPDATE CASCADE
) ENGINE=InnoDB;

CREATE TABLE `authors` (
`author_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
`author_codelet` BIGINT UNSIGNED NOT NULL,
`author_name` VARCHAR(128) NOT NULL,
`author_url` VARCHAR(512) DEFAULT NULL,
PRIMARY KEY (`author_id`),
FULLTEXT KEY (`author_name`),
FOREIGN KEY (`author_codelet`)
REFERENCES `codelets` (`codelet_id`)
ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB;

CREATE TABLE `symbols` (
`symbol_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
`symbol_code` BIGINT NOT NULL,
`symbol_type` TINYINT UNSIGNED NOT NULL,
`symbol_name` VARCHAR(512) NOT NULL,
PRIMARY KEY (`symbol_id`),
KEY (`symbol_type`, `symbol_name`(32)),
FOREIGN KEY (`symbol_code`)
REFERENCES `code` (`code_id`)
ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB;

CREATE TABLE `symbol_locations` (
`sloc_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
`sloc_symbol` BIGINT UNSIGNED NOT NULL,
`sloc_type` TINYINT UNSIGNED NOT NULL,
`sloc_row` INT UNSIGNED NOT NULL,
`sloc_col` INT UNSIGNED NOT NULL,
`sloc_end_row` INT UNSIGNED NOT NULL,
`sloc_end_col` INT UNSIGNED NOT NULL,
PRIMARY KEY (`sloc_id`),
FOREIGN KEY (`sloc_symbol`)
REFERENCES `symbols` (`symbol_id`)
ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB;

CREATE TABLE `cache` (
`cache_id` BIGINT NOT NULL,
`cache_count_mnt` SMALLINT UNSIGNED NOT NULL,
`cache_count_exp` TINYINT UNSIGNED NOT NULL,
`cache_created` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`cache_id`)
) ENGINE=InnoDB;

CREATE TABLE `cache_data` (
`cdata_cache` BIGINT NOT NULL,
`cdata_codelet` BIGINT UNSIGNED NOT NULL,
PRIMARY KEY (`cdata_cache`, `cdata_codelet`),
FOREIGN KEY (`cdata_cache`)
REFERENCES `cache` (`cache_id`)
ON DELETE CASCADE ON UPDATE CASCADE,
FOREIGN KEY (`cdata_codelet`)
REFERENCES `codelets` (`codelet_id`)
ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB;

CREATE EVENT `flush_cache`
ON SCHEDULE EVERY 1 HOUR
DO
DELETE FROM `cache`
WHERE `cache_created` < DATE_SUB(NOW(), INTERVAL 1 DAY);

+ 2
- 0
bitshift/query/__init__.py Wyświetl plik

@@ -6,4 +6,6 @@ __all__ = ["parse_query"]

def parse_query(query):
# gets a string, returns a Tree
# TODO: note: resultant Trees should be normalized so that "foo OR bar"
# and "bar OR foo" result in equivalent trees
pass

+ 3
- 2
setup.py Wyświetl plik

@@ -4,8 +4,9 @@ setup(
name = "bitshift",
version = "0.1",
packages = find_packages(),
install_requires = ["Flask>=0.10.1", "pygments>=1.6", "requests>=2.2.0",
"BeautifulSoup>=3.2.1"],
install_requires = [
"Flask>=0.10.1", "pygments>=1.6", "requests>=2.2.0",
"BeautifulSoup>=3.2.1", "oursql>=0.9.3.1", "mmh3>=2.3"],
author = "Benjamin Attal, Ben Kurtovic, Severyn Kozak",
license = "MIT",
url = "https://github.com/earwig/bitshift"


Ładowanie…
Anuluj
Zapisz