Conflicts: static/js/index.js templates/index.htmltags/v1.0^2
@@ -18,12 +18,14 @@ class Codelet(object): | |||
code was last modified. | |||
:ivar rank: (float) A quantification of the source code's quality, as | |||
per available ratings (stars, forks, upvotes, etc.). | |||
:ivar symbols: (dict) Dictionary containing dictionaries of functions, classes, | |||
variable definitions, etc. | |||
:ivar symbols: (dict) Dictionary containing dictionaries of functions, | |||
classes, variable definitions, etc. | |||
:ivar origin: (tuple) 3-tuple of (site_name, site_url, image_blob), as | |||
added by the database. | |||
""" | |||
def __init__(self, name, code, filename, language, authors, code_url, | |||
date_created, date_modified, rank): | |||
date_created, date_modified, rank, symbols=None, origin=None): | |||
""" | |||
Create a Codelet instance. | |||
@@ -36,6 +38,8 @@ class Codelet(object): | |||
:param date_created: see :attr:`self.date_created` | |||
:param date_modified: see :attr:`self.date_modified` | |||
:param rank: see :attr:`self.rank` | |||
:param symbols: see :attr:`self.symbols` | |||
:param origin: see :attr:`self.origin` | |||
:type name: see :attr:`self.name` | |||
:type code: see :attr:`self.code` | |||
@@ -46,6 +50,8 @@ class Codelet(object): | |||
:type date_created: see :attr:`self.date_created` | |||
:type date_modified: see :attr:`self.date_modified` | |||
:type rank: see :attr:`self.rank` | |||
:type symbols: see :attr:`self.symbols` | |||
:type origin: see :attr:`self.origin` | |||
""" | |||
self.name = name | |||
@@ -57,3 +63,5 @@ class Codelet(object): | |||
self.date_created = date_created | |||
self.date_modified = date_modified | |||
self.rank = rank | |||
self.symbols = symbols or {} | |||
self.origin = origin or (None, None, None) |
@@ -9,6 +9,9 @@ import mmh3 | |||
import oursql | |||
from .migration import VERSION, MIGRATIONS | |||
from ..codelet import Codelet | |||
from ..query.nodes import (String, Regex, Text, Language, Author, Date, Symbol, | |||
BinaryOp, UnaryOp) | |||
__all__ = ["Database"] | |||
@@ -51,9 +54,71 @@ class Database(object): | |||
"Run `python -m bitshift.database.migration`." | |||
raise RuntimeError(err) | |||
def _search_with_query(self, cursor, tree, page): | |||
"""Execute an SQL query based on a query tree, and return results. | |||
The returned data is a 2-tuple of (list of codelet IDs, estimated | |||
number of total results). | |||
""" | |||
query, args = tree.build_query(page) | |||
cursor.execute(query, args) | |||
ids = [id for id, _ in cursor.fetchall()] | |||
num_results = 0 # TODO: NotImplemented | |||
return ids, num_results | |||
def _get_authors_for_codelet(self, cursor, codelet_id): | |||
"""Return a list of authors for a given codelet.""" | |||
query = """SELECT author_name, author_url | |||
FROM authors | |||
WHERE author_codelet = ?""" | |||
cursor.execute(query, (codelet_id,)) | |||
return cursor.fetchall() | |||
def _get_symbols_for_code(self, cursor, code_id):
    """Return the symbols recorded for a given piece of code.

    :param cursor: database cursor used to run the lookup.
    :param code_id: ID of the code row whose symbols are wanted.
    :return: dict mapping every symbol type name in ``Symbol.TYPES_INV``
        to a list of ``(name, locations_0, locations_1)`` 3-tuples. Each
        locations list holds ``(row, col, end_row, end_col)`` tuples,
        grouped by the row's ``sloc_type`` (0 or 1).
    """
    query = """SELECT symbol_type, symbol_name, sloc_type, sloc_row,
                      sloc_col, sloc_end_row, sloc_end_col
               FROM symbols
               INNER JOIN symbol_locations ON sloc_symbol = symbol_id
               WHERE symbol_code = ?"""
    symbols = {type_: {} for type_ in Symbol.TYPES_INV}
    cursor.execute(query, (code_id,))
    for type_, name, loc_type, row, col, erow, ecol in cursor.fetchall():
        sdict = symbols[Symbol.TYPES_INV[type_]]
        if name not in sdict:
            # Must be lists, not tuples: locations are appended below, and
            # tuples have no append().
            sdict[name] = ([], [])
        sdict[name][loc_type].append((row, col, erow, ecol))
    # Flatten each {name: (locs_0, locs_1)} mapping into a list of 3-tuples.
    return {type_: [(n, d, u) for n, (d, u) in sdict.items()]
            for type_, sdict in symbols.items()}
def _get_codelets_from_ids(self, cursor, ids): | |||
"""Return a list of Codelet objects given a list of codelet IDs.""" | |||
raise NotImplementedError() ## TODO | |||
query = """SELECT * | |||
FROM codelets | |||
INNER JOIN code ON codelet_code_id = code_id | |||
INNER JOIN origins ON codelet_origin = origin_id | |||
WHERE codelet_id = ?""" | |||
with self._conn.cursor(oursql.DictCursor) as dict_cursor: | |||
dict_cursor.executemany(query, [(id,) for id in ids]) | |||
for row in dict_cursor.fetchone(): | |||
codelet_id = row["codelet_id"] | |||
if row["origin_url_base"]: | |||
url = row["codelet_url"] | |||
else: | |||
url = row["origin_url_base"] + row["codelet_url"] | |||
origin = (row["origin_name"], row["origin_url"], | |||
row["origin_image"]) | |||
authors = self._get_authors_for_codelet(cursor, codelet_id) | |||
symbols = self._get_symbols_for_code(cursor, row["code_id"]) | |||
yield Codelet( | |||
row["codelet_name"], row["code_code"], None, | |||
row["code_lang"], authors, url, | |||
row["codelet_date_created"], row["codelet_date_modified"], | |||
row["codelet_rank"], symbols, origin) | |||
def _decompose_url(self, cursor, url): | |||
"""Break up a URL into an origin (with a URL base) and a suffix.""" | |||
@@ -68,13 +133,12 @@ class Database(object): | |||
def _insert_symbols(self, cursor, code_id, sym_type, symbols): | |||
"""Insert a list of symbols of a given type into the database.""" | |||
sym_types = ["functions", "classes", "variables"] | |||
query1 = "INSERT INTO symbols VALUES (DEFAULT, ?, ?, ?)" | |||
query2 = """INSERT INTO symbol_locations VALUES | |||
(DEFAULT, ?, ?, ?, ?, ?, ?)""" | |||
for (name, decls, uses) in symbols: | |||
cursor.execute(query1, (code_id, sym_types.index(sym_type), name)) | |||
cursor.execute(query1, (code_id, Symbol.TYPES_INV[sym_type], name)) | |||
sym_id = cursor.lastrowid | |||
params = ([tuple([sym_id, 0] + list(loc)) for loc in decls] + | |||
[tuple([sym_id, 1] + list(loc)) for loc in uses]) | |||
@@ -112,16 +176,14 @@ class Database(object): | |||
num_results = results[0][1] * (10 ** results[0][2]) | |||
ids = [res[0] for res in results] | |||
else: # Cache miss | |||
## TODO: build and execute search query | |||
results = cursor.fetchall() | |||
ids = NotImplemented ## TODO: extract ids from results | |||
num_results = NotImplemented ## TODO: num if results else 0 | |||
ids, num_results = self._search_with_query(cursor, query, page) | |||
num_exp = max(len(str(num_results)) - 3, 0) | |||
num_results = int(round(num_results, -num_exp)) | |||
num_mnt = num_results / (10 ** num_exp) | |||
cursor.execute(query2, (cache_id, num_mnt, num_exp)) | |||
cursor.executemany(query3, [(cache_id, c_id) for c_id in ids]) | |||
return (num_results, self._get_codelets_from_ids(cursor, ids)) | |||
codelet_gen = self._get_codelets_from_ids(cursor, ids) | |||
return (num_results, list(codelet_gen)) | |||
def insert(self, codelet): | |||
""" | |||
@@ -21,7 +21,7 @@ def _lang(codelet): | |||
if codelet.filename is not None: | |||
try: | |||
return pgl.guess_lexer_for_filename(codelet.filename, '').name | |||
return pgl.guess_lexer_for_filename(codelet.filename, codelet.code).name | |||
except: | |||
raise UnsupportedFileError('Could not find a lexer for the codelet\'s filename') | |||
@@ -1,11 +1,283 @@ | |||
from .association import Association | |||
from .node import Node | |||
""" | |||
This subpackage contains code to parse search queries received from the | |||
frontend into trees that can be used by the database backend. | |||
""" | |||
from __future__ import unicode_literals | |||
from re import IGNORECASE, search | |||
from sys import maxsize | |||
from dateutil.parser import parse as parse_date | |||
from .nodes import (String, Regex, Text, Language, Author, Date, Symbol, | |||
BinaryOp, UnaryOp) | |||
from .tree import Tree | |||
from ..languages import LANGS | |||
__all__ = ["parse_query"] | |||
__all__ = ["QueryParseException", "parse_query"] | |||
def parse_query(query): | |||
# gets a string, returns a Tree | |||
# TODO: note: resultant Trees should be normalized so that "foo OR bar" | |||
# and "bar OR foo" result in equivalent trees | |||
class QueryParseException(Exception):
    """Error raised by parse_query() for a query that cannot be parsed."""
class _QueryParser(object): | |||
"""Wrapper class with methods to parse queries. Used as a singleton.""" | |||
def __init__(self): | |||
self._prefixes = { | |||
self._parse_language: ["l", "lang", "language"], | |||
self._parse_author: ["a", "author"], | |||
self._parse_modified: ["m", "mod", "modified", "modify"], | |||
self._parse_created: ["cr", "create", "created"], | |||
self._parse_symbol: ["s", "sym", "symb", "symbol"], | |||
self._parse_function: ["f", "fn", "fun", "func", "function"], | |||
self._parse_class: ["cl", "class", "clss"], | |||
self._parse_variable: ["v", "var", "variable"] | |||
} | |||
def _parse_literal(self, literal): | |||
"""Parse part of a search query into a string or regular expression.""" | |||
if literal.startswith(("r:", "re:", "regex:", "regexp:")): | |||
return Regex(literal.split(":", 1)[1]) | |||
return String(literal) | |||
def _parse_language(self, term): | |||
"""Parse part of a query into a language node and return it.""" | |||
term = self._parse_literal(term) | |||
if isinstance(term, Regex): | |||
langs = [i for i, lang in enumerate(LANGS) | |||
if search(term.regex, lang, IGNORECASE)] | |||
if not langs: | |||
err = 'No languages found for regex: "%s"' % term.regex | |||
raise QueryParseException(err) | |||
node = Language(langs.pop()) | |||
while langs: | |||
node = BinaryOp(Language(langs.pop()), BinaryOp.OR, node) | |||
return node | |||
needle = term.string.lower() | |||
for i, lang in enumerate(LANGS): | |||
if lang.lower() == needle: | |||
return Language(i) | |||
for i, lang in enumerate(LANGS): | |||
if lang.lower().startswith(needle): | |||
return Language(i) | |||
err = 'No languages found for string: "%s"' % term.string | |||
raise QueryParseException(err) | |||
def _parse_author(self, term): | |||
"""Parse part of a query into an author node and return it.""" | |||
return Author(self._parse_literal(term)) | |||
def _parse_date(self, term, type_): | |||
"""Parse part of a query into a date node and return it.""" | |||
if ":" not in term: | |||
err = "A date relationship is required " \ | |||
'("before:<date>" or "after:<date>"): "%s"' | |||
raise QueryParseException(err % term) | |||
relstr, dtstr = term.split(":", 1) | |||
if relstr.lower() in ("before", "b"): | |||
relation = Date.BEFORE | |||
elif relstr.lower() in ("after", "a"): | |||
relation = Date.AFTER | |||
else: | |||
err = 'Bad date relationship (should be "before" or "after"): "%s"' | |||
raise QueryParseException(err % relstr) | |||
try: | |||
dt = parse_date(dtstr) | |||
except (TypeError, ValueError): | |||
raise QueryParseException('Bad date/time string: "%s"' % dtstr) | |||
return Date(type_, relation, dt) | |||
def _parse_modified(self, term): | |||
"""Parse part of a query into a date modified node and return it.""" | |||
return self._parse_date(term, Date.MODIFY) | |||
def _parse_created(self, term): | |||
"""Parse part of a query into a date created node and return it.""" | |||
return self._parse_date(term, Date.CREATE) | |||
def _parse_symbol(self, term): | |||
"""Parse part of a query into a symbol node and return it.""" | |||
return Symbol(Symbol.ALL, self._parse_literal(term)) | |||
def _parse_function(self, term): | |||
"""Parse part of a query into a function node and return it.""" | |||
return Symbol(Symbol.FUNCTION, self._parse_literal(term)) | |||
def _parse_class(self, term): | |||
"""Parse part of a query into a class node and return it.""" | |||
return Symbol(Symbol.CLASS, self._parse_literal(term)) | |||
def _parse_variable(self, term): | |||
"""Parse part of a query into a variable node and return it.""" | |||
return Symbol(Symbol.VARIABLE, self._parse_literal(term)) | |||
def _parse_term(self, term): | |||
"""Parse a query term into a tree node and return it.""" | |||
try: | |||
term = term.decode("unicode_escape") | |||
except UnicodeDecodeError: | |||
raise QueryParseException('Invalid query term: "%s"' % term) | |||
if ":" in term and not term[0] == ":": | |||
prefix, arg = term.split(":", 1) | |||
invert = prefix.lower() == "not" | |||
if invert: | |||
prefix, arg = arg.split(":", 1) | |||
if not arg: | |||
raise QueryParseException('Incomplete query term: "%s"' % term) | |||
for meth, prefixes in self._prefixes.iteritems(): | |||
if prefix.lower() in prefixes: | |||
if invert: | |||
return UnaryOp(UnaryOp.NOT, meth(arg)) | |||
return meth(arg) | |||
return Text(self._parse_literal(term)) | |||
def _scan_query(self, query, markers): | |||
"""Scan a query (sub)string for the first occurance of some markers. | |||
Returns a 2-tuple of (first_marker_found, marker_index). | |||
""" | |||
def is_escaped(query, index): | |||
"""Return whether a query marker is backslash-escaped.""" | |||
return (index > 0 and query[index - 1] == "\\" and | |||
(index < 2 or query[index - 2] != "\\")) | |||
best_marker, best_index = None, maxsize | |||
for marker in markers: | |||
index = query.find(marker) | |||
if is_escaped(query, index): | |||
_, new_index = self._scan_query(query[index + 1:], marker) | |||
index += new_index + 1 | |||
if index >= 0 and index < best_index: | |||
best_marker, best_index = marker, index | |||
return best_marker, best_index | |||
def _split_query(self, query, parens=False): | |||
"""Split a query string into a nested list of query terms. | |||
Returns a list of terms and/or nested sublists of terms. Each term and | |||
sublist is guarenteed to be non-empty. | |||
""" | |||
query = query.lstrip() | |||
if not query: | |||
return [] | |||
marker, index = self._scan_query(query, " \"'()") | |||
if not marker: | |||
return [query] | |||
nest = [query[:index]] if index > 0 else [] | |||
after = query[index + 1:] | |||
if marker == " ": | |||
nest += self._split_query(after, parens) | |||
elif marker in ('"', "'"): | |||
close_marker, close_index = self._scan_query(after, marker) | |||
if close_marker: | |||
if close_index > 0: | |||
nest.append(after[:close_index]) | |||
after = after[close_index + 1:] | |||
nest += self._split_query(after, parens) | |||
elif after: | |||
nest.append(after) | |||
elif marker == "(": | |||
inner, after = self._split_query(after, True), [] | |||
if inner and isinstance(inner[-1], tuple): | |||
after = self._split_query(inner.pop()[0], parens) | |||
if inner: | |||
nest.append(inner) | |||
if after: | |||
nest += after | |||
elif marker == ")": | |||
if parens: | |||
nest.append((after,)) | |||
else: | |||
nest += self._split_query(after) | |||
return nest | |||
def _parse_boolean_operators(self, nest): | |||
"""Parse boolean operators in a nested query list.""" | |||
op_lookup = { | |||
"and": BinaryOp.AND, | |||
"or": BinaryOp.OR, | |||
"not": UnaryOp.NOT | |||
} | |||
for i, term in enumerate(nest): | |||
if isinstance(term, list): | |||
self._parse_boolean_operators(term) | |||
else: | |||
nest[i] = op_lookup.get(term.lower(), term) | |||
def _parse_nest(self, nest): | |||
"""Recursively parse a nested list of search query terms.""" | |||
def parse_binary_op(op): | |||
"""Parse a binary operator in a nested query list.""" | |||
index = nest.index(op) | |||
if index == 0 or index == len(nest) - 1: | |||
err = "Invalid query: '%s' given without argument." | |||
raise QueryParseException(err % BinaryOp.OPS[op]) | |||
left = self._parse_nest(nest[:index]) | |||
right = self._parse_nest(nest[index + 1:]) | |||
return BinaryOp(left, op, right) | |||
if not nest: | |||
err = "Error while parsing query: empty nest detected." | |||
raise QueryParseException(err) | |||
elif BinaryOp.OR in nest: | |||
return parse_binary_op(BinaryOp.OR) | |||
elif BinaryOp.AND in nest: | |||
return parse_binary_op(BinaryOp.AND) | |||
elif UnaryOp.NOT in nest: | |||
index = nest.index(UnaryOp.NOT) | |||
if index == len(nest) - 1: | |||
err = "Invalid query: '%s' given without argument." | |||
raise QueryParseException(err % UnaryOp.OPS[UnaryOp.NOT]) | |||
right = UnaryOp(UnaryOp.NOT, self._parse_nest(nest[index + 1:])) | |||
if index > 0: | |||
left = self._parse_nest(nest[:index]) | |||
return BinaryOp(left, BinaryOp.AND, right) | |||
return right | |||
elif len(nest) > 1: | |||
left, right = self._parse_term(nest[0]), self._parse_nest(nest[1:]) | |||
return BinaryOp(left, BinaryOp.AND, right) | |||
elif isinstance(nest[0], list): | |||
return self._parse_nest(nest[0]) | |||
else: | |||
return self._parse_term(nest[0]) | |||
def _balance_tree(self, node): | |||
"""Auto-balance a tree using a string sorting function.""" | |||
if isinstance(node, BinaryOp): | |||
self._balance_tree(node.left) | |||
self._balance_tree(node.right) | |||
if node.right.sortkey() < node.left.sortkey(): | |||
node.left, node.right = node.right, node.left | |||
elif isinstance(node, UnaryOp): | |||
self._balance_tree(node.node) | |||
def parse(self, query): | |||
""" | |||
Parse a search query. | |||
The result is normalized with a sorting function so that | |||
``"foo OR bar"`` and ``"bar OR foo"`` result in the same tree. This is | |||
important for caching purposes. | |||
:param query: The query be converted. | |||
:type query: str | |||
:return: A tree storing the data in the query. | |||
:rtype: :py:class:`~.query.tree.Tree` | |||
:raises: :py:class:`.QueryParseException` | |||
""" | |||
nest = self._split_query(query.rstrip()) | |||
if not nest: | |||
raise QueryParseException('Empty query: "%s"' % query) | |||
self._parse_boolean_operators(nest) | |||
root = self._parse_nest(nest) | |||
self._balance_tree(root) | |||
return Tree(root) | |||
parse_query = _QueryParser().parse |
@@ -0,0 +1,276 @@ | |||
from ..languages import LANGS | |||
__all__ = ["String", "Regex", "Text", "Language", "Author", "Date", "Symbol", | |||
"BinaryOp", "UnaryOp"] | |||
class _Node(object): | |||
"""Represents a single node in a query tree. | |||
Generally speaking, a node is a constraint applied to the database. Thus, | |||
a :py:class:`~.Language` node represents a constraint where only codelets | |||
of a specific language are selected. | |||
""" | |||
def sortkey(self): | |||
"""Return a string sort key for the node.""" | |||
return "" | |||
def parameterize(self, tables): | |||
"""Parameterize the node. | |||
Returns a 4-tuple of (conditional string, parameter list, rank list, | |||
should-we-rank boolean). If the rank list is empty, then it is assumed | |||
to contain the conditional string. | |||
""" | |||
return "", [], [], False | |||
class _Literal(object): | |||
"""Represents a literal component of a search query, present at the leaves. | |||
A literal might be a string or a regular expression. | |||
""" | |||
pass | |||
class String(_Literal):
    """A plain string literal in a search query."""

    def __init__(self, string):
        """
        :type string: unicode
        """
        self.string = string

    def __repr__(self):
        template = "String({0!r})"
        return template.format(self.string)

    def sortkey(self):
        """Return the wrapped string itself as the sort key."""
        return self.string
class Regex(_Literal):
    """A regular expression literal in a search query."""

    def __init__(self, regex):
        """
        :type regex: unicode
        """
        self.regex = regex

    def __repr__(self):
        template = "Regex({0!r})"
        return template.format(self.regex)

    def sortkey(self):
        """Return the pattern text itself as the sort key."""
        return self.regex
class Text(_Node):
    """A free-text search node.

    Looks in codelet names (full-text search), symbols (equality), and
    source code (full-text search). A regex literal uses ``REGEXP`` on all
    three columns instead.
    """

    def __init__(self, text):
        """
        :type text: :py:class:`._Literal`
        """
        self.text = text

    def __repr__(self):
        template = "Text({0})"
        return template.format(self.text)

    def sortkey(self):
        """Sort by the wrapped literal's key."""
        return self.text.sortkey()

    def parameterize(self, tables):
        """Build the three-way OR condition; always requests ranking.

        Adds ``"code"`` and ``"symbols"`` to ``tables`` in place. The same
        needle is passed once for each of the three placeholders.
        """
        tables.update(("code", "symbols"))
        if isinstance(self.text, Regex):
            needle = self.text.regex
            ranks = ["(codelet_name REGEXP ?)", "(symbol_name REGEXP ?)",
                     "(code_code REGEXP ?)"]
        else:
            needle = self.text.string
            ranks = ["(MATCH(codelet_name) AGAINST (? IN BOOLEAN MODE))",
                     "(MATCH(code_code) AGAINST (? IN BOOLEAN MODE))",
                     "(symbol_name = ?)"]
        cond = "(%s)" % " OR ".join(ranks)
        return cond, [needle, needle, needle], ranks, True
class Language(_Node):
    """A language constraint; matches against the code_lang field."""

    def __init__(self, lang):
        """
        :type lang: int
        """
        self.lang = lang

    def __repr__(self):
        lang_name = LANGS[self.lang]
        return "Language({0})".format(lang_name)

    def sortkey(self):
        """Sort by the language's name in LANGS."""
        return LANGS[self.lang]

    def parameterize(self, tables):
        """Require ``code_lang`` to equal this language's index.

        Adds ``"code"`` to ``tables`` in place; never requests ranking.
        """
        tables.update(("code",))
        cond, params = "(code_lang = ?)", [self.lang]
        return cond, params, [], False
class Author(_Node):
    """An author constraint.

    Matches against the author_name field: full-text search for a string
    literal, ``REGEXP`` for a regex literal.
    """

    def __init__(self, name):
        """
        :type name: :py:class:`_Literal`
        """
        self.name = name

    def __repr__(self):
        template = "Author({0})"
        return template.format(self.name)

    def sortkey(self):
        """Sort by the wrapped literal's key."""
        return self.name.sortkey()

    def parameterize(self, tables):
        """Build the author condition.

        Adds ``"authors"`` to ``tables`` in place. Only the full-text
        (string) form requests ranking.
        """
        tables.update(("authors",))
        if not isinstance(self.name, Regex):
            cond = "(MATCH(author_name) AGAINST (? IN BOOLEAN MODE))"
            return cond, [self.name.string], [], True
        return "(author_name REGEXP ?)", [self.name.regex], [], False
class Date(_Node):
    """A date constraint.

    Matches against the codelet_date_created or codelet_date_modified
    field, depending on the node's type.
    """

    # Which date column the node applies to.
    CREATE = 1
    MODIFY = 2

    # How the column relates to the node's date.
    BEFORE = 1
    AFTER = 2

    def __init__(self, type_, relation, date):
        """
        :type type_: int (``CREATE`` or ``MODIFY``)
        :type relation: int (``BEFORE``, ``AFTER``)
        :type date: datetime.datetime
        """
        self.type = type_
        self.relation = relation
        self.date = date

    def __repr__(self):
        type_names = {self.CREATE: "CREATE", self.MODIFY: "MODIFY"}
        relation_names = {self.BEFORE: "BEFORE", self.AFTER: "AFTER"}
        type_name = type_names[self.type]
        relation_name = relation_names[self.relation]
        return "Date({0}, {1}, {2})".format(type_name, relation_name,
                                            self.date)

    def sortkey(self):
        """Sort by the date formatted as ``YYYYmmddHHMMSS``."""
        return self.date.strftime("%Y%m%d%H%M%S")

    def parameterize(self, tables):
        """Compare the chosen date column against this node's date.

        ``BEFORE`` maps to ``<=`` and ``AFTER`` to ``>=``. ``tables`` is
        left untouched; ranking is never requested.
        """
        columns = {self.CREATE: "codelet_date_created",
                   self.MODIFY: "codelet_date_modified"}
        operators = {self.BEFORE: "<=", self.AFTER: ">="}
        column = columns[self.type]
        op = operators[self.relation]
        cond = "(%s %s ?)" % (column, op)
        return cond, [self.date], [], False
class Symbol(_Node):
    """A symbol constraint; matches against symbol_type and symbol_name."""

    ALL = -1
    FUNCTION = 0
    CLASS = 1
    VARIABLE = 2

    # Display name for each concrete symbol type.
    TYPES = {FUNCTION: "FUNCTION", CLASS: "CLASS", VARIABLE: "VARIABLE"}
    # Plural type names, positioned so that index == symbol type constant.
    TYPES_INV = ["functions", "classes", "variables"]

    def __init__(self, type_, name):
        """
        :type type_: int (``ALL``, ``FUNCTION``, ``CLASS``, etc.)
        :type name: :py:class:`._Literal`
        """
        self.type = type_
        self.name = name

    def __repr__(self):
        type_name = self.TYPES.get(self.type, "ALL")
        return "Symbol({0}, {1})".format(type_name, self.name)

    def sortkey(self):
        """Sort by the wrapped literal's key."""
        return self.name.sortkey()

    def parameterize(self, tables):
        """Build the symbol name/type condition.

        Adds ``"code"`` and ``"symbols"`` to ``tables`` in place. The name
        is matched by equality, or by ``REGEXP`` for a regex literal; the
        type is either one specific value or, for ``ALL``, any key of
        ``TYPES``. Ranking is never requested.
        """
        tables.update(("code", "symbols"))
        if isinstance(self.name, Regex):
            name = self.name.regex
            cond = "symbol_name REGEXP ?"
        else:
            name = self.name.string
            cond = "symbol_name = ?"
        if self.type == self.ALL:
            types = ", ".join(str(known) for known in self.TYPES)
            cond += " AND symbol_type IN (%s)" % types
        else:
            cond += " AND symbol_type = %d" % self.type
        return "(%s)" % cond, [name], [], False
class BinaryOp(_Node):
    """A relationship between two nodes: ``and`` or ``or``."""

    # Unique sentinels identifying each operator.
    AND = object()
    OR = object()
    OPS = {AND: "AND", OR: "OR"}

    def __init__(self, left, op, right):
        self.left = left
        self.op = op
        self.right = right

    def __repr__(self):
        op_name = self.OPS[self.op]
        return "BinaryOp({0}, {1}, {2})".format(self.left, op_name,
                                                self.right)

    def sortkey(self):
        """Concatenate the children's sort keys, left first."""
        left_key = self.left.sortkey()
        return left_key + self.right.sortkey()

    def parameterize(self, tables):
        """Combine both children's SQL fragments with this operator.

        A child with an empty rank list contributes its own condition as
        its rank. Ranking is requested if either child requests it or if
        this is an ``OR``.
        """
        lcond, largs, lranks, need_lranks = self.left.parameterize(tables)
        rcond, rargs, rranks, need_rranks = self.right.parameterize(tables)
        if not lranks:
            lranks = [lcond]
        if not rranks:
            rranks = [rcond]
        cond = "(%s %s %s)" % (lcond, self.OPS[self.op], rcond)
        need_ranks = need_lranks or need_rranks or self.op == self.OR
        return cond, largs + rargs, lranks + rranks, need_ranks
class UnaryOp(_Node):
    """A transformation applied to a single node: ``not``."""

    # Unique sentinel identifying the operator.
    NOT = object()
    OPS = {NOT: "NOT"}

    def __init__(self, op, node):
        self.op = op
        self.node = node

    def __repr__(self):
        op_name = self.OPS[self.op]
        return "UnaryOp({0}, {1})".format(op_name, self.node)

    def sortkey(self):
        """Sort by the wrapped node's key."""
        return self.node.sortkey()

    def parameterize(self, tables):
        """Prefix the child's condition with this operator.

        The rank list is passed through from the child; if the child has
        none, the child's un-negated condition is used as the rank.
        """
        cond, args, ranks, need_ranks = self.node.parameterize(tables)
        if not ranks:
            ranks = [cond]
        wrapped = "(%s %s)" % (self.OPS[self.op], cond)
        return wrapped, args, ranks, need_ranks
@@ -0,0 +1,69 @@ | |||
__all__ = ["Tree"] | |||
QUERY_TEMPLATE = """SELECT codelet_id, (codelet_rank%s) AS score | |||
FROM codelets %s | |||
WHERE %s | |||
GROUP BY codelet_id | |||
ORDER BY score DESC | |||
LIMIT %d OFFSET %d""".replace("\n", " ") | |||
class Tree(object):
    """A parsed search query, held as a tree of nodes."""

    def __init__(self, root):
        self._root = root

    def __repr__(self):
        template = "Tree({0})"
        return template.format(self._root)

    @property
    def root(self):
        """The root node of the tree."""
        return self._root

    def sortkey(self):
        """Return the root node's string sort key."""
        return self._root.sortkey()

    def serialize(self):
        """Create a string representation of the query for caching.

        :return: Query string representation (the tree's ``repr``).
        :rtype: str
        """
        return repr(self)

    def build_query(self, page=1, page_size=10):
        """Convert the query tree into a parameterized SQL SELECT statement.

        :param page: The page number to get results for.
        :type page: int
        :param page_size: The number of results per page.
        :type page_size: int

        :return: SQL query data.
        :rtype: 2-tuple of (SQL statement string, query parameter tuple)
        """
        # (table, left column, right column) for every optional join, in
        # the order the joins must appear in the statement.
        join_specs = (
            ("code", "codelet_code_id", "code_id"),
            ("authors", "author_codelet", "codelet_id"),
            ("symbols", "symbol_code", "code_id"),
        )

        tables = set()
        cond, arglist, ranks, need_ranks = self._root.parameterize(tables)
        if not ranks:
            ranks = [cond]

        score = ""
        if need_ranks:
            # Average of all rank expressions, added to the codelet's rank.
            score = " + ((%s) / %d)" % (" + ".join(ranks), len(ranks))

        joins = " ".join("INNER JOIN %s ON %s = %s" % spec
                         for spec in join_specs if spec[0] in tables)
        offset = (page - 1) * page_size
        query = QUERY_TEMPLATE % (score, joins, cond, page_size, offset)
        # The rank expressions repeat the condition's placeholders, so the
        # arguments are needed twice when a score is computed.
        params = arglist * 2 if need_ranks else arglist
        return query, tuple(params)
@@ -6,7 +6,8 @@ setup( | |||
packages = find_packages(), | |||
install_requires = [ | |||
"Flask>=0.10.1", "pygments>=1.6", "requests>=2.2.0", | |||
"beautifulsoup4>=3.2.1", "oursql>=0.9.3.1", "mmh3>=2.3"], | |||
"beautifulsoup4>=3.2.1", "oursql>=0.9.3.1", "mmh3>=2.3", | |||
"python-dateutil>=2.2"], | |||
author = "Benjamin Attal, Ben Kurtovic, Severyn Kozak", | |||
license = "MIT", | |||
url = "https://github.com/earwig/bitshift" | |||
@@ -10,7 +10,6 @@ var searchBar = $("form#search-bar input[type='text']")[0]; | |||
var resultsDiv = $("div#results")[0]; | |||
var typingTimer, lastValue; | |||
/* | |||
* Set all page callbacks. | |||
*/ | |||
@@ -43,10 +42,32 @@ var typingTimer, lastValue; | |||
event.preventDefault(); | |||
return false; | |||
}); | |||
searchBar.onkeyup = typingTimer; | |||
}()); | |||
//Obtained by parsing python file with pygments | |||
var codeExample = '<table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre> 1\n 2\n 3\n 4\n 5\n 6\n 7\n 8\n 9\n10\n11\n12\n13\n14\n15\n16\n17\n18\n19\n20\n21\n22\n23\n24\n25\n26\n27\n28\n29\n30\n31\n32\n33\n34\n35\n36\n37\n38\n39\n40</pre></div></td><td class="code"><div class="hll"><pre><span class="sd">"""</span>\n<span class="sd">Module to contain all the project's Flask server plumbing.</span>\n<span class="sd">"""</span>\n\n<span class="kn">from</span> <span class="nn">flask</span> <span class="kn">import</span> <span class="n">Flask</span>\n<span class="kn">from</span> <span class="nn">flask</span> <span class="kn">import</span> <span class="n">render_template</span><span class="p">,</span> <span class="n">session</span>\n\n<span class="kn">from</span> <span class="nn">bitshift</span> <span class="kn">import</span> <span class="n">assets</span>\n<span class="c"># from bitshift.database import Database</span>\n<span class="c"># from bitshift.query import parse_query</span>\n\n<span class="n">app</span> <span class="o">=</span> <span class="n">Flask</span><span class="p">(</span><span class="n">__name__</span><span class="p">)</span>\n<span class="n">app</span><span class="o">.</span><span class="n">config</span><span class="o">.</span><span class="n">from_object</span><span class="p">(</span><span class="s">"bitshift.config"</span><span class="p">)</span>\n\n<span class="n">app_env</span> <span class="o">=</span> <span class="n">app</span><span class="o">.</span><span class="n">jinja_env</span>\n<span class="n">app_env</span><span class="o">.</span><span class="n">line_statement_prefix</span> <span class="o">=</span> <span class="s">"="</span>\n<span class="n">app_env</span><span class="o">.</span><span class="n">globals</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">assets</span><span class="o">=</span><span class="n">assets</span><span class="p">)</span>\n\n<span class="c"># 
database = Database()</span>\n\n<span class="nd">@app.route</span><span class="p">(</span><span class="s">"/"</span><span class="p">)</span>\n<span class="k">def</span> <span class="nf">index</span><span class="p">():</span>\n <span class="k">return</span> <span class="n">render_template</span><span class="p">(</span><span class="s">"index.html"</span><span class="p">)</span>\n\n<span class="nd">@app.route</span><span class="p">(</span><span class="s">"/search/<query>"</span><span class="p">)</span>\n<span class="k">def</span> <span class="nf">search</span><span class="p">(</span><span class="n">query</span><span class="p">):</span>\n <span class="c"># tree = parse_query(query)</span>\n <span class="c"># database.search(tree)</span>\n <span class="k">pass</span>\n\n<span class="nd">@app.route</span><span class="p">(</span><span class="s">"/about"</span><span class="p">)</span>\n<span class="k">def</span> <span class="nf">about</span><span class="p">():</span>\n <span class="k">return</span> <span class="n">render_template</span><span class="p">(</span><span class="s">"about.html"</span><span class="p">)</span>\n\n<span class="nd">@app.route</span><span class="p">(</span><span class="s">"/developers"</span><span class="p">)</span>\n<span class="k">def</span> <span class="nf">developers</span><span class="p">():</span>\n <span class="k">return</span> <span class="n">render_template</span><span class="p">(</span><span class="s">"developers.html"</span><span class="p">)</span>\n\n<span class="k">if</span> <span class="n">__name__</span> <span class="o">==</span> <span class="s">"__main__"</span><span class="p">:</span>\n <span class="n">app</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">debug</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>\n</pre></div>\n</td></tr></table>' | |||
searchBar.onkeyup = typingTimer;

// Placeholder codelet used to render sample results. `origin` is a
// [site name, site url, image] triple and `html_code` holds ready-made
// highlighted markup.
var testCodelet = {
    code_url: 'https://github.com/earwig/bitshift/blob/develop/app.py',
    filename: 'app.py',
    language: 'python',
    date_created: 'May 10, 2014',
    date_modified: '2 days ago',
    origin: ['GitHub', 'https://github.com', ''],
    authors: ['sevko', 'earwig'],
    html_code: codeExample
};
// Enable infinite scrolling down the results page: once the viewport's bottom
// edge reaches the end of the document while the search field is in its
// 'partly-visible' state, load another batch of results.
$(window).scroll(function() {
    var searchField = $("div#search-field"),
        viewportBottom = $(window).scrollTop() + $(window).height();

    // Compare with `>=` and a 1px tolerance instead of strict equality: the
    // scroll offset can be fractional (page zoom, high-density displays) or
    // overshoot the document, in which case `==` would never match.
    if(viewportBottom >= $(document).height() - 1 &&
            searchField.hasClass('partly-visible')){
        loadMoreResults();
    }
});
/* | |||
* Clear the existing timer and set a new one when the user types text into the
* search bar. | |||
@@ -117,6 +138,98 @@ function populateResults(){ | |||
} | |||
/*
 * Create a result element based upon a codelet instance.
 *
 * Plain-text fields of the codelet (filename, language, dates, site and
 * author names) are inserted as text nodes, and URLs as attribute values, so
 * any markup they contain is displayed literally instead of being parsed.
 * Only `codelet.html_code` is inserted as markup.
 *
 * @param {Object} codelet The codelet to render. Fields read: `code_url`,
 *      `filename`, `origin` (index 0: site name, index 1: site url),
 *      `language`, `date_created`, `date_modified`, `authors` (array of
 *      names) and `html_code` (HTML string).
 *
 * @return {Element} The result element.
 */
function createResult(codelet) {
    // Build an anchor whose label is plain text, never parsed as HTML.
    function createLink(href, label) {
        var link = document.createElement("a");
        link.setAttribute("href", href);
        link.appendChild(document.createTextNode(label));
        return link;
    }

    //Level 1
    var newDiv = document.createElement("div"),
        table = document.createElement("table"),
        row = document.createElement("tr");

    //Level 2
    var displayInfo = document.createElement("div"),
        sidebar = document.createElement("td"),
        codeElt = document.createElement("td"),
        displayButton = document.createElement("td"),
        hiddenInfoContainer = document.createElement("td"),
        hiddenInfo = document.createElement("div");

    //Level 3
    var title = document.createElement("span"),
        site = document.createElement("span"),
        dateModified = document.createElement("span"),
        language = document.createElement("span"),
        dateCreated = document.createElement("span"),
        authors = document.createElement("div");

    //Classes and ID's
    // NOTE: these ids repeat in every result on the page. They are kept
    // because the stylesheet selects on them (e.g. `div#display-info`,
    // `td#code`); moving to classes has to be done together with the CSS.
    newDiv.classList.add('result');
    displayInfo.id = 'display-info';
    sidebar.id = 'sidebar';
    codeElt.id = 'code';
    displayButton.id = 'display-button';
    hiddenInfo.id = 'hidden-info';
    title.id = 'title';
    site.id = 'site';
    dateModified.id = 'date-modified';
    language.id = 'language';
    dateCreated.id = 'date-created';
    authors.id = 'authors';

    //Add the bulk of the html
    title.appendChild(document.createTextNode('File '));
    title.appendChild(createLink(codelet.code_url, codelet.filename));

    site.appendChild(document.createTextNode('on '));
    site.appendChild(createLink(codelet.origin[1], codelet.origin[0]));

    language.textContent = codelet.language;
    dateModified.textContent = 'Last modified ' + codelet.date_modified;
    // Needs to be changed from int to string on the server
    dateCreated.textContent = 'Created ' + codelet.date_created;

    // Append nodes directly; rebuilding `innerHTML` once per author would
    // re-parse the whole list on every iteration.
    authors.appendChild(document.createTextNode('Authors: '));
    $.each(codelet.authors || [], function(i, a) {
        authors.appendChild(createLink('#', a + ' '));
    });

    // Needs to be processed on the server
    codeElt.innerHTML = '<div id=tablecontainer>' + codelet.html_code + '</div>';

    //Event binding
    // Reveal the hidden info when the pointer enters the display button.
    // `.hover()` with a single handler would also run it on mouseleave.
    $(displayButton).on('mouseenter', function() {
        $(row).addClass('display-all');
    });

    // Only collapse again after a transition has finished, and then on the
    // next time the pointer leaves the whole result.
    $(newDiv).on('transitionend', function() {
        $(newDiv).one('mouseleave', function() {
            $(row).removeClass('display-all');
        });
    });

    //Finish and append elements to parent elements
    hiddenInfo.appendChild(dateCreated);
    hiddenInfo.appendChild(dateModified);
    hiddenInfo.appendChild(authors);

    hiddenInfoContainer.appendChild(hiddenInfo);

    row.appendChild(sidebar);
    row.appendChild(codeElt);
    row.appendChild(hiddenInfoContainer);
    row.appendChild(displayButton);
    table.appendChild(row);

    displayInfo.appendChild(title);
    displayInfo.appendChild(site);
    displayInfo.appendChild(language);

    newDiv.appendChild(displayInfo);
    newDiv.appendChild(table);

    return newDiv;
}
/* | |||
* AJAX the current query string to the server, and return its response. | |||
* | |||
* @return {Array} The server's response in the form of `div.result` DOM | |||
@@ -125,12 +238,7 @@ function populateResults(){ | |||
function queryServer(){ | |||
var resultDivs = [] | |||
for(var result = 0; result < 20; result++){ | |||
var newDiv = document.createElement("div"); | |||
newDiv.classList.add("result"); | |||
newDiv.innerHTML = Math.random(); | |||
newDiv.style.textAlign = "center"; | |||
newDiv.style.color = "#" + Math.floor(Math.random() * | |||
16777215).toString(16); | |||
var newDiv = createResult(testCodelet); | |||
resultDivs.push(newDiv); | |||
} | |||
@@ -6,6 +6,10 @@ | |||
@import variables | |||
$minSearchFieldsWidth: 490px | |||
$resultWidth: 1000px | |||
$sidebarWidth: 30px | |||
$codeWidth: 650px | |||
$hiddenInfoWidth: 250px | |||
.ui-datepicker | |||
font-size: 70% | |||
@@ -282,13 +286,124 @@ div#results | |||
margin-right: auto | |||
width: 80% | |||
/* TODO: | |||
1) Sidebar | |||
- Add way to cycle through hits in the code. | |||
2) Hidden info | |||
- Add links for authors. | |||
- Remove language field. | |||
3) Header | |||
- Add an icon for the website. | |||
- Add language tag. | |||
4) Code body | |||
- Add highlighting. | |||
5) Display button | |||
- unicode glyph */ | |||
div.result | |||
background-color: #F8F8F8 | |||
width: $resultWidth | |||
height: 200px | |||
margin-top: 2% | |||
margin-bottom: 10% | |||
margin-top: 1% | |||
padding: 1% | |||
table | |||
border-collapse: collapse | |||
border: 1px solid $baseColor3 | |||
height: inherit | |||
tr | |||
height: inherit | |||
&.cascade | |||
@extend .t3 | |||
margin-bottom: 0% | |||
div#display-info | |||
font-size: 1.3em | |||
padding: 5px 0px 5px 5px | |||
border: 1px dotted $baseColor3 | |||
border-bottom: none | |||
width: 400px | |||
a | |||
text-decoration: none | |||
&:hover | |||
color: orange | |||
#title | |||
margin-right: 10px | |||
#site | |||
text-transform: capitalize | |||
#language | |||
font-size: 0.8em | |||
font-weight: bold | |||
margin-left: 100px | |||
padding: 3px | |||
@include vendor(border-radius, 2px) | |||
background: #ddd | |||
color: orange | |||
td#sidebar | |||
width: $sidebarWidth | |||
background-color: #eee | |||
border-right: 1px solid $baseColor3 | |||
height: inherit | |||
td#code | |||
width: $codeWidth | |||
height: inherit | |||
border-right: 1px solid $baseColor3 | |||
@include vendor(transition, width 0.2s ease-in-out) | |||
.display-all & | |||
width: 500px | |||
#tablecontainer | |||
overflow: scroll | |||
width: 100% | |||
height: inherit | |||
background-color: #49483e | |||
table | |||
table-layout:fixed | |||
border-collapse: collapse | |||
border: none | |||
font-family: monospace | |||
td#display-button | |||
width: 25px | |||
background: url(https://cdn1.iconfinder.com/data/icons/windows-8-metro-style/512/View_Details-.png) | |||
background-size: 25px 25px | |||
background-repeat: no-repeat | |||
background-position: center | |||
.display-all & | |||
@include vendor(transform, rotateY(180deg)) | |||
div#hidden-info | |||
width: $hiddenInfoWidth | |||
margin-left: -$hiddenInfoWidth | |||
height: 100% | |||
padding-top: 40px | |||
font-size: 1.2em | |||
line-height: 1.5em | |||
@include vendor(transition, margin-left 0.2s ease-in-out) | |||
.display-all & | |||
margin-left: 0px | |||
padding-left: 20px | |||
#date-created | |||
display: inline-block | |||
#date-modified | |||
display: block | |||
#authors | |||
a | |||
text-decoration: none | |||
&:hover | |||
color: orange |
@@ -9,9 +9,7 @@ html, body | |||
height: 100% | |||
margin: 0 | |||
padding: 0 | |||
* | |||
font-family: sans-serif | |||
font-family: sans-serif | |||
div#container | |||
min-height: 100% | |||
@@ -8,6 +8,7 @@ | |||
{{ assets.tag("lib/jqueryui.custom.min.css") }} | |||
{{ assets.tag("lib/jquery.min.js") }} | |||
{{ assets.tag("lib/jquery-ui.min.js") }} | |||
{{ assets.tag("lib/highlight.css") }} | |||
{{ assets.tag("index.css") }} | |||
@@ -88,6 +89,31 @@ | |||
<span class="regex">Regex</span> | |||
</div> | |||
</div> | |||
<div id="lower-half"> | |||
<ul> | |||
<li> | |||
<label for="symbols">Symbols</label> | |||
<input type="text" name="symbols" id="symbols"><br> | |||
</li> | |||
<li> | |||
<label for="functions">Functions</label> | |||
<input type="text" name="functions" id="functions"><br> | |||
</li> | |||
<li> | |||
<label for="classes">Classes</label> | |||
<input type="text" name="classes" id="classes"><br> | |||
</li> | |||
<li> | |||
<label for="variables">Variables</label> | |||
<input type="text" name="variables" id="variables"><br> | |||
</li> | |||
</ul> | |||
</div> | |||
</div> | |||
</div> | |||
</form> | |||
@@ -0,0 +1,67 @@ | |||
from __future__ import unicode_literals | |||
import unittest | |||
from bitshift.query import parse_query | |||
# Each entry pairs a raw query string with the serialized form of the tree
# that parsing it is expected to produce.
TESTS = [
    # Text
    ("test", "Tree(Text(String(u'test')))"),
    ("re:test", "Tree(Text(Regex(u'test')))"),

    # Language
    ("language:python", "Tree(Language(Python))"),
    ("language:py", "Tree(Language(Python))"),
    ("l:r:r..y", "Tree(Language(Ruby))"),
    ("lang:re:py|c", "Tree(BinaryOp(Language(C), OR, Language(Python)))"),

    # Author
    ('"author:Ben Kurtovic"', "Tree(Author(String(u'Ben Kurtovic')))"),
    (r"'a:re:b.*?\sk.*?'", r"Tree(Author(Regex(u'b.*?\\sk.*?')))"),

    # Date
    ("'create:before:Jan 1, 2014'",
     "Tree(Date(CREATE, BEFORE, 2014-01-01 00:00:00))"),
    ("'modify:after:2010-05-09 10:11:12'",
     "Tree(Date(MODIFY, AFTER, 2010-05-09 10:11:12))"),

    # Symbol
    ("sym:foobar", "Tree(Symbol(ALL, String(u'foobar')))"),
    ("func:foo_bar", "Tree(Symbol(FUNCTION, String(u'foo_bar')))"),
    ("func:foo_bar()", "Tree(Symbol(FUNCTION, String(u'foo_bar')))"),
    ("class:FooBar", "Tree(Symbol(CLASS, String(u'FooBar')))"),
    ("var:foobar", "Tree(Symbol(VARIABLE, String(u'foobar')))"),
    ("var:r:foobar", "Tree(Symbol(VARIABLE, Regex(u'foobar')))"),

    # Composition (expected values are single strings, split across adjacent
    # literals only to keep the lines short)
    ("(a and b) or (c and d)",
     "Tree(BinaryOp(BinaryOp(Text(String(u'a')), AND, "
     "Text(String(u'b'))), OR, "
     "BinaryOp(Text(String(u'c')), AND, "
     "Text(String(u'd')))))"),
    ("a and b or c and d",
     "Tree(BinaryOp(BinaryOp(Text(String(u'a')), AND, "
     "Text(String(u'b'))), OR, "
     "BinaryOp(Text(String(u'c')), AND, "
     "Text(String(u'd')))))"),
    ("a and b or c or d",
     "Tree(BinaryOp(BinaryOp(Text(String(u'a')), AND, "
     "Text(String(u'b'))), OR, "
     "BinaryOp(Text(String(u'c')), OR, "
     "Text(String(u'd')))))"),
    ("a and (b or c or d)",
     "Tree(BinaryOp(Text(String(u'a')), AND, "
     "BinaryOp(Text(String(u'b')), OR, "
     "BinaryOp(Text(String(u'c')), OR, "
     "Text(String(u'd'))))))"),
    ("a not b",
     "Tree(BinaryOp(Text(String(u'a')), AND, "
     "UnaryOp(NOT, Text(String(u'b')))))"),
]
class TestQueryParser(unittest.TestCase): | |||
"""Unit tests for the query parser in :py:mod:`bitshift.query`.""" | |||
def test_parse(self): | |||
"""test full query parsing""" | |||
for test, expected in TESTS: | |||
self.assertEqual(expected, parse_query(test).serialize()) | |||
# Run the test suite with per-test (verbose) output when this file is
# executed directly as a script.
if __name__ == "__main__":
    unittest.main(verbosity=2)