diff --git a/bitshift/codelet.py b/bitshift/codelet.py index acaa52b..92debf4 100644 --- a/bitshift/codelet.py +++ b/bitshift/codelet.py @@ -18,12 +18,14 @@ class Codelet(object): code was last modified. :ivar rank: (float) A quanitification of the source code's quality, as per available ratings (stars, forks, upvotes, etc.). - :ivar symbols: (dict) Dictionary containing dictionaries of functions, classes, - variable definitions, etc. + :ivar symbols: (dict) Dictionary containing dictionaries of functions, + classes, variable definitions, etc. + :ivar origin: (tuple) 3-tuple of (site_name, site_url, image_blob), as + added by the database. """ def __init__(self, name, code, filename, language, authors, code_url, - date_created, date_modified, rank): + date_created, date_modified, rank, symbols=None, origin=None): """ Create a Codelet instance. @@ -36,6 +38,8 @@ class Codelet(object): :param date_created: see :attr:`self.date_created` :param date_modified: see :attr:`self.date_modified` :param rank: see :attr:`self.rank` + :param symbols: see :attr:`self.symbols` + :param origin: see :attr:`self.origin` :type name: see :attr:`self.name` :type code: see :attr:`self.code` @@ -46,6 +50,8 @@ class Codelet(object): :type date_created: see :attr:`self.date_created` :type date_modified: see :attr:`self.date_modified` :type rank: see :attr:`self.rank` + :type symbols: see :attr:`self.symbols` + :type origin: see :attr:`self.origin` """ self.name = name @@ -57,3 +63,5 @@ class Codelet(object): self.date_created = date_created self.date_modified = date_modified self.rank = rank + self.symbols = symbols or {} + self.origin = origin or (None, None, None) diff --git a/bitshift/database/__init__.py b/bitshift/database/__init__.py index 75f39da..e4fa430 100644 --- a/bitshift/database/__init__.py +++ b/bitshift/database/__init__.py @@ -9,6 +9,9 @@ import mmh3 import oursql from .migration import VERSION, MIGRATIONS +from ..codelet import Codelet +from ..query.nodes import (String, Regex, Text, Language, Author, Date, Symbol, + BinaryOp, UnaryOp) __all__ = ["Database"] @@ -51,9 +54,71 @@ class Database(object): "Run `python -m bitshift.database.migration`." raise RuntimeError(err) + def _search_with_query(self, cursor, tree, page): + """Execute an SQL query based on a query tree, and return results. + + The returned data is a 2-tuple of (list of codelet IDs, estimated + number of total results). + """ + query, args = tree.build_query(page) + cursor.execute(query, args) + ids = [id for id, _ in cursor.fetchall()] + num_results = 0 # TODO: NotImplemented + return ids, num_results + + def _get_authors_for_codelet(self, cursor, codelet_id): + """Return a list of authors for a given codelet.""" + query = """SELECT author_name, author_url + FROM authors + WHERE author_codelet = ?""" + + cursor.execute(query, (codelet_id,)) + return cursor.fetchall() + + def _get_symbols_for_code(self, cursor, code_id): + """Return a list of symbols for a given codelet.""" + query = """SELECT symbol_type, symbol_name, sloc_type, sloc_row, + sloc_col, sloc_end_row, sloc_end_col + FROM symbols + INNER JOIN symbol_locations ON sloc_symbol = symbol_id + WHERE symbol_code = ?""" + + symbols = {type_: {} for type_ in Symbol.TYPES_INV} + cursor.execute(query, (code_id,)) + for type_, name, loc_type, row, col, erow, ecol in cursor.fetchall(): + sdict = symbols[Symbol.TYPES_INV[type_]] + if name not in sdict: + sdict[name] = ((), ()) + sdict[name][loc_type].append((row, col, erow, ecol)) + for type_, sdict in symbols.items(): + symbols[type_] = [(n, d, u) for n, (d, u) in sdict.iteritems()] + return symbols + def _get_codelets_from_ids(self, cursor, ids): """Return a list of Codelet objects given a list of codelet IDs.""" - raise NotImplementedError() ## TODO + query = """SELECT * + FROM codelets + INNER JOIN code ON codelet_code_id = code_id + INNER JOIN origins ON codelet_origin = origin_id + WHERE codelet_id = ?""" + + with self._conn.cursor(oursql.DictCursor) as dict_cursor: + dict_cursor.executemany(query, [(id,) for id in ids]) + for row in dict_cursor.fetchone(): + codelet_id = row["codelet_id"] + if row["origin_url_base"]: + url = row["codelet_url"] + else: + url = row["origin_url_base"] + row["codelet_url"] + origin = (row["origin_name"], row["origin_url"], + row["origin_image"]) + authors = self._get_authors_for_codelet(cursor, codelet_id) + symbols = self._get_symbols_for_code(cursor, row["code_id"]) + yield Codelet( + row["codelet_name"], row["code_code"], None, + row["code_lang"], authors, url, + row["codelet_date_created"], row["codelet_date_modified"], + row["codelet_rank"], symbols, origin) def _decompose_url(self, cursor, url): """Break up a URL into an origin (with a URL base) and a suffix.""" @@ -68,13 +133,12 @@ class Database(object): def _insert_symbols(self, cursor, code_id, sym_type, symbols): """Insert a list of symbols of a given type into the database.""" - sym_types = ["functions", "classes", "variables"] query1 = "INSERT INTO symbols VALUES (DEFAULT, ?, ?, ?)" query2 = """INSERT INTO symbol_locations VALUES (DEFAULT, ?, ?, ?, ?, ?, ?)""" for (name, decls, uses) in symbols: - cursor.execute(query1, (code_id, sym_types.index(sym_type), name)) + cursor.execute(query1, (code_id, Symbol.TYPES_INV[sym_type], name)) sym_id = cursor.lastrowid params = ([tuple([sym_id, 0] + list(loc)) for loc in decls] + [tuple([sym_id, 1] + list(loc)) for loc in uses]) @@ -112,16 +176,14 @@ class Database(object): num_results = results[0][1] * (10 ** results[0][2]) ids = [res[0] for res in results] else: # Cache miss - ## TODO: build and execute search query - results = cursor.fetchall() - ids = NotImplemented ## TODO: extract ids from results - num_results = NotImplemented ## TODO: num if results else 0 + ids, num_results = self._search_with_query(cursor, query, page) num_exp = max(len(str(num_results)) - 3, 0) num_results = int(round(num_results, -num_exp)) num_mnt = num_results / (10 ** num_exp) cursor.execute(query2, (cache_id, num_mnt, num_exp)) cursor.executemany(query3, [(cache_id, c_id) for c_id in ids]) - return (num_results, self._get_codelets_from_ids(cursor, ids)) + codelet_gen = self._get_codelets_from_ids(cursor, ids) + return (num_results, list(codelet_gen)) def insert(self, codelet): """ diff --git a/bitshift/parser/__init__.py b/bitshift/parser/__init__.py index 55c76e1..bc22514 100644 --- a/bitshift/parser/__init__.py +++ b/bitshift/parser/__init__.py @@ -21,7 +21,7 @@ def _lang(codelet): if codelet.filename is not None: try: - return pgl.guess_lexer_for_filename(codelet.filename, '').name + return pgl.guess_lexer_for_filename(codelet.filename, codelet.code).name except: raise UnsupportedFileError('Could not find a lexer for the codelet\'s filename') diff --git a/bitshift/query/__init__.py b/bitshift/query/__init__.py index 6971c04..41d01cf 100644 --- a/bitshift/query/__init__.py +++ b/bitshift/query/__init__.py @@ -1,11 +1,283 @@ -from .association import Association -from .node import Node +""" +This subpackage contains code to parse search queries received from the +frontend into trees that can be used by the database backend. +""" + +from __future__ import unicode_literals +from re import IGNORECASE, search +from sys import maxsize + +from dateutil.parser import parse as parse_date + +from .nodes import (String, Regex, Text, Language, Author, Date, Symbol, + BinaryOp, UnaryOp) from .tree import Tree +from ..languages import LANGS -__all__ = ["parse_query"] +__all__ = ["QueryParseException", "parse_query"] -def parse_query(query): - # gets a string, returns a Tree - # TODO: note: resultant Trees should be normalized so that "foo OR bar" - # and "bar OR foo" result in equivalent trees +class QueryParseException(Exception): + """Raised by parse_query() when a query is invalid.""" pass + + +class _QueryParser(object): + """Wrapper class with methods to parse queries. Used as a singleton.""" + + def __init__(self): + self._prefixes = { + self._parse_language: ["l", "lang", "language"], + self._parse_author: ["a", "author"], + self._parse_modified: ["m", "mod", "modified", "modify"], + self._parse_created: ["cr", "create", "created"], + self._parse_symbol: ["s", "sym", "symb", "symbol"], + self._parse_function: ["f", "fn", "fun", "func", "function"], + self._parse_class: ["cl", "class", "clss"], + self._parse_variable: ["v", "var", "variable"] + } + + def _parse_literal(self, literal): + """Parse part of a search query into a string or regular expression.""" + if literal.startswith(("r:", "re:", "regex:", "regexp:")): + return Regex(literal.split(":", 1)[1]) + return String(literal) + + def _parse_language(self, term): + """Parse part of a query into a language node and return it.""" + term = self._parse_literal(term) + if isinstance(term, Regex): + langs = [i for i, lang in enumerate(LANGS) + if search(term.regex, lang, IGNORECASE)] + if not langs: + err = 'No languages found for regex: "%s"' % term.regex + raise QueryParseException(err) + node = Language(langs.pop()) + while langs: + node = BinaryOp(Language(langs.pop()), BinaryOp.OR, node) + return node + + needle = term.string.lower() + for i, lang in enumerate(LANGS): + if lang.lower() == needle: + return Language(i) + for i, lang in enumerate(LANGS): + if lang.lower().startswith(needle): + return Language(i) + err = 'No languages found for string: "%s"' % term.string + raise QueryParseException(err) + + def _parse_author(self, term): + """Parse part of a query into an author node and return it.""" + return Author(self._parse_literal(term)) + + def _parse_date(self, term, type_): + """Parse part of a query into a date node and return it.""" + if ":" not in term: + err = "A date relationship is required " \ + '("before:" or "after:"): "%s"' + raise QueryParseException(err % term) + relstr, dtstr = term.split(":", 1) + if relstr.lower() in ("before", "b"): + relation = Date.BEFORE + elif relstr.lower() in ("after", "a"): + relation = Date.AFTER + else: + err = 'Bad date relationship (should be "before" or "after"): "%s"' + raise QueryParseException(err % relstr) + try: + dt = parse_date(dtstr) + except (TypeError, ValueError): + raise QueryParseException('Bad date/time string: "%s"' % dtstr) + return Date(type_, relation, dt) + + def _parse_modified(self, term): + """Parse part of a query into a date modified node and return it.""" + return self._parse_date(term, Date.MODIFY) + + def _parse_created(self, term): + """Parse part of a query into a date created node and return it.""" + return self._parse_date(term, Date.CREATE) + + def _parse_symbol(self, term): + """Parse part of a query into a symbol node and return it.""" + return Symbol(Symbol.ALL, self._parse_literal(term)) + + def _parse_function(self, term): + """Parse part of a query into a function node and return it.""" + return Symbol(Symbol.FUNCTION, self._parse_literal(term)) + + def _parse_class(self, term): + """Parse part of a query into a class node and return it.""" + return Symbol(Symbol.CLASS, self._parse_literal(term)) + + def _parse_variable(self, term): + """Parse part of a query into a variable node and return it.""" + return Symbol(Symbol.VARIABLE, self._parse_literal(term)) + + def _parse_term(self, term): + """Parse a query term into a tree node and return it.""" + try: + term = term.decode("unicode_escape") + except UnicodeDecodeError: + raise QueryParseException('Invalid query term: "%s"' % term) + if ":" in term and not term[0] == ":": + prefix, arg = term.split(":", 1) + invert = prefix.lower() == "not" + if invert: + prefix, arg = arg.split(":", 1) + if not arg: + raise QueryParseException('Incomplete query term: "%s"' % term) + for meth, prefixes in self._prefixes.iteritems(): + if prefix.lower() in prefixes: + if invert: + return UnaryOp(UnaryOp.NOT, meth(arg)) + return meth(arg) + return Text(self._parse_literal(term)) + + def _scan_query(self, query, markers): + """Scan a query (sub)string for the first occurance of some markers. + + Returns a 2-tuple of (first_marker_found, marker_index). + """ + def is_escaped(query, index): + """Return whether a query marker is backslash-escaped.""" + return (index > 0 and query[index - 1] == "\\" and + (index < 2 or query[index - 2] != "\\")) + + best_marker, best_index = None, maxsize + for marker in markers: + index = query.find(marker) + if is_escaped(query, index): + _, new_index = self._scan_query(query[index + 1:], marker) + index += new_index + 1 + if index >= 0 and index < best_index: + best_marker, best_index = marker, index + return best_marker, best_index + + def _split_query(self, query, parens=False): + """Split a query string into a nested list of query terms. + + Returns a list of terms and/or nested sublists of terms. Each term and + sublist is guarenteed to be non-empty. + """ + query = query.lstrip() + if not query: + return [] + marker, index = self._scan_query(query, " \"'()") + if not marker: + return [query] + nest = [query[:index]] if index > 0 else [] + after = query[index + 1:] + + if marker == " ": + nest += self._split_query(after, parens) + elif marker in ('"', "'"): + close_marker, close_index = self._scan_query(after, marker) + if close_marker: + if close_index > 0: + nest.append(after[:close_index]) + after = after[close_index + 1:] + nest += self._split_query(after, parens) + elif after: + nest.append(after) + elif marker == "(": + inner, after = self._split_query(after, True), [] + if inner and isinstance(inner[-1], tuple): + after = self._split_query(inner.pop()[0], parens) + if inner: + nest.append(inner) + if after: + nest += after + elif marker == ")": + if parens: + nest.append((after,)) + else: + nest += self._split_query(after) + return nest + + def _parse_boolean_operators(self, nest): + """Parse boolean operators in a nested query list.""" + op_lookup = { + "and": BinaryOp.AND, + "or": BinaryOp.OR, + "not": UnaryOp.NOT + } + for i, term in enumerate(nest): + if isinstance(term, list): + self._parse_boolean_operators(term) + else: + nest[i] = op_lookup.get(term.lower(), term) + + def _parse_nest(self, nest): + """Recursively parse a nested list of search query terms.""" + def parse_binary_op(op): + """Parse a binary operator in a nested query list.""" + index = nest.index(op) + if index == 0 or index == len(nest) - 1: + err = "Invalid query: '%s' given without argument." + raise QueryParseException(err % BinaryOp.OPS[op]) + left = self._parse_nest(nest[:index]) + right = self._parse_nest(nest[index + 1:]) + return BinaryOp(left, op, right) + + if not nest: + err = "Error while parsing query: empty nest detected." + raise QueryParseException(err) + elif BinaryOp.OR in nest: + return parse_binary_op(BinaryOp.OR) + elif BinaryOp.AND in nest: + return parse_binary_op(BinaryOp.AND) + elif UnaryOp.NOT in nest: + index = nest.index(UnaryOp.NOT) + if index == len(nest) - 1: + err = "Invalid query: '%s' given without argument." + raise QueryParseException(err % UnaryOp.OPS[UnaryOp.NOT]) + right = UnaryOp(UnaryOp.NOT, self._parse_nest(nest[index + 1:])) + if index > 0: + left = self._parse_nest(nest[:index]) + return BinaryOp(left, BinaryOp.AND, right) + return right + elif len(nest) > 1: + left, right = self._parse_term(nest[0]), self._parse_nest(nest[1:]) + return BinaryOp(left, BinaryOp.AND, right) + elif isinstance(nest[0], list): + return self._parse_nest(nest[0]) + else: + return self._parse_term(nest[0]) + + def _balance_tree(self, node): + """Auto-balance a tree using a string sorting function.""" + if isinstance(node, BinaryOp): + self._balance_tree(node.left) + self._balance_tree(node.right) + if node.right.sortkey() < node.left.sortkey(): + node.left, node.right = node.right, node.left + elif isinstance(node, UnaryOp): + self._balance_tree(node.node) + + def parse(self, query): + """ + Parse a search query. + + The result is normalized with a sorting function so that + ``"foo OR bar"`` and ``"bar OR foo"`` result in the same tree. This is + important for caching purposes. + + :param query: The query be converted. + :type query: str + + :return: A tree storing the data in the query. + :rtype: :py:class:`~.query.tree.Tree` + + :raises: :py:class:`.QueryParseException` + """ + nest = self._split_query(query.rstrip()) + if not nest: + raise QueryParseException('Empty query: "%s"' % query) + self._parse_boolean_operators(nest) + root = self._parse_nest(nest) + self._balance_tree(root) + return Tree(root) + + +parse_query = _QueryParser().parse diff --git a/bitshift/query/nodes.py b/bitshift/query/nodes.py new file mode 100644 index 0000000..5d157b5 --- /dev/null +++ b/bitshift/query/nodes.py @@ -0,0 +1,276 @@ +from ..languages import LANGS + +__all__ = ["String", "Regex", "Text", "Language", "Author", "Date", "Symbol", + "BinaryOp", "UnaryOp"] + +class _Node(object): + """Represents a single node in a query tree. + + Generally speaking, a node is a constraint applied to the database. Thus, + a :py:class:`~.Language` node represents a constraint where only codelets + of a specific language are selected. + """ + + def sortkey(self): + """Return a string sort key for the node.""" + return "" + + def parameterize(self, tables): + """Parameterize the node. + + Returns a 4-tuple of (conditional string, parameter list, rank list, + should-we-rank boolean). If the rank list is empty, then it is assumed + to contain the conditional string. + """ + return "", [], [], False + + +class _Literal(object): + """Represents a literal component of a search query, present at the leaves. + + A literal might be a string or a regular expression. + """ + pass + + +class String(_Literal): + """Represents a string literal.""" + + def __init__(self, string): + """ + :type string: unicode + """ + self.string = string + + def __repr__(self): + return "String({0!r})".format(self.string) + + def sortkey(self): + return self.string + + +class Regex(_Literal): + """Represents a regular expression literal.""" + + def __init__(self, regex): + """ + :type string: unicode + """ + self.regex = regex + + def __repr__(self): + return "Regex({0!r})".format(self.regex) + + def sortkey(self): + return self.regex + + +class Text(_Node): + """Represents a text node. + + Searches in codelet names (full-text search), symbols (equality), and + source code (full-text search). + """ + + def __init__(self, text): + """ + :type text: :py:class:`._Literal` + """ + self.text = text + + def __repr__(self): + return "Text({0})".format(self.text) + + def sortkey(self): + return self.text.sortkey() + + def parameterize(self, tables): + tables |= {"code", "symbols"} + if isinstance(self.text, Regex): + ranks = ["(codelet_name REGEXP ?)", "(symbol_name REGEXP ?)", + "(code_code REGEXP ?)"] + text = self.text.regex + else: + ranks = ["(MATCH(codelet_name) AGAINST (? IN BOOLEAN MODE))", + "(MATCH(code_code) AGAINST (? IN BOOLEAN MODE))", + "(symbol_name = ?)"] + text = self.text.string + cond = "(" + " OR ".join(ranks) + ")" + return cond, [text] * 3, ranks, True + + +class Language(_Node): + """Represents a language node. + + Searches in the code_lang field. + """ + + def __init__(self, lang): + """ + :type lang: int + """ + self.lang = lang + + def __repr__(self): + return "Language({0})".format(LANGS[self.lang]) + + def sortkey(self): + return LANGS[self.lang] + + def parameterize(self, tables): + tables |= {"code"} + return "(code_lang = ?)", [self.lang], [], False + + +class Author(_Node): + """Represents a author node. + + Searches in the author_name field (full-text search). + """ + + def __init__(self, name): + """ + :type name: :py:class:`_Literal` + """ + self.name = name + + def __repr__(self): + return "Author({0})".format(self.name) + + def sortkey(self): + return self.name.sortkey() + + def parameterize(self, tables): + tables |= {"authors"} + if isinstance(self.name, Regex): + return "(author_name REGEXP ?)", [self.name.regex], [], False + cond = "(MATCH(author_name) AGAINST (? IN BOOLEAN MODE))" + return cond, [self.name.string], [], True + + +class Date(_Node): + """Represents a date node. + + Searches in the codelet_date_created or codelet_date_modified fields. + """ + CREATE = 1 + MODIFY = 2 + + BEFORE = 1 + AFTER = 2 + + def __init__(self, type_, relation, date): + """ + :type type_: int (``CREATE`` or ``MODIFY``) + :type relation: int (``BEFORE``, ``AFTER``) + :type date: datetime.datetime + """ + self.type = type_ + self.relation = relation + self.date = date + + def __repr__(self): + types = {self.CREATE: "CREATE", self.MODIFY: "MODIFY"} + relations = {self.BEFORE: "BEFORE", self.AFTER: "AFTER"} + tm = "Date({0}, {1}, {2})" + return tm.format(types[self.type], relations[self.relation], self.date) + + def sortkey(self): + return self.date.strftime("%Y%m%d%H%M%S") + + def parameterize(self, tables): + column = {self.CREATE: "codelet_date_created", + self.MODIFY: "codelet_date_modified"}[self.type] + op = {self.BEFORE: "<=", self.AFTER: ">="}[self.relation] + return "(" + column + " " + op + " ?)", [self.date], [], False + + +class Symbol(_Node): + """Represents a symbol node. + + Searches in symbol_type and symbol_name. + """ + ALL = -1 + FUNCTION = 0 + CLASS = 1 + VARIABLE = 2 + TYPES = {FUNCTION: "FUNCTION", CLASS: "CLASS", VARIABLE: "VARIABLE"} + TYPES_INV = ["functions", "classes", "variables"] + + def __init__(self, type_, name): + """ + :type type_: int (``ALL``, ``FUNCTION``, ``CLASS``, etc.) + :type name: :py:class:`._Literal` + """ + self.type = type_ + self.name = name + + def __repr__(self): + type_ = self.TYPES.get(self.type, "ALL") + return "Symbol({0}, {1})".format(type_, self.name) + + def sortkey(self): + return self.name.sortkey() + + def parameterize(self, tables): + tables |= {"code", "symbols"} + if isinstance(self.name, Regex): + cond, name = "symbol_name REGEXP ?", self.name.regex + else: + cond, name = "symbol_name = ?", self.name.string + if self.type == self.ALL: + types = ", ".join(str(type_) for type_ in self.TYPES) + cond += " AND symbol_type IN (%s)" % types + if self.type != self.ALL: + cond += " AND symbol_type = %d" % self.type + return "(" + cond + ")", [name], [], False + + +class BinaryOp(_Node): + """Represents a relationship between two nodes: ``and``, ``or``.""" + AND = object() + OR = object() + OPS = {AND: "AND", OR: "OR"} + + def __init__(self, left, op, right): + self.left = left + self.op = op + self.right = right + + def __repr__(self): + tmpl = "BinaryOp({0}, {1}, {2})" + return tmpl.format(self.left, self.OPS[self.op], self.right) + + def sortkey(self): + return self.left.sortkey() + self.right.sortkey() + + def parameterize(self, tables): + lcond, largs, lranks, need_lranks = self.left.parameterize(tables) + rcond, rargs, rranks, need_rranks = self.right.parameterize(tables) + lranks, rranks = lranks or [lcond], rranks or [rcond] + op = self.OPS[self.op] + cond = "(" + lcond + " " + op + " " + rcond + ")" + need_ranks = need_lranks or need_rranks or self.op == self.OR + return cond, largs + rargs, lranks + rranks, need_ranks + + +class UnaryOp(_Node): + """Represents a transformation applied to one node: ``not``.""" + NOT = object() + OPS = {NOT: "NOT"} + + def __init__(self, op, node): + self.op = op + self.node = node + + def __repr__(self): + return "UnaryOp({0}, {1})".format(self.OPS[self.op], self.node) + + def sortkey(self): + return self.node.sortkey() + + def parameterize(self, tables): + cond, args, ranks, need_ranks = self.node.parameterize(tables) + new_cond = "(" + self.OPS[self.op] + " " + cond + ")" + ranks = ranks or [cond] + return new_cond, args, ranks, need_ranks diff --git a/bitshift/query/tree.py b/bitshift/query/tree.py new file mode 100644 index 0000000..5da3f02 --- /dev/null +++ b/bitshift/query/tree.py @@ -0,0 +1,69 @@ +__all__ = ["Tree"] + +QUERY_TEMPLATE = """SELECT codelet_id, (codelet_rank%s) AS score +FROM codelets %s +WHERE %s +GROUP BY codelet_id +ORDER BY score DESC +LIMIT %d OFFSET %d""".replace("\n", " ") + +class Tree(object): + """Represents a query tree.""" + + def __init__(self, root): + self._root = root + + def __repr__(self): + return "Tree({0})".format(self._root) + + @property + def root(self): + """The root node of the tree.""" + return self._root + + def sortkey(self): + """Return a string sort key for the query tree.""" + return self._root.sortkey() + + def serialize(self): + """Create a string representation of the query for caching. + + :return: Query string representation. + :rtype: str + """ + return repr(self) + + def build_query(self, page=1, page_size=10): + """Convert the query tree into a parameterized SQL SELECT statement. + + :param page: The page number to get results for. + :type page: int + :param page_size: The number of results per page. + :type page_size: int + + :return: SQL query data. + :rtype: 2-tuple of (SQL statement string, query parameter tuple) + """ + def get_table_joins(tables): + data = [ + ("code", "codelet_code_id", "code_id"), + ("authors", "author_codelet", "codelet_id"), + ("symbols", "symbol_code", "code_id") + ] + tmpl = "INNER JOIN %s ON %s = %s" + for args in data: + if args[0] in tables: + yield tmpl % args + + tables = set() + cond, arglist, ranks, need_ranks = self._root.parameterize(tables) + ranks = ranks or [cond] + if need_ranks: + score = " + ((%s) / %d)" % (" + ".join(ranks), len(ranks)) + else: + score = "" + joins = " ".join(get_table_joins(tables)) + offset = (page - 1) * page_size + + query = QUERY_TEMPLATE % (score, joins, cond, page_size, offset) + return query, tuple(arglist * 2 if need_ranks else arglist) diff --git a/setup.py b/setup.py index 48d4c42..5ab7a7c 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,8 @@ setup( packages = find_packages(), install_requires = [ "Flask>=0.10.1", "pygments>=1.6", "requests>=2.2.0", - "beautifulsoup4>=3.2.1", "oursql>=0.9.3.1", "mmh3>=2.3"], + "beautifulsoup4>=3.2.1", "oursql>=0.9.3.1", "mmh3>=2.3", + "python-dateutil>=2.2"], author = "Benjamin Attal, Ben Kurtovic, Severyn Kozak", license = "MIT", url = "https://github.com/earwig/bitshift" diff --git a/static/font/Inconsolata.otf b/static/font/Inconsolata.otf new file mode 100755 index 0000000..e7e1fa0 Binary files /dev/null and b/static/font/Inconsolata.otf differ diff --git a/static/js/index.js b/static/js/index.js index cc04f9b..b4fb30a 100644 --- a/static/js/index.js +++ b/static/js/index.js @@ -10,7 +10,6 @@ var searchBar = $("form#search-bar input[type='text']")[0]; var resultsDiv = $("div#results")[0]; var typingTimer, lastValue; - /* * Set all page callbacks. */ @@ -43,10 +42,32 @@ var typingTimer, lastValue; event.preventDefault(); return false; }); - searchBar.onkeyup = typingTimer; }()); +//Obtained by parsing python file with pygments +var codeExample = '
 1\n 2\n 3\n 4\n 5\n 6\n 7\n 8\n 9\n10\n11\n12\n13\n14\n15\n16\n17\n18\n19\n20\n21\n22\n23\n24\n25\n26\n27\n28\n29\n30\n31\n32\n33\n34\n35\n36\n37\n38\n39\n40
"""\nModule to contain all the project's Flask server plumbing.\n"""\n\nfrom flask import Flask\nfrom flask import render_template, session\n\nfrom bitshift import assets\n# from bitshift.database import Database\n# from bitshift.query import parse_query\n\napp = Flask(__name__)\napp.config.from_object("bitshift.config")\n\napp_env = app.jinja_env\napp_env.line_statement_prefix = "="\napp_env.globals.update(assets=assets)\n\n# database = Database()\n\n@app.route("/")\ndef index():\n    return render_template("index.html")\n\n@app.route("/search/<query>")\ndef search(query):\n    # tree = parse_query(query)\n    # database.search(tree)\n    pass\n\n@app.route("/about")\ndef about():\n    return render_template("about.html")\n\n@app.route("/developers")\ndef developers():\n    return render_template("developers.html")\n\nif __name__ == "__main__":\n    app.run(debug=True)\n
\n
' +searchBar.onkeyup = typingTimer; + +var testCodelet = { + 'code_url': 'https://github.com/earwig/bitshift/blob/develop/app.py', + 'filename': 'app.py', + 'language': 'python', + 'date_created': 'May 10, 2014', + 'date_modified': '2 days ago', + 'origin': ['GitHub', 'https://github.com', ''], + 'authors': ['sevko', 'earwig'], + 'html_code': codeExample +}; + +// Enable infinite scrolling down the results page. +$(window).scroll(function() { + var searchField = $("div#search-field"); + if($(window).scrollTop() + $(window).height() == $(document).height() && searchField.hasClass('partly-visible')){ + loadMoreResults(); + } +}); + /* * Clear the existing timer and set a new one the the user types text into the * search bar. @@ -117,6 +138,98 @@ function populateResults(){ } /* + * Create a result element based upon a codelet instance. + * + * @return {Element} The result element. + */ +function createResult(codelet) { + //Level 1 + var newDiv = document.createElement("div"), + table = document.createElement("table"), + row = document.createElement("tr"); + //Level 2 + var displayInfo = document.createElement("div"), + sidebar = document.createElement("td"), + codeElt = document.createElement("td"), + displayButton = document.createElement("td"), + hiddenInfoContainer = document.createElement("td"), + hiddenInfo = document.createElement("div"); + //Level 3 + var title = document.createElement("span"), + site = document.createElement("span"), + dateModified = document.createElement("span"), + language = document.createElement("span"), + dateCreated = document.createElement("span"), + authors = document.createElement("div"); + + //Classes and ID's + newDiv.classList.add('result'); + + displayInfo.id = 'display-info'; + sidebar.id = 'sidebar'; + codeElt.id = 'code'; + displayButton.id = 'display-button'; + hiddenInfo.id = 'hidden-info'; + + title.id = 'title'; + site.id = 'site'; + dateModified.id = 'date-modified'; + language.id = 'language'; + dateCreated.id = 'date-created'; + authors.id = 'authors'; + + //Add the bulk of the html + title.innerHTML = 'File ' + + codelet.filename + ''; + site.innerHTML = 'on ' + codelet.origin[0] +''; + language.innerHTML = codelet.language; + dateModified.innerHTML = 'Last modified ' + codelet.date_modified; + // Needs to be changed from int to string on the server + dateCreated.innerHTML = 'Created ' + codelet.date_created; + authors.innerHTML = 'Authors: '; + $.each(codelet.authors, function(i, a) { + authors.innerHTML += '' + a + ' '; + }); + + sidebar.innerHTML = ''; + // Needs to be processed on the server + codeElt.innerHTML = '
' + codelet.html_code + '
'; + + //Event binding + $(displayButton).hover(function(e) { + $(row).addClass('display-all'); + }); + + $(newDiv).on('transitionend', function(e) { + $(newDiv).one('mouseleave', function(e) { + $(row).removeClass('display-all'); + }); + }); + + //Finish and append elements to parent elements + hiddenInfo.appendChild(dateCreated); + hiddenInfo.appendChild(dateModified); + hiddenInfo.appendChild(authors); + + hiddenInfoContainer.appendChild(hiddenInfo); + + row.appendChild(sidebar); + row.appendChild(codeElt); + row.appendChild(hiddenInfoContainer); + row.appendChild(displayButton); + table.appendChild(row); + + displayInfo.appendChild(title); + displayInfo.appendChild(site); + displayInfo.appendChild(language); + + newDiv.appendChild(displayInfo); + newDiv.appendChild(table); + + return newDiv; +} + +/* * AJAX the current query string to the server, and return its response. * * @return {Array} The server's response in the form of `div.result` DOM @@ -125,12 +238,7 @@ function populateResults(){ function queryServer(){ var resultDivs = [] for(var result = 0; result < 20; result++){ - var newDiv = document.createElement("div"); - newDiv.classList.add("result"); - newDiv.innerHTML = Math.random(); - newDiv.style.textAlign = "center"; - newDiv.style.color = "#" + Math.floor(Math.random() * - 16777215).toString(16); + var newDiv = createResult(testCodelet); resultDivs.push(newDiv); } diff --git a/static/sass/index.sass b/static/sass/index.sass index e20b53f..f8b1198 100644 --- a/static/sass/index.sass +++ b/static/sass/index.sass @@ -6,6 +6,10 @@ @import variables $minSearchFieldsWidth: 490px +$resultWidth: 1000px +$sidebarWidth: 30px +$codeWidth: 650px +$hiddenInfoWidth: 250px .ui-datepicker font-size: 70% @@ -282,13 +286,124 @@ div#results margin-right: auto width: 80% + /* TODO: + 1) Sidebar + - Add way to cycle through hits in the code. + 2) Hidden info + - Add links for authors. + - Remove language field. + 3) Header + - Add an icon for the website. + - Add language tag. + 4) Code body + - Add highlighting. + 5) Display button + - unicode glyph */ div.result - background-color: #F8F8F8 + width: $resultWidth + height: 200px + margin-top: 2% margin-bottom: 10% - margin-top: 1% - padding: 1% + + + table + border-collapse: collapse + border: 1px solid $baseColor3 + height: inherit + + tr + height: inherit &.cascade @extend .t3 - margin-bottom: 0% +div#display-info + font-size: 1.3em + padding: 5px 0px 5px 5px + border: 1px dotted $baseColor3 + border-bottom: none + width: 400px + + a + text-decoration: none + + &:hover + color: orange + + #title + margin-right: 10px + + #site + text-transform: capitalize + + #language + font-size: 0.8em + font-weight: bold + margin-left: 100px + padding: 3px + @include vendor(border-radius, 2px) + background: #ddd + color: orange + +td#sidebar + width: $sidebarWidth + background-color: #eee + border-right: 1px solid $baseColor3 + height: inherit + +td#code + width: $codeWidth + height: inherit + border-right: 1px solid $baseColor3 + @include vendor(transition, width 0.2s ease-in-out) + + .display-all & + width: 500px + + #tablecontainer + overflow: scroll + width: 100% + height: inherit + background-color: #49483e + + table + table-layout:fixed + border-collapse: collapse + border: none + font-family: monospace + +td#display-button + width: 25px + background: url(https://cdn1.iconfinder.com/data/icons/windows-8-metro-style/512/View_Details-.png) + background-size: 25px 25px + background-repeat: no-repeat + background-position: center + + .display-all & + @include vendor(transform, rotateY(180deg)) + +div#hidden-info + width: $hiddenInfoWidth + margin-left: -$hiddenInfoWidth + height: 100% + padding-top: 40px + font-size: 1.2em + line-height: 1.5em + @include vendor(transition, margin-left 0.2s ease-in-out) + + .display-all & + margin-left: 0px + padding-left: 20px + + #date-created + display: inline-block + + #date-modified + display: block + + #authors + a + text-decoration: none + + &:hover + color: orange diff --git a/static/sass/main.sass b/static/sass/main.sass index 3a2ab37..c3f3e9c 100644 --- a/static/sass/main.sass +++ b/static/sass/main.sass @@ -9,9 +9,7 @@ html, body height: 100% margin: 0 padding: 0 - - * - font-family: sans-serif + font-family: sans-serif div#container min-height: 100% diff --git a/templates/index.html b/templates/index.html index 566a171..254508b 100644 --- a/templates/index.html +++ b/templates/index.html @@ -8,6 +8,7 @@ {{ assets.tag("lib/jqueryui.custom.min.css") }} {{ assets.tag("lib/jquery.min.js") }} {{ assets.tag("lib/jquery-ui.min.js") }} + {{ assets.tag("lib/highlight.css") }} {{ assets.tag("index.css") }} @@ -88,6 +89,31 @@ Regex + +
+
    +
  • + +
    +
  • + +
  • + +
    +
  • + +
  • + +
    +
  • + +
  • + +
    +
  • + +
+
diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/test_query_parser.py b/test/test_query_parser.py new file mode 100644 index 0000000..24941c0 --- /dev/null +++ b/test/test_query_parser.py @@ -0,0 +1,67 @@ +from __future__ import unicode_literals +import unittest + +from bitshift.query import parse_query + +TESTS = [ + # Text + ("test", "Tree(Text(String(u'test')))"), + ("re:test", "Tree(Text(Regex(u'test')))"), + + # Language + ("language:python", "Tree(Language(Python))"), + ("language:py", "Tree(Language(Python))"), + ("l:r:r..y", "Tree(Language(Ruby))"), + ("lang:re:py|c", "Tree(BinaryOp(Language(C), OR, Language(Python)))"), + + # Author + ('"author:Ben Kurtovic"', "Tree(Author(String(u'Ben Kurtovic')))"), + (r"'a:re:b.*?\sk.*?'", r"Tree(Author(Regex(u'b.*?\\sk.*?')))"), + + # Date + ("'create:before:Jan 1, 2014'", + "Tree(Date(CREATE, BEFORE, 2014-01-01 00:00:00))"), + ("'modify:after:2010-05-09 10:11:12'", + "Tree(Date(MODIFY, AFTER, 2010-05-09 10:11:12))"), + + # Symbol + ("sym:foobar", "Tree(Symbol(ALL, String(u'foobar')))"), + ("func:foo_bar", "Tree(Symbol(FUNCTION, String(u'foo_bar')))"), + ("func:foo_bar()", "Tree(Symbol(FUNCTION, String(u'foo_bar')))"), + ("class:FooBar", "Tree(Symbol(CLASS, String(u'FooBar')))"), + ("var:foobar", "Tree(Symbol(VARIABLE, String(u'foobar')))"), + ("var:r:foobar", "Tree(Symbol(VARIABLE, Regex(u'foobar')))"), + + # Composition + ("(a and b) or (c and d)", ", ".join([ + "Tree(BinaryOp(BinaryOp(Text(String(u'a'))", "AND", + "Text(String(u'b')))", "OR", "BinaryOp(Text(String(u'c'))", "AND", + "Text(String(u'd')))))"])), + ("a and b or c and d", ", ".join([ + "Tree(BinaryOp(BinaryOp(Text(String(u'a'))", "AND", + "Text(String(u'b')))", "OR", "BinaryOp(Text(String(u'c'))", "AND", + "Text(String(u'd')))))"])), + ("a and b or c or d", ", ".join([ + "Tree(BinaryOp(BinaryOp(Text(String(u'a'))", "AND", + "Text(String(u'b')))", "OR", "BinaryOp(Text(String(u'c'))", "OR", + "Text(String(u'd')))))"])), + ("a and (b or c or d)", ", ".join([ + "Tree(BinaryOp(Text(String(u'a'))", "AND", + "BinaryOp(Text(String(u'b'))", "OR", "BinaryOp(Text(String(u'c'))", "OR", + "Text(String(u'd'))))))"])), + ("a not b", ", ".join([ + "Tree(BinaryOp(Text(String(u'a'))", "AND", "UnaryOp(NOT", + "Text(String(u'b')))))"])), +] + +class TestQueryParser(unittest.TestCase): + """Unit tests for the query parser in :py:mod:`bitshift.query`.""" + + def test_parse(self): + """test full query parsing""" + for test, expected in TESTS: + self.assertEqual(expected, parse_query(test).serialize()) + + +if __name__ == "__main__": + unittest.main(verbosity=2)