Browse Source

Merge branch 'feature/style.results' into feature/style

Conflicts:
    static/js/index.js
    templates/index.html
tags/v1.0^2
Severyn Kozak 10 years ago
parent
commit
c6e5b4f0cc
14 changed files with 1037 additions and 35 deletions
  1. +11
    -3
      bitshift/codelet.py
  2. +70
    -8
      bitshift/database/__init__.py
  3. +1
    -1
      bitshift/parser/__init__.py
  4. +279
    -7
      bitshift/query/__init__.py
  5. +276
    -0
      bitshift/query/nodes.py
  6. +69
    -0
      bitshift/query/tree.py
  7. +2
    -1
      setup.py
  8. BIN
     
  9. +116
    -8
      static/js/index.js
  10. +119
    -4
      static/sass/index.sass
  11. +1
    -3
      static/sass/main.sass
  12. +26
    -0
      templates/index.html
  13. +0
    -0
     
  14. +67
    -0
      test/test_query_parser.py

+ 11
- 3
bitshift/codelet.py View File

@@ -18,12 +18,14 @@ class Codelet(object):
code was last modified. code was last modified.
:ivar rank: (float) A quanitification of the source code's quality, as :ivar rank: (float) A quanitification of the source code's quality, as
per available ratings (stars, forks, upvotes, etc.). per available ratings (stars, forks, upvotes, etc.).
:ivar symbols: (dict) Dictionary containing dictionaries of functions, classes,
variable definitions, etc.
:ivar symbols: (dict) Dictionary containing dictionaries of functions,
classes, variable definitions, etc.
:ivar origin: (tuple) 3-tuple of (site_name, site_url, image_blob), as
added by the database.
""" """


def __init__(self, name, code, filename, language, authors, code_url, def __init__(self, name, code, filename, language, authors, code_url,
date_created, date_modified, rank):
date_created, date_modified, rank, symbols=None, origin=None):
""" """
Create a Codelet instance. Create a Codelet instance.


@@ -36,6 +38,8 @@ class Codelet(object):
:param date_created: see :attr:`self.date_created` :param date_created: see :attr:`self.date_created`
:param date_modified: see :attr:`self.date_modified` :param date_modified: see :attr:`self.date_modified`
:param rank: see :attr:`self.rank` :param rank: see :attr:`self.rank`
:param symbols: see :attr:`self.symbols`
:param origin: see :attr:`self.origin`


:type name: see :attr:`self.name` :type name: see :attr:`self.name`
:type code: see :attr:`self.code` :type code: see :attr:`self.code`
@@ -46,6 +50,8 @@ class Codelet(object):
:type date_created: see :attr:`self.date_created` :type date_created: see :attr:`self.date_created`
:type date_modified: see :attr:`self.date_modified` :type date_modified: see :attr:`self.date_modified`
:type rank: see :attr:`self.rank` :type rank: see :attr:`self.rank`
:type symbols: see :attr:`self.symbols`
:type origin: see :attr:`self.origin`
""" """


self.name = name self.name = name
@@ -57,3 +63,5 @@ class Codelet(object):
self.date_created = date_created self.date_created = date_created
self.date_modified = date_modified self.date_modified = date_modified
self.rank = rank self.rank = rank
self.symbols = symbols or {}
self.origin = origin or (None, None, None)

+ 70
- 8
bitshift/database/__init__.py View File

@@ -9,6 +9,9 @@ import mmh3
import oursql import oursql


from .migration import VERSION, MIGRATIONS from .migration import VERSION, MIGRATIONS
from ..codelet import Codelet
from ..query.nodes import (String, Regex, Text, Language, Author, Date, Symbol,
BinaryOp, UnaryOp)


__all__ = ["Database"] __all__ = ["Database"]


@@ -51,9 +54,71 @@ class Database(object):
"Run `python -m bitshift.database.migration`." "Run `python -m bitshift.database.migration`."
raise RuntimeError(err) raise RuntimeError(err)


def _search_with_query(self, cursor, tree, page):
"""Execute an SQL query based on a query tree, and return results.

The returned data is a 2-tuple of (list of codelet IDs, estimated
number of total results).
"""
query, args = tree.build_query(page)
cursor.execute(query, args)
ids = [id for id, _ in cursor.fetchall()]
num_results = 0 # TODO: NotImplemented
return ids, num_results

def _get_authors_for_codelet(self, cursor, codelet_id):
"""Return a list of authors for a given codelet."""
query = """SELECT author_name, author_url
FROM authors
WHERE author_codelet = ?"""

cursor.execute(query, (codelet_id,))
return cursor.fetchall()

def _get_symbols_for_code(self, cursor, code_id):
    """Return the symbols recorded for a given piece of code.

    :param cursor: Database cursor used to execute the lookup.
    :param code_id: ID matched against ``symbol_code``.

    :return: Dictionary mapping every symbol type name in
        ``Symbol.TYPES_INV`` to a list of ``(name, declarations, uses)``
        3-tuples. ``declarations`` and ``uses`` are lists of
        ``(row, col, end_row, end_col)`` tuples.
    """
    query = """SELECT symbol_type, symbol_name, sloc_type, sloc_row,
sloc_col, sloc_end_row, sloc_end_col
FROM symbols
INNER JOIN symbol_locations ON sloc_symbol = symbol_id
WHERE symbol_code = ?"""

    by_type = {type_: {} for type_ in Symbol.TYPES_INV}
    cursor.execute(query, (code_id,))
    for type_, name, loc_type, row, col, erow, ecol in cursor.fetchall():
        sdict = by_type[Symbol.TYPES_INV[type_]]
        # The location buckets must be lists: they are appended to as rows
        # arrive (a tuple has no append()). Index 0 collects declarations
        # and index 1 collects uses, matching the sloc_type values written
        # by _insert_symbols.
        locations = sdict.setdefault(name, ([], []))
        locations[loc_type].append((row, col, erow, ecol))

    # Build a fresh mapping instead of rewriting the one being iterated.
    return {
        type_: [(name, decls, uses) for name, (decls, uses) in sdict.items()]
        for type_, sdict in by_type.items()
    }

def _get_codelets_from_ids(self, cursor, ids):
    """Yield a Codelet object for each of the given codelet IDs.

    This is a generator; callers that need a list must wrap it in
    ``list()``. IDs without a matching database row are skipped.

    :param cursor: Cursor used for the per-codelet author and symbol
        lookups.
    :param ids: Iterable of codelet IDs, yielded in the same order.
    """
    query = """SELECT *
FROM codelets
INNER JOIN code ON codelet_code_id = code_id
INNER JOIN origins ON codelet_origin = origin_id
WHERE codelet_id = ?"""

    with self._conn.cursor(oursql.DictCursor) as dict_cursor:
        # One execute() per ID: executemany() is not meant for statements
        # that return result sets, and fetchone() returns a single row, so
        # iterating it would walk that row's column names.
        for codelet_id in ids:
            dict_cursor.execute(query, (codelet_id,))
            row = dict_cursor.fetchone()
            if row is None:
                continue
            url = row["codelet_url"]
            if row["origin_url_base"]:
                # The stored URL is a suffix of the origin's base URL.
                url = row["origin_url_base"] + url
            origin = (row["origin_name"], row["origin_url"],
                      row["origin_image"])
            authors = self._get_authors_for_codelet(cursor, codelet_id)
            symbols = self._get_symbols_for_code(cursor, row["code_id"])
            yield Codelet(
                row["codelet_name"], row["code_code"], None,
                row["code_lang"], authors, url,
                row["codelet_date_created"], row["codelet_date_modified"],
                row["codelet_rank"], symbols, origin)


def _decompose_url(self, cursor, url): def _decompose_url(self, cursor, url):
"""Break up a URL into an origin (with a URL base) and a suffix.""" """Break up a URL into an origin (with a URL base) and a suffix."""
@@ -68,13 +133,12 @@ class Database(object):


def _insert_symbols(self, cursor, code_id, sym_type, symbols): def _insert_symbols(self, cursor, code_id, sym_type, symbols):
"""Insert a list of symbols of a given type into the database.""" """Insert a list of symbols of a given type into the database."""
sym_types = ["functions", "classes", "variables"]
query1 = "INSERT INTO symbols VALUES (DEFAULT, ?, ?, ?)" query1 = "INSERT INTO symbols VALUES (DEFAULT, ?, ?, ?)"
query2 = """INSERT INTO symbol_locations VALUES query2 = """INSERT INTO symbol_locations VALUES
(DEFAULT, ?, ?, ?, ?, ?, ?)""" (DEFAULT, ?, ?, ?, ?, ?, ?)"""


for (name, decls, uses) in symbols: for (name, decls, uses) in symbols:
cursor.execute(query1, (code_id, sym_types.index(sym_type), name))
cursor.execute(query1, (code_id, Symbol.TYPES_INV[sym_type], name))
sym_id = cursor.lastrowid sym_id = cursor.lastrowid
params = ([tuple([sym_id, 0] + list(loc)) for loc in decls] + params = ([tuple([sym_id, 0] + list(loc)) for loc in decls] +
[tuple([sym_id, 1] + list(loc)) for loc in uses]) [tuple([sym_id, 1] + list(loc)) for loc in uses])
@@ -112,16 +176,14 @@ class Database(object):
num_results = results[0][1] * (10 ** results[0][2]) num_results = results[0][1] * (10 ** results[0][2])
ids = [res[0] for res in results] ids = [res[0] for res in results]
else: # Cache miss else: # Cache miss
## TODO: build and execute search query
results = cursor.fetchall()
ids = NotImplemented ## TODO: extract ids from results
num_results = NotImplemented ## TODO: num if results else 0
ids, num_results = self._search_with_query(cursor, query, page)
num_exp = max(len(str(num_results)) - 3, 0) num_exp = max(len(str(num_results)) - 3, 0)
num_results = int(round(num_results, -num_exp)) num_results = int(round(num_results, -num_exp))
num_mnt = num_results / (10 ** num_exp) num_mnt = num_results / (10 ** num_exp)
cursor.execute(query2, (cache_id, num_mnt, num_exp)) cursor.execute(query2, (cache_id, num_mnt, num_exp))
cursor.executemany(query3, [(cache_id, c_id) for c_id in ids]) cursor.executemany(query3, [(cache_id, c_id) for c_id in ids])
return (num_results, self._get_codelets_from_ids(cursor, ids))
codelet_gen = self._get_codelets_from_ids(cursor, ids)
return (num_results, list(codelet_gen))


def insert(self, codelet): def insert(self, codelet):
""" """


+ 1
- 1
bitshift/parser/__init__.py View File

@@ -21,7 +21,7 @@ def _lang(codelet):


if codelet.filename is not None: if codelet.filename is not None:
try: try:
return pgl.guess_lexer_for_filename(codelet.filename, '').name
return pgl.guess_lexer_for_filename(codelet.filename, codelet.code).name
except: except:
raise UnsupportedFileError('Could not find a lexer for the codelet\'s filename') raise UnsupportedFileError('Could not find a lexer for the codelet\'s filename')




+ 279
- 7
bitshift/query/__init__.py View File

@@ -1,11 +1,283 @@
from .association import Association
from .node import Node
"""
This subpackage contains code to parse search queries received from the
frontend into trees that can be used by the database backend.
"""

from __future__ import unicode_literals
from re import IGNORECASE, search
from sys import maxsize

from dateutil.parser import parse as parse_date

from .nodes import (String, Regex, Text, Language, Author, Date, Symbol,
BinaryOp, UnaryOp)
from .tree import Tree from .tree import Tree
from ..languages import LANGS


__all__ = ["parse_query"]
__all__ = ["QueryParseException", "parse_query"]


def parse_query(query):
# gets a string, returns a Tree
# TODO: note: resultant Trees should be normalized so that "foo OR bar"
# and "bar OR foo" result in equivalent trees
class QueryParseException(Exception):
"""Raised by parse_query() when a query is invalid."""
pass pass


class _QueryParser(object):
    """Wrapper class with methods to parse queries. Used as a singleton.

    Parsing happens in stages: the raw string is split into a nested list of
    terms (:meth:`_split_query`), boolean keywords are replaced by operator
    sentinels (:meth:`_parse_boolean_operators`), the nest is folded into a
    tree of nodes (:meth:`_parse_nest`), and the tree is normalized
    (:meth:`_balance_tree`).
    """

    def __init__(self):
        # Maps each prefix handler to the lowercase prefixes that select it,
        # e.g. "lang:python" is dispatched to _parse_language("python").
        self._prefixes = {
            self._parse_language: ["l", "lang", "language"],
            self._parse_author: ["a", "author"],
            self._parse_modified: ["m", "mod", "modified", "modify"],
            self._parse_created: ["cr", "create", "created"],
            self._parse_symbol: ["s", "sym", "symb", "symbol"],
            self._parse_function: ["f", "fn", "fun", "func", "function"],
            self._parse_class: ["cl", "class", "clss"],
            self._parse_variable: ["v", "var", "variable"]
        }

    def _parse_literal(self, literal):
        """Parse part of a search query into a string or regular expression.

        A literal starting with ``r:``, ``re:``, ``regex:`` or ``regexp:``
        becomes a Regex built from the text after the first colon; anything
        else becomes a String.
        """
        if literal.startswith(("r:", "re:", "regex:", "regexp:")):
            return Regex(literal.split(":", 1)[1])
        return String(literal)

    def _parse_language(self, term):
        """Parse part of a query into a language node and return it.

        A regex term matches every language whose name it finds
        (case-insensitively) and returns them OR-ed together. A string term
        prefers an exact case-insensitive name match and falls back to the
        first language whose name starts with the term.

        :raises: :py:class:`.QueryParseException` if no language matches.
        """
        term = self._parse_literal(term)
        if isinstance(term, Regex):
            langs = [i for i, lang in enumerate(LANGS)
                     if search(term.regex, lang, IGNORECASE)]
            if not langs:
                err = 'No languages found for regex: "%s"' % term.regex
                raise QueryParseException(err)
            node = Language(langs.pop())
            while langs:
                node = BinaryOp(Language(langs.pop()), BinaryOp.OR, node)
            return node

        needle = term.string.lower()
        for i, lang in enumerate(LANGS):
            if lang.lower() == needle:
                return Language(i)
        for i, lang in enumerate(LANGS):
            if lang.lower().startswith(needle):
                return Language(i)
        err = 'No languages found for string: "%s"' % term.string
        raise QueryParseException(err)

    def _parse_author(self, term):
        """Parse part of a query into an author node and return it."""
        return Author(self._parse_literal(term))

    def _parse_date(self, term, type_):
        """Parse part of a query into a date node and return it.

        :param term: ``"<relation>:<date>"`` where the relation is
            ``before``/``b`` or ``after``/``a`` (case-insensitive).
        :param type_: ``Date.CREATE`` or ``Date.MODIFY``.

        :raises: :py:class:`.QueryParseException` if the relation is missing
            or unknown, or the date string cannot be parsed.
        """
        if ":" not in term:
            err = "A date relationship is required " \
                  '("before:<date>" or "after:<date>"): "%s"'
            raise QueryParseException(err % term)
        relstr, dtstr = term.split(":", 1)
        if relstr.lower() in ("before", "b"):
            relation = Date.BEFORE
        elif relstr.lower() in ("after", "a"):
            relation = Date.AFTER
        else:
            err = 'Bad date relationship (should be "before" or "after"): "%s"'
            raise QueryParseException(err % relstr)
        try:
            dt = parse_date(dtstr)
        except (TypeError, ValueError, OverflowError):
            # dateutil raises OverflowError for out-of-range numeric fields;
            # report it as a bad query rather than letting it escape.
            raise QueryParseException('Bad date/time string: "%s"' % dtstr)
        return Date(type_, relation, dt)

    def _parse_modified(self, term):
        """Parse part of a query into a date modified node and return it."""
        return self._parse_date(term, Date.MODIFY)

    def _parse_created(self, term):
        """Parse part of a query into a date created node and return it."""
        return self._parse_date(term, Date.CREATE)

    def _parse_symbol(self, term):
        """Parse part of a query into a symbol node and return it."""
        return Symbol(Symbol.ALL, self._parse_literal(term))

    def _parse_function(self, term):
        """Parse part of a query into a function node and return it."""
        return Symbol(Symbol.FUNCTION, self._parse_literal(term))

    def _parse_class(self, term):
        """Parse part of a query into a class node and return it."""
        return Symbol(Symbol.CLASS, self._parse_literal(term))

    def _parse_variable(self, term):
        """Parse part of a query into a variable node and return it."""
        return Symbol(Symbol.VARIABLE, self._parse_literal(term))

    def _parse_term(self, term):
        """Parse a query term into a tree node and return it.

        A term of the form ``prefix:arg`` is dispatched to the handler
        registered for ``prefix``. A leading ``not:`` negates whatever the
        rest of the term parses to. Terms without a known prefix are treated
        as text.

        :raises: :py:class:`.QueryParseException` if the term cannot be
            decoded or a prefix is given without an argument.
        """
        try:
            term = term.decode("unicode_escape")
        except UnicodeDecodeError:
            raise QueryParseException('Invalid query term: "%s"' % term)

        # `body` is what gets parsed as text when no prefix handler applies;
        # for a negated term it excludes the leading "not:".
        invert, body = False, term
        if ":" in term and not term[0] == ":":
            prefix, arg = term.split(":", 1)
            if prefix.lower() == "not":
                invert, body = True, arg
                if ":" in arg and not arg[0] == ":":
                    prefix, arg = arg.split(":", 1)
                else:
                    # "not:foo" has no inner prefix; negate the text itself.
                    prefix = None
            if not arg:
                raise QueryParseException('Incomplete query term: "%s"' % term)
            if prefix is not None:
                for meth, prefixes in self._prefixes.items():
                    if prefix.lower() in prefixes:
                        node = meth(arg)
                        if invert:
                            return UnaryOp(UnaryOp.NOT, node)
                        return node
        node = Text(self._parse_literal(body))
        if invert:
            return UnaryOp(UnaryOp.NOT, node)
        return node

    def _scan_query(self, query, markers):
        """Scan a query (sub)string for the first occurrence of some markers.

        Markers preceded by a single backslash are skipped.

        Returns a 2-tuple of (first_marker_found, marker_index). When no
        marker is found, the result is ``(None, sys.maxsize)``.
        """
        def is_escaped(query, index):
            """Return whether a query marker is backslash-escaped."""
            return (index > 0 and query[index - 1] == "\\" and
                    (index < 2 or query[index - 2] != "\\"))

        best_marker, best_index = None, maxsize
        for marker in markers:
            index = query.find(marker)
            if is_escaped(query, index):
                # Look for the next occurrence past the escaped one. If none
                # exists, the sub-scan returns maxsize and the candidate is
                # rejected by the comparison below.
                _, new_index = self._scan_query(query[index + 1:], marker)
                index += new_index + 1
            if index >= 0 and index < best_index:
                best_marker, best_index = marker, index
        return best_marker, best_index

    def _split_query(self, query, parens=False):
        """Split a query string into a nested list of query terms.

        Returns a list of terms and/or nested sublists of terms. Each term and
        sublist is guaranteed to be non-empty.

        :param parens: Whether the scan is inside a parenthesized group. If
            so, the text after the closing parenthesis is handed back to the
            caller as a 1-tuple at the end of the returned list.
        """
        query = query.lstrip()
        if not query:
            return []
        marker, index = self._scan_query(query, " \"'()")
        if not marker:
            return [query]
        nest = [query[:index]] if index > 0 else []
        after = query[index + 1:]

        if marker == " ":
            nest += self._split_query(after, parens)
        elif marker in ('"', "'"):
            close_marker, close_index = self._scan_query(after, marker)
            if close_marker:
                if close_index > 0:
                    nest.append(after[:close_index])
                after = after[close_index + 1:]
                nest += self._split_query(after, parens)
            elif after:
                # Unterminated quote: the remainder is one term.
                nest.append(after)
        elif marker == "(":
            inner, after = self._split_query(after, True), []
            if inner and isinstance(inner[-1], tuple):
                # The inner scan hit ")" and returned the unparsed rest.
                after = self._split_query(inner.pop()[0], parens)
            if inner:
                nest.append(inner)
            if after:
                nest += after
        elif marker == ")":
            if parens:
                nest.append((after,))
            else:
                # Stray closing parenthesis outside a group: ignore it.
                nest += self._split_query(after)
        return nest

    def _parse_boolean_operators(self, nest):
        """Parse boolean operators in a nested query list.

        Replaces, in place, every ``and``/``or``/``not`` term
        (case-insensitive) with its operator sentinel.
        """
        op_lookup = {
            "and": BinaryOp.AND,
            "or": BinaryOp.OR,
            "not": UnaryOp.NOT
        }
        for i, term in enumerate(nest):
            if isinstance(term, list):
                self._parse_boolean_operators(term)
            else:
                nest[i] = op_lookup.get(term.lower(), term)

    def _parse_nest(self, nest):
        """Recursively parse a nested list of search query terms.

        OR is split first, then AND, then NOT; adjacent terms without an
        operator between them are joined with AND.

        :raises: :py:class:`.QueryParseException` if the nest is empty or an
            operator lacks an argument.
        """
        def parse_binary_op(op):
            """Parse a binary operator in a nested query list."""
            index = nest.index(op)
            if index == 0 or index == len(nest) - 1:
                err = "Invalid query: '%s' given without argument."
                raise QueryParseException(err % BinaryOp.OPS[op])
            left = self._parse_nest(nest[:index])
            right = self._parse_nest(nest[index + 1:])
            return BinaryOp(left, op, right)

        if not nest:
            err = "Error while parsing query: empty nest detected."
            raise QueryParseException(err)
        elif BinaryOp.OR in nest:
            return parse_binary_op(BinaryOp.OR)
        elif BinaryOp.AND in nest:
            return parse_binary_op(BinaryOp.AND)
        elif UnaryOp.NOT in nest:
            index = nest.index(UnaryOp.NOT)
            if index == len(nest) - 1:
                err = "Invalid query: '%s' given without argument."
                raise QueryParseException(err % UnaryOp.OPS[UnaryOp.NOT])
            right = UnaryOp(UnaryOp.NOT, self._parse_nest(nest[index + 1:]))
            if index > 0:
                left = self._parse_nest(nest[:index])
                return BinaryOp(left, BinaryOp.AND, right)
            return right
        elif len(nest) > 1:
            # Implicit AND between adjacent terms. The head is parsed as a
            # one-element nest so that a parenthesized group (a sublist) in
            # first position is handled, not just a plain term.
            left = self._parse_nest(nest[:1])
            right = self._parse_nest(nest[1:])
            return BinaryOp(left, BinaryOp.AND, right)
        elif isinstance(nest[0], list):
            return self._parse_nest(nest[0])
        else:
            return self._parse_term(nest[0])

    def _balance_tree(self, node):
        """Auto-balance a tree using a string sorting function.

        Reorders, in place, the children of every binary operator so that
        the child with the smaller sort key is on the left.
        """
        if isinstance(node, BinaryOp):
            self._balance_tree(node.left)
            self._balance_tree(node.right)
            if node.right.sortkey() < node.left.sortkey():
                node.left, node.right = node.right, node.left
        elif isinstance(node, UnaryOp):
            self._balance_tree(node.node)

    def parse(self, query):
        """
        Parse a search query.

        The result is normalized with a sorting function so that
        ``"foo OR bar"`` and ``"bar OR foo"`` result in the same tree. This is
        important for caching purposes.

        :param query: The query to be converted.
        :type query: str

        :return: A tree storing the data in the query.
        :rtype: :py:class:`~.query.tree.Tree`

        :raises: :py:class:`.QueryParseException`
        """
        nest = self._split_query(query.rstrip())
        if not nest:
            raise QueryParseException('Empty query: "%s"' % query)
        self._parse_boolean_operators(nest)
        root = self._parse_nest(nest)
        self._balance_tree(root)
        return Tree(root)


# Public entry point: the bound ``parse`` method of a single _QueryParser
# instance created at import time.
parse_query = _QueryParser().parse

+ 276
- 0
bitshift/query/nodes.py View File

@@ -0,0 +1,276 @@
from ..languages import LANGS

__all__ = ["String", "Regex", "Text", "Language", "Author", "Date", "Symbol",
"BinaryOp", "UnaryOp"]

class _Node(object):
    """Base class for a single node in a query tree.

    A node generally stands for a constraint applied to the database; for
    instance, a :py:class:`~.Language` node restricts the selection to
    codelets of one specific language.
    """

    def sortkey(self):
        """Return the node's sort key as a string (empty by default)."""
        return ""

    def parameterize(self, tables):
        """Turn the node into SQL fragments.

        The result is a 4-tuple of (conditional string, parameter list, rank
        list, should-we-rank boolean). An empty rank list is taken to mean
        that it contains the conditional string.
        """
        return ("", [], [], False)


class _Literal(object):
    """Base class for the literal parts of a search query (tree leaves).

    A literal is either a string or a regular expression.
    """


class String(_Literal):
    """A plain string literal."""

    def __init__(self, string):
        """
        :param string: The literal text.
        :type string: unicode
        """
        self.string = string

    def __repr__(self):
        return "String(%r)" % (self.string,)

    def sortkey(self):
        """Return the literal text itself as the sort key."""
        return self.string


class Regex(_Literal):
    """A regular expression literal."""

    def __init__(self, regex):
        """
        :param regex: The regular expression source.
        :type regex: unicode
        """
        self.regex = regex

    def __repr__(self):
        return "Regex(%r)" % (self.regex,)

    def sortkey(self):
        """Return the regular expression source as the sort key."""
        return self.regex


class Text(_Node):
    """A free-text node.

    Matches against codelet names (full-text search), symbols (equality),
    and source code (full-text search). With a regex literal, all three
    columns are matched with ``REGEXP`` instead.
    """

    def __init__(self, text):
        """
        :type text: :py:class:`._Literal`
        """
        self.text = text

    def __repr__(self):
        return "Text(%s)" % (self.text,)

    def sortkey(self):
        """Return the sort key of the wrapped literal."""
        return self.text.sortkey()

    def parameterize(self, tables):
        """Build the text condition; registers the code and symbols tables.

        The same value is bound to all three placeholders.
        """
        tables.update(("code", "symbols"))
        if isinstance(self.text, Regex):
            needle = self.text.regex
            ranks = ["(codelet_name REGEXP ?)", "(symbol_name REGEXP ?)",
                     "(code_code REGEXP ?)"]
        else:
            needle = self.text.string
            ranks = ["(MATCH(codelet_name) AGAINST (? IN BOOLEAN MODE))",
                     "(MATCH(code_code) AGAINST (? IN BOOLEAN MODE))",
                     "(symbol_name = ?)"]
        condition = "(%s)" % " OR ".join(ranks)
        return condition, [needle, needle, needle], ranks, True


class Language(_Node):
    """A language node.

    Matches against the code_lang field.
    """

    def __init__(self, lang):
        """
        :param lang: Index of the language in ``LANGS``.
        :type lang: int
        """
        self.lang = lang

    def __repr__(self):
        return "Language(%s)" % (LANGS[self.lang],)

    def sortkey(self):
        """Return the language's name as the sort key."""
        return LANGS[self.lang]

    def parameterize(self, tables):
        """Build the language condition; registers the code table."""
        tables.update(("code",))
        return ("(code_lang = ?)", [self.lang], [], False)


class Author(_Node):
    """An author node.

    Matches against the author_name field (full-text search, or ``REGEXP``
    when the name is a regex literal).
    """

    def __init__(self, name):
        """
        :type name: :py:class:`_Literal`
        """
        self.name = name

    def __repr__(self):
        return "Author(%s)" % (self.name,)

    def sortkey(self):
        """Return the sort key of the wrapped literal."""
        return self.name.sortkey()

    def parameterize(self, tables):
        """Build the author condition; registers the authors table.

        Only the full-text (non-regex) form asks for ranking.
        """
        tables.update(("authors",))
        if not isinstance(self.name, Regex):
            fulltext = "(MATCH(author_name) AGAINST (? IN BOOLEAN MODE))"
            return fulltext, [self.name.string], [], True
        return "(author_name REGEXP ?)", [self.name.regex], [], False


class Date(_Node):
    """A date node.

    Matches against the codelet_date_created or codelet_date_modified
    fields.
    """
    # Which date column the node constrains.
    CREATE = 1
    MODIFY = 2

    # Direction of the comparison against the given date.
    BEFORE = 1
    AFTER = 2

    def __init__(self, type_, relation, date):
        """
        :type type_: int (``CREATE`` or ``MODIFY``)
        :type relation: int (``BEFORE``, ``AFTER``)
        :type date: datetime.datetime
        """
        self.type = type_
        self.relation = relation
        self.date = date

    def __repr__(self):
        type_names = {self.CREATE: "CREATE", self.MODIFY: "MODIFY"}
        relation_names = {self.BEFORE: "BEFORE", self.AFTER: "AFTER"}
        type_name = type_names[self.type]
        relation_name = relation_names[self.relation]
        return "Date({0}, {1}, {2})".format(type_name, relation_name,
                                            self.date)

    def sortkey(self):
        """Return the date formatted as ``YYYYmmddHHMMSS``."""
        return self.date.strftime("%Y%m%d%H%M%S")

    def parameterize(self, tables):
        """Build an inclusive comparison against the selected date column."""
        columns = {self.CREATE: "codelet_date_created",
                   self.MODIFY: "codelet_date_modified"}
        operators = {self.BEFORE: "<=", self.AFTER: ">="}
        column = columns[self.type]
        op = operators[self.relation]
        return "(%s %s ?)" % (column, op), [self.date], [], False


class Symbol(_Node):
    """A symbol node.

    Matches against symbol_type and symbol_name.
    """
    ALL = -1
    FUNCTION = 0
    CLASS = 1
    VARIABLE = 2
    # Display names of the concrete symbol types, keyed by type constant.
    TYPES = {FUNCTION: "FUNCTION", CLASS: "CLASS", VARIABLE: "VARIABLE"}
    # Plural type names, positioned by type constant.
    TYPES_INV = ["functions", "classes", "variables"]

    def __init__(self, type_, name):
        """
        :type type_: int (``ALL``, ``FUNCTION``, ``CLASS``, etc.)
        :type name: :py:class:`._Literal`
        """
        self.type = type_
        self.name = name

    def __repr__(self):
        type_name = self.TYPES.get(self.type, "ALL")
        return "Symbol(%s, %s)" % (type_name, self.name)

    def sortkey(self):
        """Return the sort key of the wrapped literal."""
        return self.name.sortkey()

    def parameterize(self, tables):
        """Build the symbol condition; registers the code and symbols tables.

        The name is bound as a parameter; the symbol type is written into
        the condition as an integer.
        """
        tables.update(("code", "symbols"))
        if isinstance(self.name, Regex):
            name = self.name.regex
            cond = "symbol_name REGEXP ?"
        else:
            name = self.name.string
            cond = "symbol_name = ?"
        if self.type == self.ALL:
            # ALL expands to every concrete type listed in TYPES.
            types = ", ".join(str(type_) for type_ in self.TYPES)
            cond += " AND symbol_type IN (%s)" % types
        else:
            cond += " AND symbol_type = %d" % self.type
        return "(%s)" % cond, [name], [], False


class BinaryOp(_Node):
    """A relationship between two nodes: ``and``, ``or``."""
    # Unique sentinels identifying the operators.
    AND = object()
    OR = object()
    OPS = {AND: "AND", OR: "OR"}

    def __init__(self, left, op, right):
        self.left = left
        self.op = op
        self.right = right

    def __repr__(self):
        op_name = self.OPS[self.op]
        return "BinaryOp(%s, %s, %s)" % (self.left, op_name, self.right)

    def sortkey(self):
        """Return the operands' sort keys, left then right, concatenated."""
        return "".join((self.left.sortkey(), self.right.sortkey()))

    def parameterize(self, tables):
        """Combine both operands' conditions, arguments and ranks.

        Ranking is requested if either operand requests it or if the
        operator is OR.
        """
        lcond, largs, lranks, need_lranks = self.left.parameterize(tables)
        rcond, rargs, rranks, need_rranks = self.right.parameterize(tables)
        # An empty rank list stands for the operand's own condition.
        if not lranks:
            lranks = [lcond]
        if not rranks:
            rranks = [rcond]
        cond = "(%s %s %s)" % (lcond, self.OPS[self.op], rcond)
        need_ranks = need_lranks or need_rranks or self.op == self.OR
        return cond, largs + rargs, lranks + rranks, need_ranks


class UnaryOp(_Node):
    """A transformation applied to one node: ``not``."""
    # Unique sentinel identifying the operator.
    NOT = object()
    OPS = {NOT: "NOT"}

    def __init__(self, op, node):
        self.op = op
        self.node = node

    def __repr__(self):
        return "UnaryOp(%s, %s)" % (self.OPS[self.op], self.node)

    def sortkey(self):
        """Return the sort key of the wrapped node."""
        return self.node.sortkey()

    def parameterize(self, tables):
        """Prefix the wrapped node's condition with the operator.

        The ranks are those of the wrapped node, or its own (un-prefixed)
        condition when it reports none.
        """
        inner_cond, args, ranks, need_ranks = self.node.parameterize(tables)
        if not ranks:
            ranks = [inner_cond]
        outer_cond = "(%s %s)" % (self.OPS[self.op], inner_cond)
        return outer_cond, args, ranks, need_ranks

+ 69
- 0
bitshift/query/tree.py View File

@@ -0,0 +1,69 @@
__all__ = ["Tree"]

# Skeleton of the search statement as a single line. The placeholders are,
# in order: score suffix, table joins, WHERE condition, page size, offset.
QUERY_TEMPLATE = " ".join([
    "SELECT codelet_id, (codelet_rank%s) AS score",
    "FROM codelets %s",
    "WHERE %s",
    "GROUP BY codelet_id",
    "ORDER BY score DESC",
    "LIMIT %d OFFSET %d",
])

class Tree(object):
    """A query tree, wrapping its root node."""

    def __init__(self, root):
        self._root = root

    def __repr__(self):
        return "Tree(%s)" % (self._root,)

    @property
    def root(self):
        """The root node of the tree."""
        return self._root

    def sortkey(self):
        """Return the root node's string sort key."""
        return self._root.sortkey()

    def serialize(self):
        """Create a string representation of the query for caching.

        :return: Query string representation (the tree's ``repr``).
        :rtype: str
        """
        return repr(self)

    def build_query(self, page=1, page_size=10):
        """Convert the query tree into a parameterized SQL SELECT statement.

        :param page: The page number to get results for.
        :type page: int
        :param page_size: The number of results per page.
        :type page_size: int

        :return: SQL query data.
        :rtype: 2-tuple of (SQL statement string, query parameter tuple)
        """
        # (table, left column, right column) for every optional join, in the
        # order the joins are emitted.
        join_specs = (
            ("code", "codelet_code_id", "code_id"),
            ("authors", "author_codelet", "codelet_id"),
            ("symbols", "symbol_code", "code_id"),
        )

        tables = set()
        cond, arglist, ranks, need_ranks = self._root.parameterize(tables)
        if not ranks:
            ranks = [cond]

        score = ""
        if need_ranks:
            # The score adds the mean of the rank expressions.
            score = " + ((%s) / %d)" % (" + ".join(ranks), len(ranks))

        joins = " ".join("INNER JOIN %s ON %s = %s" % spec
                         for spec in join_specs if spec[0] in tables)
        offset = page_size * (page - 1)
        query = QUERY_TEMPLATE % (score, joins, cond, page_size, offset)

        # With ranking, the arguments are bound twice: once for the score
        # expression and once for the WHERE clause.
        if need_ranks:
            return query, tuple(arglist * 2)
        return query, tuple(arglist)

+ 2
- 1
setup.py View File

@@ -6,7 +6,8 @@ setup(
packages = find_packages(), packages = find_packages(),
install_requires = [ install_requires = [
"Flask>=0.10.1", "pygments>=1.6", "requests>=2.2.0", "Flask>=0.10.1", "pygments>=1.6", "requests>=2.2.0",
"beautifulsoup4>=3.2.1", "oursql>=0.9.3.1", "mmh3>=2.3"],
"beautifulsoup4>=3.2.1", "oursql>=0.9.3.1", "mmh3>=2.3",
"python-dateutil>=2.2"],
author = "Benjamin Attal, Ben Kurtovic, Severyn Kozak", author = "Benjamin Attal, Ben Kurtovic, Severyn Kozak",
license = "MIT", license = "MIT",
url = "https://github.com/earwig/bitshift" url = "https://github.com/earwig/bitshift"


BIN
View File


+ 116
- 8
static/js/index.js View File

@@ -10,7 +10,6 @@ var searchBar = $("form#search-bar input[type='text']")[0];
var resultsDiv = $("div#results")[0]; var resultsDiv = $("div#results")[0];


var typingTimer, lastValue; var typingTimer, lastValue;

/* /*
* Set all page callbacks. * Set all page callbacks.
*/ */
@@ -43,10 +42,32 @@ var typingTimer, lastValue;
event.preventDefault(); event.preventDefault();
return false; return false;
}); });

searchBar.onkeyup = typingTimer; searchBar.onkeyup = typingTimer;
}()); }());


//Obtained by parsing python file with pygments
var codeExample = '<table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre> 1\n 2\n 3\n 4\n 5\n 6\n 7\n 8\n 9\n10\n11\n12\n13\n14\n15\n16\n17\n18\n19\n20\n21\n22\n23\n24\n25\n26\n27\n28\n29\n30\n31\n32\n33\n34\n35\n36\n37\n38\n39\n40</pre></div></td><td class="code"><div class="hll"><pre><span class="sd">&quot;&quot;&quot;</span>\n<span class="sd">Module to contain all the project&#39;s Flask server plumbing.</span>\n<span class="sd">&quot;&quot;&quot;</span>\n\n<span class="kn">from</span> <span class="nn">flask</span> <span class="kn">import</span> <span class="n">Flask</span>\n<span class="kn">from</span> <span class="nn">flask</span> <span class="kn">import</span> <span class="n">render_template</span><span class="p">,</span> <span class="n">session</span>\n\n<span class="kn">from</span> <span class="nn">bitshift</span> <span class="kn">import</span> <span class="n">assets</span>\n<span class="c"># from bitshift.database import Database</span>\n<span class="c"># from bitshift.query import parse_query</span>\n\n<span class="n">app</span> <span class="o">=</span> <span class="n">Flask</span><span class="p">(</span><span class="n">__name__</span><span class="p">)</span>\n<span class="n">app</span><span class="o">.</span><span class="n">config</span><span class="o">.</span><span class="n">from_object</span><span class="p">(</span><span class="s">&quot;bitshift.config&quot;</span><span class="p">)</span>\n\n<span class="n">app_env</span> <span class="o">=</span> <span class="n">app</span><span class="o">.</span><span class="n">jinja_env</span>\n<span class="n">app_env</span><span class="o">.</span><span class="n">line_statement_prefix</span> <span class="o">=</span> <span class="s">&quot;=&quot;</span>\n<span class="n">app_env</span><span class="o">.</span><span class="n">globals</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">assets</span><span class="o">=</span><span 
class="n">assets</span><span class="p">)</span>\n\n<span class="c"># database = Database()</span>\n\n<span class="nd">@app.route</span><span class="p">(</span><span class="s">&quot;/&quot;</span><span class="p">)</span>\n<span class="k">def</span> <span class="nf">index</span><span class="p">():</span>\n <span class="k">return</span> <span class="n">render_template</span><span class="p">(</span><span class="s">&quot;index.html&quot;</span><span class="p">)</span>\n\n<span class="nd">@app.route</span><span class="p">(</span><span class="s">&quot;/search/&lt;query&gt;&quot;</span><span class="p">)</span>\n<span class="k">def</span> <span class="nf">search</span><span class="p">(</span><span class="n">query</span><span class="p">):</span>\n <span class="c"># tree = parse_query(query)</span>\n <span class="c"># database.search(tree)</span>\n <span class="k">pass</span>\n\n<span class="nd">@app.route</span><span class="p">(</span><span class="s">&quot;/about&quot;</span><span class="p">)</span>\n<span class="k">def</span> <span class="nf">about</span><span class="p">():</span>\n <span class="k">return</span> <span class="n">render_template</span><span class="p">(</span><span class="s">&quot;about.html&quot;</span><span class="p">)</span>\n\n<span class="nd">@app.route</span><span class="p">(</span><span class="s">&quot;/developers&quot;</span><span class="p">)</span>\n<span class="k">def</span> <span class="nf">developers</span><span class="p">():</span>\n <span class="k">return</span> <span class="n">render_template</span><span class="p">(</span><span class="s">&quot;developers.html&quot;</span><span class="p">)</span>\n\n<span class="k">if</span> <span class="n">__name__</span> <span class="o">==</span> <span class="s">&quot;__main__&quot;</span><span class="p">:</span>\n <span class="n">app</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">debug</span><span class="o">=</span><span class="bp">True</span><span 
class="p">)</span>\n</pre></div>\n</td></tr></table>'
searchBar.onkeyup = typingTimer;

var testCodelet = {
'code_url': 'https://github.com/earwig/bitshift/blob/develop/app.py',
'filename': 'app.py',
'language': 'python',
'date_created': 'May 10, 2014',
'date_modified': '2 days ago',
'origin': ['GitHub', 'https://github.com', ''],
'authors': ['sevko', 'earwig'],
'html_code': codeExample
};

// Enable infinite scrolling down the results page: once the viewport reaches
// the bottom of the document while the search field is in its
// `partly-visible` state, request another batch of results.
$(window).scroll(function() {
    var searchField = $("div#search-field");

    // Scroll offsets may be fractional (page zoom, high-DPI displays), so a
    // strict equality test against the document height can be missed and the
    // next batch would never load. Allow one pixel of tolerance instead.
    var viewportBottom = $(window).scrollTop() + $(window).height(),
        reachedBottom = viewportBottom >= $(document).height() - 1;

    if (reachedBottom && searchField.hasClass('partly-visible')) {
        loadMoreResults();
    }
});

/* /*
* Clear the existing timer and set a new one when the user types text into the * Clear the existing timer and set a new one when the user types text into the
* search bar. * search bar.
@@ -117,6 +138,98 @@ function populateResults(){
} }


/* /*
* Create a result element based upon a codelet instance.
*
* @return {Element} The result element.
*/
function createResult(codelet) {
    /*
     * Build the DOM subtree for a single search result.
     *
     * Layout produced:
     *   div.result
     *     div#display-info  -> span#title, span#site, span#language
     *     table > tr        -> td#sidebar, td#code, td (div#hidden-info),
     *                          td#display-button
     *
     * @param {Object} codelet Fields read: code_url, filename, origin
     *      (index 0 = site name, index 1 = site URL), language,
     *      date_modified, date_created, authors (iterable of names) and
     *      html_code (markup for the code cell).
     * @return {Element} The `div.result` element, not yet attached to the
     *      document.
     *
     * NOTE(review): the ids assigned below are repeated in every result, so
     * they are not unique within the page. They are kept because the
     * stylesheet selects on them; switching to classes needs a matching
     * stylesheet change.
     */

    // Create an element and, when given, assign its id.
    function make(tagName, id) {
        var elt = document.createElement(tagName);
        if (id) {
            elt.id = id;
        }
        return elt;
    }

    // Create a text node; the value is never parsed as markup.
    function text(value) {
        return document.createTextNode(value);
    }

    // Create an anchor whose target and label are set through the DOM, so
    // neither can break out of the attribute or inject elements.
    function makeLink(href, label) {
        var link = document.createElement("a");
        link.setAttribute("href", href);
        link.appendChild(text(label));
        return link;
    }

    // Level 1: outer wrapper and the table holding the result body.
    var newDiv = make("div"),
        table = make("table"),
        row = make("tr");
    // Level 2: header strip and the table cells.
    var displayInfo = make("div", "display-info"),
        sidebar = make("td", "sidebar"),
        codeElt = make("td", "code"),
        displayButton = make("td", "display-button"),
        hiddenInfoContainer = make("td"),
        hiddenInfo = make("div", "hidden-info");
    // Level 3: individual metadata fields.
    var title = make("span", "title"),
        site = make("span", "site"),
        dateModified = make("span", "date-modified"),
        language = make("span", "language"),
        dateCreated = make("span", "date-created"),
        authors = make("div", "authors");

    newDiv.classList.add('result');

    // Metadata is inserted as text nodes rather than concatenated into
    // innerHTML: file names, author names, etc. come from indexed sources
    // and must not be interpreted as HTML.
    title.appendChild(text('File '));
    title.appendChild(makeLink(codelet.code_url, codelet.filename));

    site.appendChild(text('on '));
    site.appendChild(makeLink(codelet.origin[1], codelet.origin[0]));

    language.appendChild(text(codelet.language));
    dateModified.appendChild(text('Last modified ' + codelet.date_modified));
    // Needs to be changed from int to string on the server
    dateCreated.appendChild(text('Created ' + codelet.date_created));

    authors.appendChild(text('Authors: '));
    $.each(codelet.authors, function(i, author) {
        // Appending nodes avoids re-parsing the whole author list on every
        // iteration, which `innerHTML +=` would do.
        authors.appendChild(makeLink('#', author + ' '));
    });

    // The sidebar is an empty placeholder for now.
    sidebar.innerHTML = '';
    // html_code is inserted as markup on purpose (it carries the code
    // table). It needs to be processed on the server and must be safe HTML
    // by the time it gets here.
    codeElt.innerHTML = '<div id=tablecontainer>' + codelet.html_code + '</div>';

    // Event binding.
    // Expand the result when the pointer enters the display button. A
    // single-argument .hover() would also run on mouseleave and could re-add
    // the class right after the result collapsed.
    $(displayButton).on('mouseenter', function(e) {
        $(row).addClass('display-all');
    });

    // Once a transition inside the result has finished, collapse it again
    // the next time the pointer leaves the result.
    $(newDiv).on('transitionend', function(e) {
        $(newDiv).one('mouseleave', function(e) {
            $(row).removeClass('display-all');
        });
    });

    // Assemble the tree, innermost pieces first.
    hiddenInfo.appendChild(dateCreated);
    hiddenInfo.appendChild(dateModified);
    hiddenInfo.appendChild(authors);

    hiddenInfoContainer.appendChild(hiddenInfo);

    row.appendChild(sidebar);
    row.appendChild(codeElt);
    row.appendChild(hiddenInfoContainer);
    row.appendChild(displayButton);
    table.appendChild(row);

    displayInfo.appendChild(title);
    displayInfo.appendChild(site);
    displayInfo.appendChild(language);

    newDiv.appendChild(displayInfo);
    newDiv.appendChild(table);

    return newDiv;
}

/*
* AJAX the current query string to the server, and return its response. * AJAX the current query string to the server, and return its response.
* *
* @return {Array} The server's response in the form of `div.result` DOM * @return {Array} The server's response in the form of `div.result` DOM
@@ -125,12 +238,7 @@ function populateResults(){
function queryServer(){ function queryServer(){
var resultDivs = [] var resultDivs = []
for(var result = 0; result < 20; result++){ for(var result = 0; result < 20; result++){
var newDiv = document.createElement("div");
newDiv.classList.add("result");
newDiv.innerHTML = Math.random();
newDiv.style.textAlign = "center";
newDiv.style.color = "#" + Math.floor(Math.random() *
16777215).toString(16);
var newDiv = createResult(testCodelet);
resultDivs.push(newDiv); resultDivs.push(newDiv);
} }




+ 119
- 4
static/sass/index.sass View File

@@ -6,6 +6,10 @@
@import variables @import variables


$minSearchFieldsWidth: 490px $minSearchFieldsWidth: 490px
$resultWidth: 1000px
$sidebarWidth: 30px
$codeWidth: 650px
$hiddenInfoWidth: 250px


.ui-datepicker .ui-datepicker
font-size: 70% font-size: 70%
@@ -282,13 +286,124 @@ div#results
margin-right: auto margin-right: auto
width: 80% width: 80%


/* TODO:
1) Sidebar
- Add way to cycle through hits in the code.
2) Hidden info
- Add links for authors.
- Remove language field.
3) Header
- Add an icon for the website.
- Add language tag.
4) Code body
- Add highlighting.
5) Display button
- unicode glyph */
div.result div.result
background-color: #F8F8F8
width: $resultWidth
height: 200px
margin-top: 2%
margin-bottom: 10% margin-bottom: 10%
margin-top: 1%
padding: 1%


table
border-collapse: collapse
border: 1px solid $baseColor3
height: inherit

tr
height: inherit


&.cascade &.cascade
@extend .t3 @extend .t3


margin-bottom: 0%
div#display-info
font-size: 1.3em
padding: 5px 0px 5px 5px
border: 1px dotted $baseColor3
border-bottom: none
width: 400px

a
text-decoration: none

&:hover
color: orange

#title
margin-right: 10px

#site
text-transform: capitalize

#language
font-size: 0.8em
font-weight: bold
margin-left: 100px
padding: 3px
@include vendor(border-radius, 2px)
background: #ddd
color: orange

td#sidebar
width: $sidebarWidth
background-color: #eee
border-right: 1px solid $baseColor3
height: inherit

td#code
width: $codeWidth
height: inherit
border-right: 1px solid $baseColor3
@include vendor(transition, width 0.2s ease-in-out)

.display-all &
width: 500px

#tablecontainer
overflow: scroll
width: 100%
height: inherit
background-color: #49483e

table
table-layout:fixed
border-collapse: collapse
border: none
font-family: monospace

td#display-button
width: 25px
background: url(https://cdn1.iconfinder.com/data/icons/windows-8-metro-style/512/View_Details-.png)
background-size: 25px 25px
background-repeat: no-repeat
background-position: center

.display-all &
@include vendor(transform, rotateY(180deg))

div#hidden-info
width: $hiddenInfoWidth
margin-left: -$hiddenInfoWidth
height: 100%
padding-top: 40px
font-size: 1.2em
line-height: 1.5em
@include vendor(transition, margin-left 0.2s ease-in-out)

.display-all &
margin-left: 0px
padding-left: 20px

#date-created
display: inline-block

#date-modified
display: block

#authors
a
text-decoration: none

&:hover
color: orange

+ 1
- 3
static/sass/main.sass View File

@@ -9,9 +9,7 @@ html, body
height: 100% height: 100%
margin: 0 margin: 0
padding: 0 padding: 0

*
font-family: sans-serif
font-family: sans-serif


div#container div#container
min-height: 100% min-height: 100%


+ 26
- 0
templates/index.html View File

@@ -8,6 +8,7 @@
{{ assets.tag("lib/jqueryui.custom.min.css") }} {{ assets.tag("lib/jqueryui.custom.min.css") }}
{{ assets.tag("lib/jquery.min.js") }} {{ assets.tag("lib/jquery.min.js") }}
{{ assets.tag("lib/jquery-ui.min.js") }} {{ assets.tag("lib/jquery-ui.min.js") }}
{{ assets.tag("lib/highlight.css") }}


{{ assets.tag("index.css") }} {{ assets.tag("index.css") }}


@@ -88,6 +89,31 @@
<span class="regex">Regex</span> <span class="regex">Regex</span>
</div> </div>
</div> </div>

<div id="lower-half">
<ul>
<li>
<label for="symbols">Symbols</label>
<input type="text" name="symbols" id="symbols"><br>
</li>

<li>
<label for="functions">Functions</label>
<input type="text" name="functions" id="functions"><br>
</li>

<li>
<label for="classes">Classes</label>
<input type="text" name="classes" id="classes"><br>
</li>

<li>
<label for="variables">Variables</label>
<input type="text" name="variables" id="variables"><br>
</li>

</ul>
</div>
</div> </div>
</div> </div>
</form> </form>


+ 0
- 0
View File


+ 67
- 0
test/test_query_parser.py View File

@@ -0,0 +1,67 @@
from __future__ import unicode_literals
import unittest

from bitshift.query import parse_query

# Table of parser test cases. Each entry is a 2-tuple of
# (raw query string, expected result of ``parse_query(query).serialize()``);
# TestQueryParser.test_parse walks the table and compares the two.
TESTS = [
# Text
("test", "Tree(Text(String(u'test')))"),
("re:test", "Tree(Text(Regex(u'test')))"),

# Language
# Full names, short aliases and regexes are all expected to resolve to
# language nodes.
("language:python", "Tree(Language(Python))"),
("language:py", "Tree(Language(Python))"),
("l:r:r..y", "Tree(Language(Ruby))"),
("lang:re:py|c", "Tree(BinaryOp(Language(C), OR, Language(Python)))"),

# Author
('"author:Ben Kurtovic"', "Tree(Author(String(u'Ben Kurtovic')))"),
(r"'a:re:b.*?\sk.*?'", r"Tree(Author(Regex(u'b.*?\\sk.*?')))"),

# Date
("'create:before:Jan 1, 2014'",
"Tree(Date(CREATE, BEFORE, 2014-01-01 00:00:00))"),
("'modify:after:2010-05-09 10:11:12'",
"Tree(Date(MODIFY, AFTER, 2010-05-09 10:11:12))"),

# Symbol
# A trailing "()" on a function name is expected to be dropped.
("sym:foobar", "Tree(Symbol(ALL, String(u'foobar')))"),
("func:foo_bar", "Tree(Symbol(FUNCTION, String(u'foo_bar')))"),
("func:foo_bar()", "Tree(Symbol(FUNCTION, String(u'foo_bar')))"),
("class:FooBar", "Tree(Symbol(CLASS, String(u'FooBar')))"),
("var:foobar", "Tree(Symbol(VARIABLE, String(u'foobar')))"),
("var:r:foobar", "Tree(Symbol(VARIABLE, Regex(u'foobar')))"),

# Composition
# The first two cases expect the same tree: without parentheses, "and" is
# expected to bind tighter than "or".
("(a and b) or (c and d)", ", ".join([
"Tree(BinaryOp(BinaryOp(Text(String(u'a'))", "AND",
"Text(String(u'b')))", "OR", "BinaryOp(Text(String(u'c'))", "AND",
"Text(String(u'd')))))"])),
("a and b or c and d", ", ".join([
"Tree(BinaryOp(BinaryOp(Text(String(u'a'))", "AND",
"Text(String(u'b')))", "OR", "BinaryOp(Text(String(u'c'))", "AND",
"Text(String(u'd')))))"])),
("a and b or c or d", ", ".join([
"Tree(BinaryOp(BinaryOp(Text(String(u'a'))", "AND",
"Text(String(u'b')))", "OR", "BinaryOp(Text(String(u'c'))", "OR",
"Text(String(u'd')))))"])),
("a and (b or c or d)", ", ".join([
"Tree(BinaryOp(Text(String(u'a'))", "AND",
"BinaryOp(Text(String(u'b'))", "OR", "BinaryOp(Text(String(u'c'))", "OR",
"Text(String(u'd'))))))"])),
# "not" with no explicit operator before it is expected to be joined to
# the preceding term with AND.
("a not b", ", ".join([
"Tree(BinaryOp(Text(String(u'a'))", "AND", "UnaryOp(NOT",
"Text(String(u'b')))))"])),
]

class TestQueryParser(unittest.TestCase):
    """Checks :py:func:`bitshift.query.parse_query` against the TESTS table."""

    def test_parse(self):
        """test full query parsing"""
        # Each case pairs a raw query with the serialized tree it must yield.
        for case in TESTS:
            query, serialized = case
            tree = parse_query(query)
            self.assertEqual(serialized, tree.serialize())


if __name__ == "__main__":
unittest.main(verbosity=2)

Loading…
Cancel
Save