From cf2b48e2177b673e004031f895d6edbb6c6cc9de Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Wed, 7 May 2014 12:57:37 -0400
Subject: [PATCH] More work on query tree structure.

---
 bitshift/database/__init__.py |  19 ++++--
 bitshift/query/nodes.py       | 139 +++++++++++++++++++++++++++++++++++-------
 bitshift/query/tree.py        |   3 -
 3 files changed, 131 insertions(+), 30 deletions(-)

diff --git a/bitshift/database/__init__.py b/bitshift/database/__init__.py
index 75f39da..07f46c7 100644
--- a/bitshift/database/__init__.py
+++ b/bitshift/database/__init__.py
@@ -51,6 +51,20 @@ class Database(object):
                   "Run `python -m bitshift.database.migration`."
             raise RuntimeError(err)
 
+    def _search_with_query(self, cursor, query, page):
+        """Convert a query tree into SQL SELECTs, execute, and return results.
+
+        The returned data is a 2-tuple of (list of codelet IDs, estimated
+        number of total results).
+        """
+        raise NotImplementedError()  ## TODO
+
+        results = cursor.fetchall()
+        ids = NotImplemented  ## TODO: extract ids from results
+        num_results = NotImplemented  ## TODO: num if results else 0
+
+        return ids, num_results
+
     def _get_codelets_from_ids(self, cursor, ids):
         """Return a list of Codelet objects given a list of codelet IDs."""
         raise NotImplementedError()  ## TODO
@@ -112,10 +126,7 @@ class Database(object):
                 num_results = results[0][1] * (10 ** results[0][2])
                 ids = [res[0] for res in results]
             else:  # Cache miss
-                ## TODO: build and execute search query
-                results = cursor.fetchall()
-                ids = NotImplemented  ## TODO: extract ids from results
-                num_results = NotImplemented  ## TODO: num if results else 0
+                ids, num_results = self._search_with_query(cursor, query, page)
                 num_exp = max(len(str(num_results)) - 3, 0)
                 num_results = int(round(num_results, -num_exp))
                 num_mnt = num_results / (10 ** num_exp)
diff --git a/bitshift/query/nodes.py b/bitshift/query/nodes.py
index c1f2dfe..8dc7fe9 100644
--- a/bitshift/query/nodes.py
+++ b/bitshift/query/nodes.py
@@ -1,4 +1,17 @@
-__all__ = ["Node", "Text", "BinaryOp", "UnaryOp"]
+# from ..languages import LANGS
+
+__all__ = ["String", "Regex", "Text", "Language", "Date", "Author", "Symbol",
+           "BinaryOp", "UnaryOp"]
+
+class _Node(object):
+    """Represents a single node in a query tree.
+
+    Generally speaking, a node is a constraint applied to the database. Thus,
+    a :py:class:`~.Language` node represents a constraint where only codelets
+    of a specific language are selected.
+    """
+    pass
+
 
 class _Literal(object):
     """Represents a literal component of a search query, present at the leaves.
@@ -8,32 +21,33 @@ class _Literal(object):
     pass
 
 
-class _String(_Literal)
+class String(_Literal):
     """Represents a string literal."""
 
     def __init__(self, string):
+        """
+        :type string: unicode
+        """
         self.string = string
 
     def __repr__(self):
         return "String({0!r})".format(self.string)
 
 
-class _Regex(_Literal):
+class Regex(_Literal):
     """Represents a regular expression literal."""
 
     def __init__(self, regex):
+        """
+        :type regex: unicode
+        """
         self.regex = regex
 
     def __repr__(self):
         return "Regex({0!r})".format(self.regex)
 
 
-class Node(object):
-    """Represents a single node in a query tree."""
-    pass
-
-
-class Text(Node):
+class Text(_Node):
     """Represents a text node.
 
     Searches in codelet names (full-text search), symbols (equality), and
@@ -41,41 +55,120 @@ class Text(Node):
     """
 
     def __init__(self, text):
+        """
+        :type text: :py:class:`._Literal`
+        """
         self.text = text
 
     def __repr__(self):
         return "Text({0})".format(self.text)
 
 
-# Language -> code_lang (direct)
-# DateRange -> codelet_date_created (cmp), codelet_date_modified (cmp)
-# Author -> author_name (FTS)
-# Symbol -> func, class, var -> symbol_type, symbol_name (direct)
+class Language(_Node):
+    """Represents a language node.
 
+    Searches in the code_lang field.
+    """
+
+    def __init__(self, lang):
+        """
+        :type lang: int
+        """
+        self.lang = lang
+
+    def __repr__(self):
+        return "Language({0})".format(self.lang)
+
+
+class Date(_Node):
+    """Represents a date node.
+
+    Searches in the codelet_date_created or codelet_date_modified fields.
+    """
+    CREATE = 1
+    MODIFY = 2
+
+    BEFORE = 1
+    AFTER = 2
+
+    def __init__(self, type_, relation, date):
+        """
+        :type type_: int (``CREATE`` or ``MODIFY``)
+        :type relation: int (``BEFORE``, ``AFTER``)
+        :type date: datetime.datetime
+        """
+        self.type = type_
+        self.relation = relation
+        self.date = date
+
+    def __repr__(self):
+        types = {self.CREATE: "CREATE", self.MODIFY: "MODIFY"}
+        relations = {self.BEFORE: "BEFORE", self.AFTER: "AFTER"}
+        tm = "Date({0}, {1}, {2})"
+        return tm.format(types[self.type], relations[self.relation], self.date)
 
-class BinaryOp(Node):
+
+class Author(_Node):
+    """Represents an author node.
+
+    Searches in the author_name field (full-text search).
+    """
+
+    def __init__(self, name):
+        self.name = name
+
+    def __repr__(self):
+        return "Author({0})".format(self.name)
+
+
+class Symbol(_Node):
+    """Represents a symbol node.
+
+    Searches in symbol_type and symbol_name.
+    """
+    ALL = 0
+    FUNCTION = 1
+    CLASS = 2
+    VARIABLE = 3
+
+    def __init__(self, type_, name):
+        """
+        :type type_: int (``ALL``, ``FUNCTION``, ``CLASS``, etc.)
+        :type name: :py:class:`._Literal`
+        """
+        self.type = type_
+        self.name = name
+
+    def __repr__(self):
+        types = {self.ALL: "ALL", self.FUNCTION: "FUNCTION",
+                 self.CLASS: "CLASS", self.VARIABLE: "VARIABLE"}
+        return "Symbol({0}, {1})".format(types[self.type], self.name)
+
+
+class BinaryOp(_Node):
     """Represents a relationship between two nodes: ``and``, ``or``."""
     AND = 1
     OR = 2
 
-    def __init__(self, left, right, op):
+    def __init__(self, left, op, right):
         self.left = left
-        self.right = right
         self.op = op
+        self.right = right
 
     def __repr__(self):
-        ops = {self.AND: "And", self.OR: "Or"}
-        return "{0}({1}, {2})".format(ops[self.op], self.left, self.right)
+        ops = {self.AND: "AND", self.OR: "OR"}
+        tmpl = "BinaryOp({0}, {1}, {2})"
+        return tmpl.format(self.left, ops[self.op], self.right)
 
 
-class UnaryOp(Node):
+class UnaryOp(_Node):
     """Represents a transformation applied to one node: ``not``."""
     NOT = 1
 
-    def __init__(self, node, op):
-        self.node = node
+    def __init__(self, op, node):
         self.op = op
+        self.node = node
 
     def __repr__(self):
-        ops = {self.NOT: "Not"}
-        return "{0}({1})".format(ops[self.op], self.node)
+        ops = {self.NOT: "NOT"}
+        return "UnaryOp({0}, {1})".format(ops[self.op], self.node)
diff --git a/bitshift/query/tree.py b/bitshift/query/tree.py
index fe65744..3f09c0c 100644
--- a/bitshift/query/tree.py
+++ b/bitshift/query/tree.py
@@ -8,6 +8,3 @@ class Tree(object):
 
     def serialize(self):
         pass
-
-    def build_query(self):
-        pass