Browse Source

Only use dynamic ranks if the conditional is complex.

tags/v1.0^2
Ben Kurtovic 10 years ago
parent
commit
827ea09085
2 changed files with 26 additions and 22 deletions
  1. bitshift/query/nodes.py (+18, -15)
  2. bitshift/query/tree.py (+8, -7)

+ 18
- 15
bitshift/query/nodes.py View File

@@ -18,11 +18,11 @@ class _Node(object):
def parameterize(self, tables): def parameterize(self, tables):
"""Parameterize the node. """Parameterize the node.


Returns a 3-tuple of (conditional string, rank list, parameter list).
If the rank list is empty, then it is assumed to contain the
conditional string.
Returns a 4-tuple of (conditional string, parameter list, rank list,
should-we-rank boolean). If the rank list is empty, then it is assumed
to contain the conditional string.
""" """
return "", [], []
return "", [], [], False




class _Literal(object): class _Literal(object):
@@ -95,7 +95,8 @@ class Text(_Node):
"(MATCH(code_code) AGAINST (? IN BOOLEAN MODE))", "(MATCH(code_code) AGAINST (? IN BOOLEAN MODE))",
"(symbol_name = ?)"] "(symbol_name = ?)"]
text = self.text.string text = self.text.string
return cond, ranks, [text] * 3
cond = "(" + " OR ".join(ranks) + ")"
return cond, [text] * 3, ranks, True




class Language(_Node): class Language(_Node):
@@ -118,7 +119,7 @@ class Language(_Node):


def parameterize(self, tables): def parameterize(self, tables):
tables |= {"code"} tables |= {"code"}
return "(code_lang = ?)", [], [self.lang]
return "(code_lang = ?)", [self.lang], [], False




class Author(_Node): class Author(_Node):
@@ -142,9 +143,9 @@ class Author(_Node):
def parameterize(self, tables): def parameterize(self, tables):
tables |= {"authors"} tables |= {"authors"}
if isinstance(self.name, Regex): if isinstance(self.name, Regex):
return "(author_name REGEXP ?)", [], [self.name.regex]
return "(author_name REGEXP ?)", [self.name.regex], [], False
cond = "(MATCH(author_name) AGAINST (? IN BOOLEAN MODE))" cond = "(MATCH(author_name) AGAINST (? IN BOOLEAN MODE))"
return cond, [], [self.name.string]
return cond, [self.name.string], [], True




class Date(_Node): class Date(_Node):
@@ -181,7 +182,7 @@ class Date(_Node):
column = {self.CREATE: "codelet_date_created", column = {self.CREATE: "codelet_date_created",
self.MODIFY: "codelet_date_modified"}[self.type] self.MODIFY: "codelet_date_modified"}[self.type]
op = {self.BEFORE: "<=", self.AFTER: ">="}[self.relation] op = {self.BEFORE: "<=", self.AFTER: ">="}[self.relation]
return "(" + column + " " + op + " ?)", [], [self.date]
return "(" + column + " " + op + " ?)", [self.date], [], False




class Symbol(_Node): class Symbol(_Node):
@@ -221,7 +222,7 @@ class Symbol(_Node):
cond += " AND symbol_type IN (%s)" % types cond += " AND symbol_type IN (%s)" % types
if self.type != self.ALL: if self.type != self.ALL:
cond += " AND symbol_type = %d" % self.type cond += " AND symbol_type = %d" % self.type
return "(" + cond + ")", [], [name]
return "(" + cond + ")", [name], [], False




class BinaryOp(_Node): class BinaryOp(_Node):
@@ -243,12 +244,13 @@ class BinaryOp(_Node):
return self.left.sortkey() + self.right.sortkey() return self.left.sortkey() + self.right.sortkey()


def parameterize(self, tables): def parameterize(self, tables):
lcond, lranks, largs = self.left.parameterize(tables)
rcond, rranks, rargs = self.right.parameterize(tables)
lcond, largs, lranks, need_lranks = self.left.parameterize(tables)
rcond, rargs, rranks, need_rranks = self.right.parameterize(tables)
lranks, rranks = lranks or [lcond], rranks or [rcond] lranks, rranks = lranks or [lcond], rranks or [rcond]
op = self.OPS[self.op] op = self.OPS[self.op]
cond = "(" + lcond + " " + op + " " + rcond + ")" cond = "(" + lcond + " " + op + " " + rcond + ")"
return cond, lranks + rranks, largs + rargs
need_ranks = need_lranks or need_rranks or self.op == self.OR
return cond, largs + rargs, lranks + rranks, need_ranks




class UnaryOp(_Node): class UnaryOp(_Node):
@@ -267,6 +269,7 @@ class UnaryOp(_Node):
return self.node.sortkey() return self.node.sortkey()


def parameterize(self, tables): def parameterize(self, tables):
cond, ranks, args = self.node.parameterize(tables)
cond, args, ranks, need_ranks = self.node.parameterize(tables)
new_cond = "(" + self.OPS[self.op] + " " + cond + ")"
ranks = ranks or [cond] ranks = ranks or [cond]
return "(" + self.OPS[self.op] + " " + cond + ")", ranks, args
return new_cond, args, ranks, need_ranks

+ 8
- 7
bitshift/query/tree.py View File

@@ -1,6 +1,6 @@
__all__ = ["Tree"] __all__ = ["Tree"]


QUERY_TEMPLATE = """SELECT codelet_id, (codelet_rank + %s) AS score
QUERY_TEMPLATE = """SELECT codelet_id, (codelet_rank%s) AS score
FROM codelets %s FROM codelets %s
WHERE %s WHERE %s
GROUP BY codelet_id GROUP BY codelet_id
@@ -54,18 +54,19 @@ class Tree(object):
] ]
tmpl = "INNER JOIN %s ON %s = %s" tmpl = "INNER JOIN %s ON %s = %s"
for args in data: for args in data:
if table in tables:
if args[0] in tables:
yield tmpl % args yield tmpl % args


tables = set() tables = set()
cond, ranks, arglist = self._root.parameterize(tables)
cond, arglist, ranks, need_ranks = self._root.parameterize(tables)
ranks = ranks or [cond] ranks = ranks or [cond]
# TODO: if the only rank is a single thing and it's a boolean value
# (i.e. not a match statement), get rid of it.
score = "((%s) / %d)" % (" + ".join(ranks), len(ranks))
if need_ranks:
score = " + ((%s) / %d)" % (" + ".join(ranks), len(ranks))
else:
score = ""
joins = " ".join(get_table_joins(tables)) joins = " ".join(get_table_joins(tables))
offset = (page - 1) * page_size offset = (page - 1) * page_size


## TODO: handle pretty ## TODO: handle pretty
query = QUERY_TEMPLATE % (score, joins, cond, page_size, offset) query = QUERY_TEMPLATE % (score, joins, cond, page_size, offset)
return query, tuple(arglist * 2)
return query, tuple(arglist * 2 if need_ranks else arglist)

Loading…
Cancel
Save