A semantic search engine for source code https://bitshift.benkurtovic.com/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

85 lines
2.6 KiB

  1. from . import nodes
  2. __all__ = ["Tree"]
  3. QUERY_TEMPLATE = """SELECT codelet_id, MAX(codelet_rank%s) AS score
  4. FROM codelets %s
  5. WHERE %s
  6. GROUP BY codelet_id
  7. ORDER BY score DESC
  8. LIMIT %d OFFSET %d""".replace("\n", " ")
  9. class Tree(object):
  10. """Represents a query tree."""
  11. def __init__(self, root):
  12. self._root = root
  13. def __repr__(self):
  14. return "Tree({0})".format(self._root)
  15. @property
  16. def root(self):
  17. """The root node of the tree."""
  18. return self._root
  19. def sortkey(self):
  20. """Return a string sort key for the query tree."""
  21. return self._root.sortkey()
  22. def serialize(self):
  23. """Create a string representation of the query for caching.
  24. :return: Query string representation.
  25. :rtype: str
  26. """
  27. return repr(self)
  28. def walk(self, node_type=None):
  29. """Walk through the query tree, returning nodes of a specific type."""
  30. pending = [self._root]
  31. while pending:
  32. node = pending.pop()
  33. if not node_type or isinstance(node, node_type):
  34. yield node
  35. if isinstance(node, nodes.UnaryOp):
  36. pending.append(node.node)
  37. elif isinstance(node, nodes.BinaryOp):
  38. pending.extend([node.left, node.right])
  39. def build_query(self, page=1, page_size=10):
  40. """Convert the query tree into a parameterized SQL SELECT statement.
  41. :param page: The page number to get results for.
  42. :type page: int
  43. :param page_size: The number of results per page.
  44. :type page_size: int
  45. :return: SQL query data.
  46. :rtype: 2-tuple of (SQL statement string, query parameter tuple)
  47. """
  48. def get_table_joins(tables):
  49. joins = [
  50. ("INNER", "code", "codelet_code_id", "code_id"),
  51. ("LEFT", "authors", "author_codelet", "codelet_id"),
  52. ("LEFT", "symbols", "symbol_code", "code_id"),
  53. ("LEFT", "symbol_locations", "sloc_symbol", "symbol_id")
  54. ]
  55. tmpl = "%s JOIN %s ON %s = %s"
  56. for args in joins:
  57. if args[1] in tables:
  58. yield tmpl % args
  59. tables = set()
  60. cond, arglist, ranks, need_ranks = self._root.parameterize(tables)
  61. ranks = ranks or [cond]
  62. if need_ranks:
  63. score = " + ((%s) / %d)" % (" + ".join(ranks), len(ranks))
  64. else:
  65. score = ""
  66. joins = " ".join(get_table_joins(tables))
  67. offset = (page - 1) * page_size
  68. query = QUERY_TEMPLATE % (score, joins, cond, page_size, offset)
  69. return query, tuple(arglist * 2 if need_ranks else arglist)