A semantic search engine for source code https://bitshift.benkurtovic.com/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

298 lines
8.4 KiB

  1. from ..languages import LANGS
# Public names of this module: the two literal types, the leaf constraint
# nodes, and the two operator nodes. The ``_Node`` and ``_Literal`` base
# classes are deliberately not listed.
__all__ = ["String", "Regex", "Text", "Language", "Author", "Date", "Symbol",
           "BinaryOp", "UnaryOp"]
  4. class _Node(object):
  5. """Represents a single node in a query tree.
  6. Generally speaking, a node is a constraint applied to the database. Thus,
  7. a :py:class:`~.Language` node represents a constraint where only codelets
  8. of a specific language are selected.
  9. """
  10. def _null_regex(self, expr):
  11. """Implements a regex search with support for a null expression."""
  12. return "IF(ISNULL(%s), 0, %s REGEXP ?)" % (expr, expr)
  13. def sortkey(self):
  14. """Return a string sort key for the node."""
  15. return ""
  16. def parameterize(self, tables):
  17. """Parameterize the node.
  18. Returns a 4-tuple of (conditional string, parameter list, rank list,
  19. should-we-rank boolean). If the rank list is empty, then it is assumed
  20. to contain the conditional string.
  21. """
  22. return "", [], [], False
  23. class _Literal(object):
  24. """Represents a literal component of a search query, present at the leaves.
  25. A literal might be a string or a regular expression.
  26. """
  27. pass
  28. class String(_Literal):
  29. """Represents a string literal."""
  30. def __init__(self, string):
  31. """
  32. :type string: unicode
  33. """
  34. self.string = string
  35. def __repr__(self):
  36. return "String({0!r})".format(self.string)
  37. def sortkey(self):
  38. return self.string
  39. class Regex(_Literal):
  40. """Represents a regular expression literal."""
  41. def __init__(self, regex):
  42. """
  43. :type string: unicode
  44. """
  45. self.regex = regex
  46. def __repr__(self):
  47. return "Regex({0!r})".format(self.regex)
  48. def sortkey(self):
  49. return self.regex
  50. class Text(_Node):
  51. """Represents a text node.
  52. Searches in codelet names (full-text search), symbols (equality), and
  53. source code (full-text search).
  54. """
  55. def __init__(self, text):
  56. """
  57. :type text: :py:class:`._Literal`
  58. """
  59. self.text = text
  60. def __repr__(self):
  61. return "Text({0})".format(self.text)
  62. def sortkey(self):
  63. return self.text.sortkey()
  64. def parameterize(self, tables):
  65. tables |= {"code", "symbols"}
  66. if isinstance(self.text, Regex):
  67. ranks = ["(codelet_name REGEXP ?)", "(code_code REGEXP ?)",
  68. self._null_regex("symbol_name")]
  69. text = self.text.regex
  70. else:
  71. ranks = ["(MATCH(codelet_name) AGAINST (? IN BOOLEAN MODE))",
  72. "(MATCH(code_code) AGAINST (? IN BOOLEAN MODE))",
  73. "(symbol_name <=> ?)"]
  74. text = self.text.string
  75. cond = "(" + " OR ".join(ranks) + ")"
  76. return cond, [text] * 3, ranks, True
  77. class Language(_Node):
  78. """Represents a language node.
  79. Searches in the code_lang field.
  80. """
  81. def __init__(self, lang):
  82. """
  83. :type lang: int
  84. """
  85. self.lang = lang
  86. def __repr__(self):
  87. return "Language({0})".format(LANGS[self.lang])
  88. def sortkey(self):
  89. return LANGS[self.lang]
  90. def parameterize(self, tables):
  91. tables |= {"code"}
  92. return "(code_lang <=> ?)", [self.lang], [], False
  93. class Author(_Node):
  94. """Represents a author node.
  95. Searches in the author_name field (full-text search).
  96. """
  97. def __init__(self, name):
  98. """
  99. :type name: :py:class:`_Literal`
  100. """
  101. self.name = name
  102. def __repr__(self):
  103. return "Author({0})".format(self.name)
  104. def sortkey(self):
  105. return self.name.sortkey()
  106. def parameterize(self, tables):
  107. tables |= {"authors"}
  108. if isinstance(self.name, Regex):
  109. cond = self._null_regex("author_name")
  110. return cond, [self.name.regex], [], False
  111. cond = "(MATCH(author_name) AGAINST (? IN BOOLEAN MODE))"
  112. return cond, [self.name.string], [], True
  113. class Date(_Node):
  114. """Represents a date node.
  115. Searches in the codelet_date_created or codelet_date_modified fields.
  116. """
  117. CREATE = 1
  118. MODIFY = 2
  119. BEFORE = 1
  120. AFTER = 2
  121. def __init__(self, type_, relation, date):
  122. """
  123. :type type_: int (``CREATE`` or ``MODIFY``)
  124. :type relation: int (``BEFORE``, ``AFTER``)
  125. :type date: datetime.datetime
  126. """
  127. self.type = type_
  128. self.relation = relation
  129. self.date = date
  130. def __repr__(self):
  131. types = {self.CREATE: "CREATE", self.MODIFY: "MODIFY"}
  132. relations = {self.BEFORE: "BEFORE", self.AFTER: "AFTER"}
  133. tm = "Date({0}, {1}, {2})"
  134. return tm.format(types[self.type], relations[self.relation], self.date)
  135. def sortkey(self):
  136. return self.date.strftime("%Y%m%d%H%M%S")
  137. def parameterize(self, tables):
  138. column = {self.CREATE: "codelet_date_created",
  139. self.MODIFY: "codelet_date_modified"}[self.type]
  140. op = {self.BEFORE: "<=", self.AFTER: ">="}[self.relation]
  141. cond = "IF(ISNULL(%s), 0, %s %s ?)" % (column, column, op)
  142. return cond, [self.date], [], False
  143. class Symbol(_Node):
  144. """Represents a symbol node.
  145. Searches in symbol_type and symbol_name.
  146. """
  147. ALL = -1
  148. DEFINE = 0
  149. USE = 1
  150. FUNCTION = 0
  151. CLASS = 1
  152. VARIABLE = 2
  153. NAMESPACE = 3
  154. INTERFACE = 4
  155. IMPORT = 5
  156. TYPES = ["functions", "classes", "vars", "namespaces", "interfaces",
  157. "imports"]
  158. TYPE_REPR = ["FUNCTION", "CLASS", "VARIABLE", "NAMESPACE", "INTERFACE",
  159. "IMPORT"]
  160. def __init__(self, context, type_, name):
  161. """
  162. :type context: int (``DEFINE`` or ``USE``)
  163. :type type_: int (``ALL``, ``FUNCTION``, ``CLASS``, etc.)
  164. :type name: :py:class:`._Literal`
  165. """
  166. self.context = context
  167. self.type = type_
  168. self.name = name
  169. def __repr__(self):
  170. context = ["DEFINE", "USE", "ALL"][self.context]
  171. type_ = self.TYPE_REPR[self.type] if self.type >= 0 else "ALL"
  172. return "Symbol({0}, {1}, {2})".format(context, type_, self.name)
  173. def sortkey(self):
  174. return self.name.sortkey()
  175. def parameterize(self, tables):
  176. tables |= {"code", "symbols"}
  177. if isinstance(self.name, Regex):
  178. cond, name = self._null_regex("symbol_name"), self.name.regex
  179. else:
  180. cond, name = "symbol_name <=> ?", self.name.string
  181. if self.type == self.ALL:
  182. types = ", ".join(str(typ) for typ in xrange(len(self.TYPES)))
  183. part = " AND IF(ISNULL(symbol_type), 0, symbol_type IN (%s))"
  184. cond += part % types
  185. if self.type != self.ALL:
  186. cond += " AND symbol_type <=> %d" % self.type
  187. if self.context != self.ALL:
  188. tables |= {"symbol_locations"}
  189. cond += " AND sloc_type <=> %d" % self.context
  190. return "(" + cond + ")", [name], [], False
  191. class BinaryOp(_Node):
  192. """Represents a relationship between two nodes: ``and``, ``or``."""
  193. AND = object()
  194. OR = object()
  195. OPS = {AND: "AND", OR: "OR"}
  196. def __init__(self, left, op, right):
  197. self.left = left
  198. self.op = op
  199. self.right = right
  200. def __repr__(self):
  201. tmpl = "BinaryOp({0}, {1}, {2})"
  202. return tmpl.format(self.left, self.OPS[self.op], self.right)
  203. def sortkey(self):
  204. return self.left.sortkey() + self.right.sortkey()
  205. def parameterize(self, tables):
  206. lcond, largs, lranks, need_lranks = self.left.parameterize(tables)
  207. rcond, rargs, rranks, need_rranks = self.right.parameterize(tables)
  208. lranks, rranks = lranks or [lcond], rranks or [rcond]
  209. op = self.OPS[self.op]
  210. cond = "(" + lcond + " " + op + " " + rcond + ")"
  211. need_ranks = need_lranks or need_rranks or self.op == self.OR
  212. return cond, largs + rargs, lranks + rranks, need_ranks
  213. class UnaryOp(_Node):
  214. """Represents a transformation applied to one node: ``not``."""
  215. NOT = object()
  216. OPS = {NOT: "NOT"}
  217. def __init__(self, op, node):
  218. self.op = op
  219. self.node = node
  220. def __repr__(self):
  221. return "UnaryOp({0}, {1})".format(self.OPS[self.op], self.node)
  222. def sortkey(self):
  223. return self.node.sortkey()
  224. def parameterize(self, tables):
  225. cond, args, ranks, need_ranks = self.node.parameterize(tables)
  226. new_cond = "(" + self.OPS[self.op] + " " + cond + ")"
  227. ranks = ranks or [cond]
  228. return new_cond, args, ranks, need_ranks