A semantic search engine for source code https://bitshift.benkurtovic.com/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

276 lines
7.6 KiB

  1. from ..languages import LANGS
  2. __all__ = ["String", "Regex", "Text", "Language", "Author", "Date", "Symbol",
  3. "BinaryOp", "UnaryOp"]
  4. class _Node(object):
  5. """Represents a single node in a query tree.
  6. Generally speaking, a node is a constraint applied to the database. Thus,
  7. a :py:class:`~.Language` node represents a constraint where only codelets
  8. of a specific language are selected.
  9. """
  10. def sortkey(self):
  11. """Return a string sort key for the node."""
  12. return ""
  13. def parameterize(self, tables):
  14. """Parameterize the node.
  15. Returns a 4-tuple of (conditional string, parameter list, rank list,
  16. should-we-rank boolean). If the rank list is empty, then it is assumed
  17. to contain the conditional string.
  18. """
  19. return "", [], [], False
  20. class _Literal(object):
  21. """Represents a literal component of a search query, present at the leaves.
  22. A literal might be a string or a regular expression.
  23. """
  24. pass
  25. class String(_Literal):
  26. """Represents a string literal."""
  27. def __init__(self, string):
  28. """
  29. :type string: unicode
  30. """
  31. self.string = string
  32. def __repr__(self):
  33. return "String({0!r})".format(self.string)
  34. def sortkey(self):
  35. return self.string
  36. class Regex(_Literal):
  37. """Represents a regular expression literal."""
  38. def __init__(self, regex):
  39. """
  40. :type string: unicode
  41. """
  42. self.regex = regex
  43. def __repr__(self):
  44. return "Regex({0!r})".format(self.regex)
  45. def sortkey(self):
  46. return self.regex
  47. class Text(_Node):
  48. """Represents a text node.
  49. Searches in codelet names (full-text search), symbols (equality), and
  50. source code (full-text search).
  51. """
  52. def __init__(self, text):
  53. """
  54. :type text: :py:class:`._Literal`
  55. """
  56. self.text = text
  57. def __repr__(self):
  58. return "Text({0})".format(self.text)
  59. def sortkey(self):
  60. return self.text.sortkey()
  61. def parameterize(self, tables):
  62. tables |= {"code", "symbols"}
  63. if isinstance(self.text, Regex):
  64. ranks = ["(codelet_name REGEXP ?)", "(symbol_name REGEXP ?)",
  65. "(code_code REGEXP ?)"]
  66. text = self.text.regex
  67. else:
  68. ranks = ["(MATCH(codelet_name) AGAINST (? IN BOOLEAN MODE))",
  69. "(MATCH(code_code) AGAINST (? IN BOOLEAN MODE))",
  70. "(symbol_name = ?)"]
  71. text = self.text.string
  72. cond = "(" + " OR ".join(ranks) + ")"
  73. return cond, [text] * 3, ranks, True
  74. class Language(_Node):
  75. """Represents a language node.
  76. Searches in the code_lang field.
  77. """
  78. def __init__(self, lang):
  79. """
  80. :type lang: int
  81. """
  82. self.lang = lang
  83. def __repr__(self):
  84. return "Language({0})".format(LANGS[self.lang])
  85. def sortkey(self):
  86. return LANGS[self.lang]
  87. def parameterize(self, tables):
  88. tables |= {"code"}
  89. return "(code_lang = ?)", [self.lang], [], False
  90. class Author(_Node):
  91. """Represents a author node.
  92. Searches in the author_name field (full-text search).
  93. """
  94. def __init__(self, name):
  95. """
  96. :type name: :py:class:`_Literal`
  97. """
  98. self.name = name
  99. def __repr__(self):
  100. return "Author({0})".format(self.name)
  101. def sortkey(self):
  102. return self.name.sortkey()
  103. def parameterize(self, tables):
  104. tables |= {"authors"}
  105. if isinstance(self.name, Regex):
  106. return "(author_name REGEXP ?)", [self.name.regex], [], False
  107. cond = "(MATCH(author_name) AGAINST (? IN BOOLEAN MODE))"
  108. return cond, [self.name.string], [], True
  109. class Date(_Node):
  110. """Represents a date node.
  111. Searches in the codelet_date_created or codelet_date_modified fields.
  112. """
  113. CREATE = 1
  114. MODIFY = 2
  115. BEFORE = 1
  116. AFTER = 2
  117. def __init__(self, type_, relation, date):
  118. """
  119. :type type_: int (``CREATE`` or ``MODIFY``)
  120. :type relation: int (``BEFORE``, ``AFTER``)
  121. :type date: datetime.datetime
  122. """
  123. self.type = type_
  124. self.relation = relation
  125. self.date = date
  126. def __repr__(self):
  127. types = {self.CREATE: "CREATE", self.MODIFY: "MODIFY"}
  128. relations = {self.BEFORE: "BEFORE", self.AFTER: "AFTER"}
  129. tm = "Date({0}, {1}, {2})"
  130. return tm.format(types[self.type], relations[self.relation], self.date)
  131. def sortkey(self):
  132. return self.date.strftime("%Y%m%d%H%M%S")
  133. def parameterize(self, tables):
  134. column = {self.CREATE: "codelet_date_created",
  135. self.MODIFY: "codelet_date_modified"}[self.type]
  136. op = {self.BEFORE: "<=", self.AFTER: ">="}[self.relation]
  137. return "(" + column + " " + op + " ?)", [self.date], [], False
  138. class Symbol(_Node):
  139. """Represents a symbol node.
  140. Searches in symbol_type and symbol_name.
  141. """
  142. ALL = 0
  143. FUNCTION = 1
  144. CLASS = 2
  145. VARIABLE = 3
  146. TYPES = {FUNCTION: "FUNCTION", CLASS: "CLASS", VARIABLE: "VARIABLE"}
  147. def __init__(self, type_, name):
  148. """
  149. :type type_: int (``ALL``, ``FUNCTION``, ``CLASS``, etc.)
  150. :type name: :py:class:`._Literal`
  151. """
  152. self.type = type_
  153. self.name = name
  154. def __repr__(self):
  155. type_ = self.TYPES.get(self.type, "ALL")
  156. return "Symbol({0}, {1})".format(type_, self.name)
  157. def sortkey(self):
  158. return self.name.sortkey()
  159. def parameterize(self, tables):
  160. tables |= {"code", "symbols"}
  161. if isinstance(self.name, Regex):
  162. cond, name = "symbol_name REGEXP ?", self.name.regex
  163. else:
  164. cond, name = "symbol_name = ?", self.name.string
  165. if self.type == self.ALL:
  166. types = ", ".join(str(type_) for type_ in self.TYPES)
  167. cond += " AND symbol_type IN (%s)" % types
  168. if self.type != self.ALL:
  169. cond += " AND symbol_type = %d" % self.type
  170. return "(" + cond + ")", [name], [], False
  171. class BinaryOp(_Node):
  172. """Represents a relationship between two nodes: ``and``, ``or``."""
  173. AND = object()
  174. OR = object()
  175. OPS = {AND: "AND", OR: "OR"}
  176. def __init__(self, left, op, right):
  177. self.left = left
  178. self.op = op
  179. self.right = right
  180. def __repr__(self):
  181. tmpl = "BinaryOp({0}, {1}, {2})"
  182. return tmpl.format(self.left, self.OPS[self.op], self.right)
  183. def sortkey(self):
  184. return self.left.sortkey() + self.right.sortkey()
  185. def parameterize(self, tables):
  186. lcond, largs, lranks, need_lranks = self.left.parameterize(tables)
  187. rcond, rargs, rranks, need_rranks = self.right.parameterize(tables)
  188. lranks, rranks = lranks or [lcond], rranks or [rcond]
  189. op = self.OPS[self.op]
  190. cond = "(" + lcond + " " + op + " " + rcond + ")"
  191. need_ranks = need_lranks or need_rranks or self.op == self.OR
  192. return cond, largs + rargs, lranks + rranks, need_ranks
  193. class UnaryOp(_Node):
  194. """Represents a transformation applied to one node: ``not``."""
  195. NOT = object()
  196. OPS = {NOT: "NOT"}
  197. def __init__(self, op, node):
  198. self.op = op
  199. self.node = node
  200. def __repr__(self):
  201. return "UnaryOp({0}, {1})".format(self.OPS[self.op], self.node)
  202. def sortkey(self):
  203. return self.node.sortkey()
  204. def parameterize(self, tables):
  205. cond, args, ranks, need_ranks = self.node.parameterize(tables)
  206. new_cond = "(" + self.OPS[self.op] + " " + cond + ")"
  207. ranks = ranks or [cond]
  208. return new_cond, args, ranks, need_ranks