A semantic search engine for source code https://bitshift.benkurtovic.com/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

257 lines
6.6 KiB

  1. from ..languages import LANGS
  2. __all__ = ["String", "Regex", "Text", "Language", "Author", "Date", "Symbol",
  3. "BinaryOp", "UnaryOp"]
  4. class _Node(object):
  5. """Represents a single node in a query tree.
  6. Generally speaking, a node is a constraint applied to the database. Thus,
  7. a :py:class:`~.Language` node represents a constraint where only codelets
  8. of a specific language are selected.
  9. """
  10. def sortkey(self):
  11. """Return a string sort key for the node."""
  12. return ""
  13. def parameterize(self, tables):
  14. """Parameterize the node.
  15. Returns a 3-tuple of (query conditional string, table set, param list).
  16. """
  17. return "", tables, []
  18. class _Literal(object):
  19. """Represents a literal component of a search query, present at the leaves.
  20. A literal might be a string or a regular expression.
  21. """
  22. pass
  23. class String(_Literal):
  24. """Represents a string literal."""
  25. def __init__(self, string):
  26. """
  27. :type string: unicode
  28. """
  29. self.string = string
  30. def __repr__(self):
  31. return "String({0!r})".format(self.string)
  32. def sortkey(self):
  33. return self.string
  34. class Regex(_Literal):
  35. """Represents a regular expression literal."""
  36. def __init__(self, regex):
  37. """
  38. :type string: unicode
  39. """
  40. self.regex = regex
  41. def __repr__(self):
  42. return "Regex({0!r})".format(self.regex)
  43. def sortkey(self):
  44. return self.regex
  45. class Text(_Node):
  46. """Represents a text node.
  47. Searches in codelet names (full-text search), symbols (equality), and
  48. source code (full-text search).
  49. """
  50. def __init__(self, text):
  51. """
  52. :type text: :py:class:`._Literal`
  53. """
  54. self.text = text
  55. def __repr__(self):
  56. return "Text({0})".format(self.text)
  57. def sortkey(self):
  58. return self.text.sortkey()
  59. def parameterize(self, tables):
  60. tables |= {"code", "symbols"}
  61. # (FTS: codelet_name, =: symbol_name, FTS: code_code) vs. node.text (_Literal)
  62. pass
  63. class Language(_Node):
  64. """Represents a language node.
  65. Searches in the code_lang field.
  66. """
  67. def __init__(self, lang):
  68. """
  69. :type lang: int
  70. """
  71. self.lang = lang
  72. def __repr__(self):
  73. return "Language({0})".format(LANGS[self.lang])
  74. def sortkey(self):
  75. return LANGS[self.lang]
  76. def parameterize(self, tables):
  77. tables |= {"code"}
  78. return "(code_lang = ?)", tables, [self.lang]
  79. class Author(_Node):
  80. """Represents a author node.
  81. Searches in the author_name field (full-text search).
  82. """
  83. def __init__(self, name):
  84. """
  85. :type name: :py:class:`_Literal`
  86. """
  87. self.name = name
  88. def __repr__(self):
  89. return "Author({0})".format(self.name)
  90. def sortkey(self):
  91. return self.name.sortkey()
  92. def parameterize(self, tables):
  93. tables |= {"authors"}
  94. if isinstance(self.name, Regex):
  95. return "(author_name REGEXP ?)", [self.name.regex]
  96. cond = "(MATCH(author_name) AGAINST (? IN BOOLEAN MODE))"
  97. return cond, tables, [self.name.string]
  98. class Date(_Node):
  99. """Represents a date node.
  100. Searches in the codelet_date_created or codelet_date_modified fields.
  101. """
  102. CREATE = 1
  103. MODIFY = 2
  104. BEFORE = 1
  105. AFTER = 2
  106. def __init__(self, type_, relation, date):
  107. """
  108. :type type_: int (``CREATE`` or ``MODIFY``)
  109. :type relation: int (``BEFORE``, ``AFTER``)
  110. :type date: datetime.datetime
  111. """
  112. self.type = type_
  113. self.relation = relation
  114. self.date = date
  115. def __repr__(self):
  116. types = {self.CREATE: "CREATE", self.MODIFY: "MODIFY"}
  117. relations = {self.BEFORE: "BEFORE", self.AFTER: "AFTER"}
  118. tm = "Date({0}, {1}, {2})"
  119. return tm.format(types[self.type], relations[self.relation], self.date)
  120. def sortkey(self):
  121. return self.date.strftime("%Y%m%d%H%M%S")
  122. def parameterize(self, tables):
  123. column = {self.CREATE: "codelet_date_created",
  124. self.MODIFY: "codelet_date_modified"}[self.type]
  125. op = {self.BEFORE: "<=", self.AFTER: ">="}[self.relation]
  126. return "(" + column + " " + op + " ?)", tables, [self.date]
  127. class Symbol(_Node):
  128. """Represents a symbol node.
  129. Searches in symbol_type and symbol_name.
  130. """
  131. ALL = 0
  132. FUNCTION = 1
  133. CLASS = 2
  134. VARIABLE = 3
  135. TYPES = {ALL: "ALL", FUNCTION: "FUNCTION", CLASS: "CLASS",
  136. VARIABLE: "VARIABLE"}
  137. def __init__(self, type_, name):
  138. """
  139. :type type_: int (``ALL``, ``FUNCTION``, ``CLASS``, etc.)
  140. :type name: :py:class:`.Literal`
  141. """
  142. self.type = type_
  143. self.name = name
  144. def __repr__(self):
  145. return "Symbol({0}, {1})".format(self.TYPES[self.type], self.name)
  146. def sortkey(self):
  147. return self.name.sortkey()
  148. def parameterize(self, tables):
  149. tables |= {"symbols"}
  150. cond_base = "(symbol_type = ? AND symbol_name = ?)"
  151. if self.type != self.ALL:
  152. return cond_base, tables, [self.type, self.name]
  153. cond = "(" + " OR ".join([cond_base] * len(self.TYPES)) + ")"
  154. args = zip(self.TYPES.keys(), [self.name] * len(self.TYPES))
  155. return cond, tables, [arg for tup in args for arg in tup]
  156. class BinaryOp(_Node):
  157. """Represents a relationship between two nodes: ``and``, ``or``."""
  158. AND = object()
  159. OR = object()
  160. OPS = {AND: "AND", OR: "OR"}
  161. def __init__(self, left, op, right):
  162. self.left = left
  163. self.op = op
  164. self.right = right
  165. def __repr__(self):
  166. tmpl = "BinaryOp({0}, {1}, {2})"
  167. return tmpl.format(self.left, self.OPS[self.op], self.right)
  168. def sortkey(self):
  169. return self.left.sortkey() + self.right.sortkey()
  170. def parameterize(self, tables):
  171. left_cond, tables, left_args = self.left.parameterize(tables)
  172. right_cond, tables, right_args = self.right.parameterize(tables)
  173. op = self.OPS[self.op]
  174. cond = "(" + left_cond + " " + op + " " + right_cond + ")"
  175. return cond, tables, left_args + right_args
  176. class UnaryOp(_Node):
  177. """Represents a transformation applied to one node: ``not``."""
  178. NOT = object()
  179. OPS = {NOT: "NOT"}
  180. def __init__(self, op, node):
  181. self.op = op
  182. self.node = node
  183. def __repr__(self):
  184. return "UnaryOp({0}, {1})".format(self.OPS[self.op], self.node)
  185. def sortkey(self):
  186. return self.node.sortkey()
  187. def parameterize(self, tables):
  188. cond, tables, args = self.node.parameterize(tables)
  189. return "(" + self.OPS[self.op] + " " + cond + ")", tables, args