A semantic search engine for source code https://bitshift.benkurtovic.com/
Du kannst nicht mehr als 25 Themen auswählen. Themen müssen entweder mit einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.
 
 
 
 
 
 

271 Zeilen
7.3 KiB

  1. from ..languages import LANGS
  2. __all__ = ["String", "Regex", "Text", "Language", "Author", "Date", "Symbol",
  3. "BinaryOp", "UnaryOp"]
  4. class _Node(object):
  5. """Represents a single node in a query tree.
  6. Generally speaking, a node is a constraint applied to the database. Thus,
  7. a :py:class:`~.Language` node represents a constraint where only codelets
  8. of a specific language are selected.
  9. """
  10. def sortkey(self):
  11. """Return a string sort key for the node."""
  12. return ""
  13. def parameterize(self, tables):
  14. """Parameterize the node.
  15. Returns a 3-tuple of (conditional string, rank list, parameter list).
  16. If the rank list is empty, then it is assumed to contain the
  17. conditional string.
  18. """
  19. return "", [], []
  20. class _Literal(object):
  21. """Represents a literal component of a search query, present at the leaves.
  22. A literal might be a string or a regular expression.
  23. """
  24. pass
  25. class String(_Literal):
  26. """Represents a string literal."""
  27. def __init__(self, string):
  28. """
  29. :type string: unicode
  30. """
  31. self.string = string
  32. def __repr__(self):
  33. return "String({0!r})".format(self.string)
  34. def sortkey(self):
  35. return self.string
  36. class Regex(_Literal):
  37. """Represents a regular expression literal."""
  38. def __init__(self, regex):
  39. """
  40. :type string: unicode
  41. """
  42. self.regex = regex
  43. def __repr__(self):
  44. return "Regex({0!r})".format(self.regex)
  45. def sortkey(self):
  46. return self.regex
  47. class Text(_Node):
  48. """Represents a text node.
  49. Searches in codelet names (full-text search), symbols (equality), and
  50. source code (full-text search).
  51. """
  52. def __init__(self, text):
  53. """
  54. :type text: :py:class:`._Literal`
  55. """
  56. self.text = text
  57. def __repr__(self):
  58. return "Text({0})".format(self.text)
  59. def sortkey(self):
  60. return self.text.sortkey()
  61. def parameterize(self, tables):
  62. tables |= {"code", "symbols"}
  63. if isinstance(self.text, Regex):
  64. ranks = ["(codelet_name REGEXP ?)", "(symbol_name REGEXP ?)",
  65. "(code_code REGEXP ?)"]
  66. cond = "(" + " OR ".join(ranks) + ")"
  67. return cond, ranks, [self.text.regex] * 3
  68. else:
  69. ranks = ["(MATCH(codelet_name) AGAINST (? IN BOOLEAN MODE))",
  70. "(MATCH(code_code) AGAINST (? IN BOOLEAN MODE))",
  71. "(symbol_name = ?)"]
  72. cond = "(" + " OR ".join(ranks) + ")"
  73. return cond, ranks, [self.text.string] * 3
  74. class Language(_Node):
  75. """Represents a language node.
  76. Searches in the code_lang field.
  77. """
  78. def __init__(self, lang):
  79. """
  80. :type lang: int
  81. """
  82. self.lang = lang
  83. def __repr__(self):
  84. return "Language({0})".format(LANGS[self.lang])
  85. def sortkey(self):
  86. return LANGS[self.lang]
  87. def parameterize(self, tables):
  88. tables |= {"code"}
  89. return "(code_lang = ?)", [], [self.lang]
  90. class Author(_Node):
  91. """Represents a author node.
  92. Searches in the author_name field (full-text search).
  93. """
  94. def __init__(self, name):
  95. """
  96. :type name: :py:class:`_Literal`
  97. """
  98. self.name = name
  99. def __repr__(self):
  100. return "Author({0})".format(self.name)
  101. def sortkey(self):
  102. return self.name.sortkey()
  103. def parameterize(self, tables):
  104. tables |= {"authors"}
  105. if isinstance(self.name, Regex):
  106. return "(author_name REGEXP ?)", [], [self.name.regex]
  107. cond = "(MATCH(author_name) AGAINST (? IN BOOLEAN MODE))"
  108. return cond, [], [self.name.string]
  109. class Date(_Node):
  110. """Represents a date node.
  111. Searches in the codelet_date_created or codelet_date_modified fields.
  112. """
  113. CREATE = 1
  114. MODIFY = 2
  115. BEFORE = 1
  116. AFTER = 2
  117. def __init__(self, type_, relation, date):
  118. """
  119. :type type_: int (``CREATE`` or ``MODIFY``)
  120. :type relation: int (``BEFORE``, ``AFTER``)
  121. :type date: datetime.datetime
  122. """
  123. self.type = type_
  124. self.relation = relation
  125. self.date = date
  126. def __repr__(self):
  127. types = {self.CREATE: "CREATE", self.MODIFY: "MODIFY"}
  128. relations = {self.BEFORE: "BEFORE", self.AFTER: "AFTER"}
  129. tm = "Date({0}, {1}, {2})"
  130. return tm.format(types[self.type], relations[self.relation], self.date)
  131. def sortkey(self):
  132. return self.date.strftime("%Y%m%d%H%M%S")
  133. def parameterize(self, tables):
  134. column = {self.CREATE: "codelet_date_created",
  135. self.MODIFY: "codelet_date_modified"}[self.type]
  136. op = {self.BEFORE: "<=", self.AFTER: ">="}[self.relation]
  137. return "(" + column + " " + op + " ?)", [], [self.date]
  138. class Symbol(_Node):
  139. """Represents a symbol node.
  140. Searches in symbol_type and symbol_name.
  141. """
  142. ALL = 0
  143. FUNCTION = 1
  144. CLASS = 2
  145. VARIABLE = 3
  146. TYPES = {FUNCTION: "FUNCTION", CLASS: "CLASS", VARIABLE: "VARIABLE"}
  147. def __init__(self, type_, name):
  148. """
  149. :type type_: int (``ALL``, ``FUNCTION``, ``CLASS``, etc.)
  150. :type name: :py:class:`.Literal`
  151. """
  152. self.type = type_
  153. self.name = name
  154. def __repr__(self):
  155. type_ = self.TYPES.get(self.type, "ALL")
  156. return "Symbol({0}, {1})".format(type_, self.name)
  157. def sortkey(self):
  158. return self.name.sortkey()
  159. def parameterize(self, tables):
  160. tables |= {"symbols"}
  161. cond_base = "(symbol_type = ? AND symbol_name = ?)"
  162. if self.type != self.ALL:
  163. return cond_base, [], [self.type, self.name]
  164. ranks = [cond_base] * len(self.TYPES)
  165. cond = "(" + " OR ".join(ranks) + ")"
  166. args = zip(self.TYPES.keys(), [self.name] * len(self.TYPES))
  167. return cond, ranks, [arg for tup in args for arg in tup]
  168. class BinaryOp(_Node):
  169. """Represents a relationship between two nodes: ``and``, ``or``."""
  170. AND = object()
  171. OR = object()
  172. OPS = {AND: "AND", OR: "OR"}
  173. def __init__(self, left, op, right):
  174. self.left = left
  175. self.op = op
  176. self.right = right
  177. def __repr__(self):
  178. tmpl = "BinaryOp({0}, {1}, {2})"
  179. return tmpl.format(self.left, self.OPS[self.op], self.right)
  180. def sortkey(self):
  181. return self.left.sortkey() + self.right.sortkey()
  182. def parameterize(self, tables):
  183. lcond, lranks, largs = self.left.parameterize(tables)
  184. rcond, rranks, rargs = self.right.parameterize(tables)
  185. lranks, rranks = lranks or [lcond], rranks or [rcond]
  186. op = self.OPS[self.op]
  187. cond = "(" + lcond + " " + op + " " + rcond + ")"
  188. return cond, lranks + rranks, largs + rargs
  189. class UnaryOp(_Node):
  190. """Represents a transformation applied to one node: ``not``."""
  191. NOT = object()
  192. OPS = {NOT: "NOT"}
  193. def __init__(self, op, node):
  194. self.op = op
  195. self.node = node
  196. def __repr__(self):
  197. return "UnaryOp({0}, {1})".format(self.OPS[self.op], self.node)
  198. def sortkey(self):
  199. return self.node.sortkey()
  200. def parameterize(self, tables):
  201. cond, ranks, args = self.node.parameterize(tables)
  202. ranks = ranks or [cond]
  203. return "(" + self.OPS[self.op] + " " + cond + ")", ranks, args