A semantic search engine for source code https://bitshift.benkurtovic.com/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

пре 10 година
пре 10 година
пре 10 година
пре 10 година
пре 10 година
пре 10 година
пре 10 година
пре 10 година
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256
  1. from ..languages import LANGS
  2. __all__ = ["String", "Regex", "Text", "Language", "Author", "Date", "Symbol",
  3. "BinaryOp", "UnaryOp"]
  4. class _Node(object):
  5. """Represents a single node in a query tree.
  6. Generally speaking, a node is a constraint applied to the database. Thus,
  7. a :py:class:`~.Language` node represents a constraint where only codelets
  8. of a specific language are selected.
  9. """
  10. def sortkey(self):
  11. """Return a string sort key for the node."""
  12. return ""
  13. def parameterize(self, tables):
  14. """Parameterize the node.
  15. Returns a 3-tuple of (query conditional string, table set, param list).
  16. """
  17. return "", tables, []
  18. class _Literal(object):
  19. """Represents a literal component of a search query, present at the leaves.
  20. A literal might be a string or a regular expression.
  21. """
  22. pass
  23. class String(_Literal):
  24. """Represents a string literal."""
  25. def __init__(self, string):
  26. """
  27. :type string: unicode
  28. """
  29. self.string = string
  30. def __repr__(self):
  31. return "String({0!r})".format(self.string)
  32. def sortkey(self):
  33. return self.string
  34. class Regex(_Literal):
  35. """Represents a regular expression literal."""
  36. def __init__(self, regex):
  37. """
  38. :type string: unicode
  39. """
  40. self.regex = regex
  41. def __repr__(self):
  42. return "Regex({0!r})".format(self.regex)
  43. def sortkey(self):
  44. return self.regex
  45. class Text(_Node):
  46. """Represents a text node.
  47. Searches in codelet names (full-text search), symbols (equality), and
  48. source code (full-text search).
  49. """
  50. def __init__(self, text):
  51. """
  52. :type text: :py:class:`._Literal`
  53. """
  54. self.text = text
  55. def __repr__(self):
  56. return "Text({0})".format(self.text)
  57. def sortkey(self):
  58. return self.text.sortkey()
  59. def parameterize(self, tables):
  60. tables |= {"code", "symbols"}
  61. # (FTS: codelet_name, =: symbol_name, FTS: code_code) vs. node.text (_Literal)
  62. pass
  63. class Language(_Node):
  64. """Represents a language node.
  65. Searches in the code_lang field.
  66. """
  67. def __init__(self, lang):
  68. """
  69. :type lang: int
  70. """
  71. self.lang = lang
  72. def __repr__(self):
  73. return "Language({0})".format(LANGS[self.lang])
  74. def sortkey(self):
  75. return LANGS[self.lang]
  76. def parameterize(self, tables):
  77. tables |= {"code"}
  78. return "(code_lang = ?)", tables, [self.lang]
  79. class Author(_Node):
  80. """Represents a author node.
  81. Searches in the author_name field (full-text search).
  82. """
  83. def __init__(self, name):
  84. """
  85. :type name: :py:class:`_Literal`
  86. """
  87. self.name = name
  88. def __repr__(self):
  89. return "Author({0})".format(self.name)
  90. def sortkey(self):
  91. return self.name.sortkey()
  92. def parameterize(self, tables):
  93. tables |= {"authors"}
  94. if isinstance(self.name, Regex):
  95. return "(author_name REGEXP ?)", [self.name.regex]
  96. cond = "(MATCH(author_name) AGAINST (? IN BOOLEAN MODE))"
  97. return cond, tables, [self.name.string]
  98. class Date(_Node):
  99. """Represents a date node.
  100. Searches in the codelet_date_created or codelet_date_modified fields.
  101. """
  102. CREATE = 1
  103. MODIFY = 2
  104. BEFORE = 1
  105. AFTER = 2
  106. def __init__(self, type_, relation, date):
  107. """
  108. :type type_: int (``CREATE`` or ``MODIFY``)
  109. :type relation: int (``BEFORE``, ``AFTER``)
  110. :type date: datetime.datetime
  111. """
  112. self.type = type_
  113. self.relation = relation
  114. self.date = date
  115. def __repr__(self):
  116. types = {self.CREATE: "CREATE", self.MODIFY: "MODIFY"}
  117. relations = {self.BEFORE: "BEFORE", self.AFTER: "AFTER"}
  118. tm = "Date({0}, {1}, {2})"
  119. return tm.format(types[self.type], relations[self.relation], self.date)
  120. def sortkey(self):
  121. return self.date.strftime("%Y%m%d%H%M%S")
  122. def parameterize(self, tables):
  123. column = {self.CREATE: "codelet_date_created",
  124. self.MODIFY: "codelet_date_modified"}[self.type]
  125. op = {self.BEFORE: "<=", self.AFTER: ">="}[self.relation]
  126. return "(" + column + " " + op + " ?)", tables, [self.date]
  127. class Symbol(_Node):
  128. """Represents a symbol node.
  129. Searches in symbol_type and symbol_name.
  130. """
  131. ALL = 0
  132. FUNCTION = 1
  133. CLASS = 2
  134. VARIABLE = 3
  135. TYPES = {ALL: "ALL", FUNCTION: "FUNCTION", CLASS: "CLASS",
  136. VARIABLE: "VARIABLE"}
  137. def __init__(self, type_, name):
  138. """
  139. :type type_: int (``ALL``, ``FUNCTION``, ``CLASS``, etc.)
  140. :type name: :py:class:`.Literal`
  141. """
  142. self.type = type_
  143. self.name = name
  144. def __repr__(self):
  145. return "Symbol({0}, {1})".format(self.TYPES[self.type], self.name)
  146. def sortkey(self):
  147. return self.name.sortkey()
  148. def parameterize(self, tables):
  149. tables |= {"symbols"}
  150. cond_base = "(symbol_type = ? AND symbol_name = ?)"
  151. if self.type != self.ALL:
  152. return cond_base, tables, [self.type, self.name]
  153. cond = "(" + " OR ".join([cond_base] * len(self.TYPES)) + ")"
  154. args = zip(self.TYPES.keys(), [self.name] * len(self.TYPES))
  155. return cond, tables, [arg for tup in args for arg in tup]
  156. class BinaryOp(_Node):
  157. """Represents a relationship between two nodes: ``and``, ``or``."""
  158. AND = object()
  159. OR = object()
  160. OPS = {AND: "AND", OR: "OR"}
  161. def __init__(self, left, op, right):
  162. self.left = left
  163. self.op = op
  164. self.right = right
  165. def __repr__(self):
  166. tmpl = "BinaryOp({0}, {1}, {2})"
  167. return tmpl.format(self.left, self.OPS[self.op], self.right)
  168. def sortkey(self):
  169. return self.left.sortkey() + self.right.sortkey()
  170. def parameterize(self, tables):
  171. left_cond, tables, left_args = self.left.parameterize(tables)
  172. right_cond, tables, right_args = self.right.parameterize(tables)
  173. op = self.OPS[self.op]
  174. cond = "(" + left_cond + " " + op + " " + right_cond + ")"
  175. return cond, tables, left_args + right_args
  176. class UnaryOp(_Node):
  177. """Represents a transformation applied to one node: ``not``."""
  178. NOT = object()
  179. OPS = {NOT: "NOT"}
  180. def __init__(self, op, node):
  181. self.op = op
  182. self.node = node
  183. def __repr__(self):
  184. return "UnaryOp({0}, {1})".format(self.OPS[self.op], self.node)
  185. def sortkey(self):
  186. return self.node.sortkey()
  187. def parameterize(self, tables):
  188. cond, tables, args = self.node.parameterize(tables)
  189. return "(" + self.OPS[self.op] + " " + cond + ")", tables, args