A semantic search engine for source code https://bitshift.benkurtovic.com/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

218 lines
5.9 KiB

  1. import ast
  2. import re
  3. encoding_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", re.UNICODE)
  4. class _TreeWalker(ast.NodeVisitor):
  5. """
  6. Local node visitor for python abstract syntax trees.
  7. :ivar symbols: (dict) Information on variables, functions, and classes
  8. symbolsulated from an abstract syntax tree.
  9. :ivar cache: (dict or None) Information stored about parent nodes. Added
  10. to symbols when node reaches the lowest possible level.
  11. .. todo::
  12. Add visit funciton for ast.Name to record all uses of a variable.
  13. Use self.cache to store extra information about nodes.
  14. """
  15. def __init__(self):
  16. """
  17. Create a _TreeCutter instance.
  18. """
  19. self.symbols = {'vars': {}, 'functions': {}, 'classes': {}}
  20. self.cache = []
  21. def clear_cache(self):
  22. self.cache = []
  23. def block_position(self, node):
  24. """
  25. Helper function to get the start and end lines of an AST node.
  26. :param node: The node.
  27. :type node: ast.FunctionDef or ast.ClassDef or ast.Module
  28. """
  29. start_line, start_col = node.lineno, node.col_offset
  30. temp_node = node
  31. while 'body' in temp_node.__dict__:
  32. temp_node = temp_node.body[-1]
  33. end_line, end_col = temp_node.lineno, temp_node.col_offset
  34. if start_line == end_line:
  35. return [start_line, start_col, end_line, -1]
  36. return [start_line, start_col, end_line, end_col]
  37. def visit_Assign(self, node):
  38. """
  39. Visits Assign nodes in a tree. Adds relevant data about them to symbols.
  40. :param node: The current node.
  41. :type node: ast.Assign
  42. .. todo::
  43. Add value and type metadata to symbols.
  44. """
  45. pos = self.block_position(node)
  46. for t in node.targets:
  47. self.visit(t)
  48. for name in self.cache:
  49. if not self.symbols['vars'].has_key(name):
  50. self.symbols['vars'][name] = {'assignments': [], 'uses': []}
  51. self.symbols['vars'][name]['assignments'].append(pos)
  52. self.clear_cache()
  53. self.visit(node.value)
  54. for name in self.cache:
  55. if not self.symbols['vars'].has_key(name):
  56. self.symbols['vars'][name] = {'assignments': [], 'uses': []}
  57. self.symbols['vars'][name]['uses'].append(pos)
  58. self.clear_cache()
  59. def visit_FunctionDef(self, node):
  60. """
  61. Visits FunctionDef nodes in a tree. Adds relevant data about them to symbols.
  62. :param node: The current node.
  63. :type node: ast.FunctionDef
  64. .. todo::
  65. Add arguments and decorators metadata to symbols.
  66. """
  67. pos = self.block_position(node)
  68. if not self.symbols['functions'].has_key(node.name):
  69. self.symbols['functions'][node.name] = {'assignments': [], 'uses': []}
  70. self.symbols['functions'][node.name]['assignments'].append(pos)
  71. self.generic_visit(node)
  72. def visit_Call(self, node):
  73. """
  74. Visits Function Call nodes in a tree. Adds relevant data about them
  75. in the functions section for symbols.
  76. :param node: The current node.
  77. :type node: ast.Call
  78. .. todo::
  79. Add arguments and decorators metadata to symbols.
  80. """
  81. pos = self.block_position(node)
  82. self.visit(node.func)
  83. if not self.cache:
  84. return
  85. name = self.cache.pop()
  86. if not self.symbols['functions'].has_key(name):
  87. self.symbols['functions'][name] = {'assignments': [], 'uses': []}
  88. self.symbols['functions'][name]['uses'].append(pos)
  89. for name in self.cache:
  90. if not self.symbols['vars'].has_key(name):
  91. self.symbols['vars'][name] = {'assignments': [], 'uses': []}
  92. self.symbols['vars'][name]['uses'].append(pos)
  93. self.clear_cache()
  94. for a in node.args:
  95. self.visit(a)
  96. for name in self.cache:
  97. if not self.symbols['vars'].has_key(name):
  98. self.symbols['vars'][name] = {'assignments': [], 'uses': []}
  99. self.symbols['vars'][name]['uses'].append(pos)
  100. self.clear_cache()
  101. def visit_ClassDef(self, node):
  102. """
  103. Visits ClassDef nodes in a tree. Adds relevant data about them to symbols.
  104. :param node: The current node.
  105. :type node: ast.ClassDef
  106. .. todo::
  107. Add arguments, inherits, and decorators metadata to symbols.
  108. """
  109. pos = self.block_position(node)
  110. if node.name not in self.symbols['classes']:
  111. self.symbols['classes'][node.name] = {'assignments': [], 'uses': []}
  112. self.symbols['classes'][node.name]['assignments'].append(pos)
  113. self.generic_visit(node)
  114. def visit_Name(self, node):
  115. self.cache.append(node.id)
  116. def visit_Attribute(self, node):
  117. self.visit(node.value)
  118. self.cache.append(node.attr)
  119. def visit_Import(self, node):
  120. pos = self.block_position(node)
  121. # look through aliases
  122. def parse_py(codelet):
  123. """
  124. Adds 'symbols' field to the codelet after parsing the python code.
  125. :param codelet: The codelet object to parsed.
  126. :type code: Codelet
  127. """
  128. def strip_encoding(lines):
  129. """Strips the encoding line from a file, which breaks the parser."""
  130. it = iter(lines)
  131. try:
  132. first = next(it)
  133. if not encoding_re.match(first):
  134. yield first
  135. second = next(it)
  136. if not encoding_re.match(second):
  137. yield second
  138. except StopIteration:
  139. return
  140. for line in it:
  141. yield line
  142. try:
  143. tree = ast.parse("\n".join(strip_encoding(codelet.code.splitlines())))
  144. except SyntaxError:
  145. ## TODO: add some logging here?
  146. return {}
  147. walker = _TreeWalker()
  148. walker.visit(tree)
  149. return walker.symbols