A semantic search engine for source code https://bitshift.benkurtovic.com/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

187 lines
5.3 KiB

  1. import ast
  2. import re
  3. encoding_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", re.UNICODE)
  4. class _CachedWalker(ast.NodeVisitor):
  5. """
  6. Local node visitor for python abstract syntax trees.
  7. :ivar accum: (dict) Information on variables, functions, and classes
  8. accumulated from an abstract syntax tree.
  9. :ivar cache: (dict or None) Information stored about parent nodes. Added
  10. to accum when node reaches the lowest possible level.
  11. .. todo::
  12. Add visit funciton for ast.Name to record all uses of a variable.
  13. Use self.cache to store extra information about nodes.
  14. """
  15. def __init__(self):
  16. """
  17. Create a _TreeCutter instance.
  18. """
  19. self.accum = {'vars': {}, 'functions': {}, 'classes': {}}
  20. self.cache = []
  21. def block_position(self, node):
  22. """
  23. Helper function to get the start and end lines of an AST node.
  24. :param node: The node.
  25. :type node: ast.FunctionDef or ast.ClassDef or ast.Module
  26. """
  27. start_line, start_col = node.lineno, node.col_offset
  28. temp_node = node
  29. while 'body' in temp_node.__dict__:
  30. temp_node = temp_node.body[-1]
  31. end_line, end_col = temp_node.lineno, temp_node.col_offset
  32. return (start_line, start_col, end_line, end_col)
  33. def visit_Assign(self, node):
  34. """
  35. Visits Assign nodes in a tree. Adds relevant data about them to accum.
  36. :param node: The current node.
  37. :type node: ast.Assign
  38. .. todo::
  39. Add value and type metadata to accum.
  40. """
  41. line, col = node.lineno, node.col_offset
  42. pos = (line, col, -1, -1)
  43. self.cache.append({'nodes': []})
  44. self.generic_visit(node)
  45. last = self.cache.pop()
  46. for name in last['nodes']:
  47. if not self.accum['vars'].has_key(name):
  48. self.accum['vars'][name] = {'assignments': [], 'uses': []}
  49. self.accum['vars'][name]['assignments'].append(pos)
  50. def visit_FunctionDef(self, node):
  51. """
  52. Visits FunctionDef nodes in a tree. Adds relevant data about them to accum.
  53. :param node: The current node.
  54. :type node: ast.FunctionDef
  55. .. todo::
  56. Add arguments and decorators metadata to accum.
  57. """
  58. start_line, start_col, end_line, end_col = self.block_position(node)
  59. if not self.accum['functions'].has_key(node.name):
  60. self.accum['functions'][node.name] = {'assignments': [], 'uses': []}
  61. pos = (start_line, start_col, end_line, end_col)
  62. self.accum['functions'][node.name]['assignments'].append(pos)
  63. self.generic_visit(node)
  64. def visit_Call(self, node):
  65. """
  66. Visits Function Call nodes in a tree. Adds relevant data about them
  67. in the functions section for accum.
  68. :param node: The current node.
  69. :type node: ast.Call
  70. .. todo::
  71. Add arguments and decorators metadata to accum.
  72. """
  73. line, col = node.lineno, node.col_offset
  74. pos = (line, col, -1, -1)
  75. if isinstance(node.func, ast.Name):
  76. name = node.func.id
  77. elif isinstance(node.func, ast.Attribute):
  78. name = node.func.attr
  79. else: # Dynamically selected functions, etc:
  80. return
  81. if not self.accum['functions'].has_key(name):
  82. self.accum['functions'][name] = {'assignments': [], 'uses': []}
  83. self.accum['functions'][name]['uses'].append(pos)
  84. def visit_ClassDef(self, node):
  85. """
  86. Visits ClassDef nodes in a tree. Adds relevant data about them to accum.
  87. :param node: The current node.
  88. :type node: ast.ClassDef
  89. .. todo::
  90. Add arguments, inherits, and decorators metadata to accum.
  91. """
  92. start_line, start_col, end_line, end_col = self.block_position(node)
  93. pos = (start_line, start_col, end_line, end_col)
  94. if node.name not in self.accum['classes']:
  95. self.accum['classes'][node.name] = {'assignments': [], 'uses': []}
  96. self.accum['classes'][node.name]['assignments'].append(pos)
  97. self.generic_visit(node)
  98. def visit_Name(self, node):
  99. if self.cache:
  100. last = self.cache[-1]
  101. last['nodes'].append(node.id)
  102. def visit_Attribute(self, node):
  103. if self.cache:
  104. last = self.cache[-1]
  105. last['nodes'].append(node.attr)
  106. def parse_py(codelet):
  107. """
  108. Adds 'symbols' field to the codelet after parsing the python code.
  109. :param codelet: The codelet object to parsed.
  110. :type code: Codelet
  111. """
  112. def strip_encoding(lines):
  113. """Strips the encoding line from a file, which breaks the parser."""
  114. it = iter(lines)
  115. try:
  116. first = next(it)
  117. if not encoding_re.match(first):
  118. yield first
  119. second = next(it)
  120. if not encoding_re.match(second):
  121. yield second
  122. except StopIteration:
  123. return
  124. for line in it:
  125. yield line
  126. try:
  127. tree = ast.parse("\n".join(strip_encoding(codelet.code.splitlines())))
  128. except SyntaxError:
  129. ## TODO: add some logging here?
  130. return
  131. cutter = _CachedWalker()
  132. cutter.visit(tree)
  133. return cutter.accum