A semantic search engine for source code https://bitshift.benkurtovic.com/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

183 regels
5.0 KiB

  1. import ast
  2. import re
  3. encoding_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", re.UNICODE)
  4. class _CachedWalker(ast.NodeVisitor):
  5. """
  6. Local node visitor for python abstract syntax trees.
  7. :ivar accum: (dict) Information on variables, functions, and classes
  8. accumulated from an abstract syntax tree.
  9. :ivar cache: (dict or None) Information stored about parent nodes. Added
  10. to accum when node reaches the lowest possible level.
  11. .. todo::
  12. Add visit funciton for ast.Name to record all uses of a variable.
  13. Use self.cache to store extra information about nodes.
  14. """
  15. def __init__(self):
  16. """
  17. Create a _TreeCutter instance.
  18. """
  19. self.accum = {'vars': {}, 'functions': {}, 'classes': {}}
  20. self.cache = []
  21. def block_position(self, node):
  22. """
  23. Helper function to get the start and end lines of an AST node.
  24. :param node: The node.
  25. :type node: ast.FunctionDef or ast.ClassDef or ast.Module
  26. """
  27. start_line, start_col = node.lineno, node.col_offset
  28. temp_node = node
  29. while 'body' in temp_node.__dict__:
  30. temp_node = temp_node.body[-1]
  31. end_line, end_col = temp_node.lineno, temp_node.col_offset
  32. return (start_line, start_col, end_line, end_col)
  33. def visit_Assign(self, node):
  34. """
  35. Visits Assign nodes in a tree. Adds relevant data about them to accum.
  36. :param node: The current node.
  37. :type node: ast.Assign
  38. .. todo::
  39. Add value and type metadata to accum.
  40. """
  41. line, col = node.lineno, node.col_offset
  42. pos = (line, col, -1, -1)
  43. self.cache.append({'nodes': []})
  44. self.generic_visit(node)
  45. last = self.cache.pop()
  46. for name in last['nodes']:
  47. if not self.accum['vars'].has_key(name):
  48. self.accum['vars'][name] = {'assignments': [], 'uses': []}
  49. self.accum['vars'][name]['assignments'].append(pos)
  50. def visit_FunctionDef(self, node):
  51. """
  52. Visits FunctionDef nodes in a tree. Adds relevant data about them to accum.
  53. :param node: The current node.
  54. :type node: ast.FunctionDef
  55. .. todo::
  56. Add arguments and decorators metadata to accum.
  57. """
  58. start_line, start_col, end_line, end_col = self.block_position(node)
  59. if not self.accum['functions'].has_key(node.name):
  60. self.accum['functions'][node.name] = {'assignments': [], 'uses': []}
  61. pos = (start_line, start_col, end_line, end_col)
  62. self.accum['functions'][node.name]['assignments'].append(pos)
  63. self.generic_visit(node)
  64. def visit_Call(self, node):
  65. """
  66. Visits Function Call nodes in a tree. Adds relevant data about them
  67. in the functions section for accum.
  68. :param node: The current node.
  69. :type node: ast.Call
  70. .. todo::
  71. Add arguments and decorators metadata to accum.
  72. """
  73. line, col = node.lineno, node.col_offset
  74. pos = (line, col, -1, -1)
  75. if isinstance(node.func, ast.Name):
  76. name = node.func.id
  77. else:
  78. name = node.func.attr
  79. if not self.accum['functions'].has_key(name):
  80. self.accum['functions'][name] = {'assignments': [], 'uses': []}
  81. self.accum['functions'][name]['uses'].append(pos)
  82. def visit_ClassDef(self, node):
  83. """
  84. Visits ClassDef nodes in a tree. Adds relevant data about them to accum.
  85. :param node: The current node.
  86. :type node: ast.ClassDef
  87. .. todo::
  88. Add arguments, inherits, and decorators metadata to accum.
  89. """
  90. start_line, start_col, end_line, end_col = self.block_position(node)
  91. pos = (start_line, start_col, end_line, end_col)
  92. self.accum['classes'][node.name] = pos
  93. self.generic_visit(node)
  94. def visit_Name(self, node):
  95. if self.cache:
  96. last = self.cache[-1]
  97. last['nodes'].append(node.id)
  98. def visit_Attribute(self, node):
  99. if self.cache:
  100. last = self.cache[-1]
  101. last['nodes'].append(node.attr)
  102. def parse_py(codelet):
  103. """
  104. Adds 'symbols' field to the codelet after parsing the python code.
  105. :param codelet: The codelet object to parsed.
  106. :type code: Codelet
  107. """
  108. def strip_encoding(lines):
  109. """Strips the encoding line from a file, which breaks the parser."""
  110. it = iter(lines)
  111. try:
  112. first = next(it)
  113. if not encoding_re.match(first):
  114. yield first
  115. second = next(it)
  116. if not encoding_re.match(second):
  117. yield second
  118. except StopIteration:
  119. return
  120. for line in it:
  121. yield line
  122. try:
  123. tree = ast.parse("\n".join(strip_encoding(codelet.code.splitlines())))
  124. except SyntaxError:
  125. ## TODO: add some logging here?
  126. return
  127. cutter = _CachedWalker()
  128. cutter.visit(tree)
  129. return cutter.accum