From 044a448602522559a499c91d95de5c81af308962 Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Mon, 5 May 2014 12:40:06 -0400 Subject: [PATCH] Change the format of the symbols to fit with earwig's issue. --- bitshift/parser/__init__.py | 14 ++-- bitshift/parser/python.py | 88 +++++++++------------- parsers/java/pom.xml | 3 +- .../com/bitshift/parsing/symbols/JavaSymbols.java | 50 ++++++++---- parsers/ruby/lib/parser.rb | 31 ++++---- 5 files changed, 93 insertions(+), 93 deletions(-) diff --git a/bitshift/parser/__init__.py b/bitshift/parser/__init__.py index 5522ae6..55c76e1 100644 --- a/bitshift/parser/__init__.py +++ b/bitshift/parser/__init__.py @@ -1,4 +1,4 @@ -import ast, pygments.lexers as pgl, sys, socket, struct +import json, pygments.lexers as pgl, sys, socket, struct from ..languages import LANGS from .python import parse_py @@ -19,13 +19,13 @@ def _lang(codelet): Modify function to incorporate tags from stackoverflow. """ - try: - if codelet.filename is not None: + if codelet.filename is not None: + try: return pgl.guess_lexer_for_filename(codelet.filename, '').name + except: + raise UnsupportedFileError('Could not find a lexer for the codelet\'s filename') - return LANGS.index(pgl.guess_lexer(codelet.code)) - except: - raise UnsupportedFileError('Could not find a lexer for the codelet\'s filename') + return LANGS.index(pgl.guess_lexer(codelet.code)) def _recv_data(server_socket): """ @@ -88,6 +88,6 @@ def parse(codelet): server_socket.connect(("localhost", server_socket_number)) server_socket.send("%d\n%s" % (len(source), source)); - symbols = ast.literal_eval(_recv_data(server_socket)) + symbols = json.loads(_recv_data(server_socket)) codelet.symbols = symbols diff --git a/bitshift/parser/python.py b/bitshift/parser/python.py index ac71e29..7e9b109 100644 --- a/bitshift/parser/python.py +++ b/bitshift/parser/python.py @@ -1,6 +1,6 @@ import ast -class _TreeCutter(ast.NodeVisitor): +class _CachedWalker(ast.NodeVisitor): """ Local node visitor for python abstract syntax trees. @@ -22,9 +22,9 @@ class _TreeCutter(ast.NodeVisitor): """ self.accum = {'vars': {}, 'functions': {}, 'classes': {}} - self.cache = None + self.cache = [] - def start_n_end(self, node): + def block_position(self, node): """ Helper function to get the start and end lines of an AST node. @@ -54,32 +54,19 @@ class _TreeCutter(ast.NodeVisitor): Add value and type metadata to accum. """ - for t in node.targets: - if isinstance(t, ast.Tuple): - for n in t.elts: - line, col = n.lineno, n.col_offset + line, col = node.lineno, node.col_offset + pos = (line, col, line, col) - if not self.accum['vars'].has_key(node.name): - self.accum['vars'][node.name] = {'declaration': {}, 'uses': []} - - pos = {'coord': {}} - pos['coord']['start_line'] = line - pos['coord']['start_col'] = col - pos['coord']['end_line'] = line - pos['coord']['end_col'] = col - self.accum['vars'][n.id]['declaration'] = pos + self.cache.append({'nodes': []}) + self.generic_visit(node) + last = self.cache.pop() - else: - line, col = t.lineno, t.col_offset + for name in last['nodes']: + if not self.accum['vars'].has_key(name): + self.accum['vars'][name] = {'assignments': [], 'uses': []} - pos = {'coord': {}} - pos['coord']['start_line'] = line - pos['coord']['start_col'] = col - pos['coord']['end_line'] = line - pos['coord']['end_col'] = col - self.accum['vars'][t.id]['declaration'] = pos + self.accum['vars'][name]['assignments'].append(pos) - self.generic_visit(node) def visit_FunctionDef(self, node): """ @@ -93,17 +80,13 @@ class _TreeCutter(ast.NodeVisitor): Add arguments and decorators metadata to accum. """ - start_line, start_col, end_line, end_col = self.start_n_end(node) + start_line, start_col, end_line, end_col = self.block_position(node) if not self.accum['functions'].has_key(node.name): - self.accum['functions'][node.name] = {'declaration': {}, 'calls': []} + self.accum['functions'][node.name] = {'assignments': [], 'uses': []} - pos = {'coord': {}} - pos['coord']['start_ln']= start_line - pos['coord']['start_col'] = start_col - pos['coord']['end_ln'] = end_line - pos['coord']['end_col'] = end_col - self.accum['functions'][node.name]['declaration'] = pos + pos = (start_line, start_col, end_line, end_col) + self.accum['functions'][node.name]['assignments'].append(pos) self.generic_visit(node) @@ -120,20 +103,18 @@ class _TreeCutter(ast.NodeVisitor): Add arguments and decorators metadata to accum. """ - line, col = node.line_no, node.col_offset + line, col = node.lineno, node.col_offset + pos = (line, col, line, col) - if not self.accum['functions'].has_key(node.name): - self.accum['functions'][node.name] = {'declaration': {}, 'calls': []} + if isinstance(node.func, ast.Name): + name = node.func.id + else: + name = node.func.attr - pos = {'coord': {}} - pos['coord']['start_line'] = line - pos['coord']['start_col'] = col - pos['coord']['end_line'] = line - pos['coord']['end_col'] = col - self.accum['functions'][node.name]['calls'].append(pos) - - self.generic_visit(node) + if not self.accum['functions'].has_key(name): + self.accum['functions'][name] = {'assignments': [], 'uses': []} + self.accum['functions'][name]['uses'].append(pos) def visit_ClassDef(self, node): """ @@ -147,19 +128,22 @@ class _TreeCutter(ast.NodeVisitor): Add arguments, inherits, and decorators metadata to accum. """ - start_line, start_col, end_line, end_col = self.start_n_end(node) + start_line, start_col, end_line, end_col = self.block_position(node) - pos = {'coord': {}} - pos['coord']['start_ln']= start_line - pos['coord']['start_col'] = start_col - pos['coord']['end_ln'] = end_line - pos['coord']['end_col'] = end_col + pos = (start_line, start_col, end_line, end_col) self.accum['classes'][node.name] = pos self.generic_visit(node) def visit_Name(self, node): - pass + if self.cache: + last = self.cache[-1] + last['nodes'].append(node.id) + + def visit_Attribute(self, node): + if self.cache: + last = self.cache[-1] + last['nodes'].append(node.attr) def parse_py(codelet): """ @@ -171,6 +155,6 @@ def parse_py(codelet): """ tree = ast.parse(codelet.code) - cutter = _TreeCutter() + cutter = _CachedWalker() cutter.visit(tree) codelet.symbols = cutter.accum diff --git a/parsers/java/pom.xml b/parsers/java/pom.xml index 340feb0..cfecc30 100644 --- a/parsers/java/pom.xml +++ b/parsers/java/pom.xml @@ -16,8 +16,7 @@ 3.8.1 test - - + org.eclipse.jdt core 3.3.0-v_771 diff --git a/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java b/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java index 9265feb..dd15468 100644 --- a/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java +++ b/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java @@ -42,9 +42,17 @@ public class JavaSymbols extends Symbols { HashMap method = this._methods.get(name); if (method == null) { method = new HashMap(); - method.put("declaration", data); + ArrayList assignments = new ArrayList(10); + ArrayList uses = new ArrayList(10); + + assignments.add(data.get("coord")); + method.put("assignments", assignments); + method.put("uses", uses); } else { - method.put("declaration", data); + ArrayList assignments = (ArrayList)method.get("assignments"); + + assignments.add(data.get("coord")); + method.put("assignments", assignments); } this._methods.put(name, method); @@ -54,14 +62,17 @@ public class JavaSymbols extends Symbols { HashMap method = this._methods.get(name); if (method == null) { method = new HashMap(); - ArrayList calls = new ArrayList(10); - calls.add(data); - method.put("calls", calls); + ArrayList assignments = new ArrayList(10); + ArrayList uses = new ArrayList(10); + + uses.add(data.get("coord")); + method.put("assignments", assignments); + method.put("uses", uses); } else { - ArrayList calls = (ArrayList)method.get("calls"); - calls = (calls == null) ? new ArrayList(10) : calls; - calls.add(data); - method.put("calls", calls); + ArrayList uses = (ArrayList)method.get("uses"); + + uses.add(data.get("coord")); + method.put("uses", uses); } this._methods.put(name, method); @@ -77,9 +88,17 @@ public class JavaSymbols extends Symbols { HashMap var = this._vars.get(name); if (var == null) { var = new HashMap(); - var.put("declaration", data); + ArrayList assignments = new ArrayList(10); + ArrayList uses = new ArrayList(10); + + assignments.add(data.get("coord")); + var.put("assignments", assignments); + var.put("uses", uses); } else { - var.put("declaration", data); + ArrayList assignments = (ArrayList)var.get("assignments"); + + assignments.add(data.get("coord")); + var.put("assignments", assignments); } this._vars.put(name, var); @@ -89,13 +108,16 @@ public class JavaSymbols extends Symbols { HashMap var = this._vars.get(name); if (var == null) { var = new HashMap(); + ArrayList assignments = new ArrayList(10); ArrayList uses = new ArrayList(10); - uses.add(data); + + uses.add(data.get("coord")); + var.put("assignments", assignments); var.put("uses", uses); } else { ArrayList uses = (ArrayList)var.get("uses"); - uses = (uses == null) ? new ArrayList(10) : uses; - uses.add(data); + + uses.add(data.get("coord")); var.put("uses", uses); } diff --git a/parsers/ruby/lib/parser.rb b/parsers/ruby/lib/parser.rb index 150e940..c757fa0 100644 --- a/parsers/ruby/lib/parser.rb +++ b/parsers/ruby/lib/parser.rb @@ -12,23 +12,23 @@ module Bitshift parser = RubyParser.new tree = parser.parse(@source) offset = tree.line - 1 - processor = NodeVisitor.new offset, tree + processor = CachedWalker.new offset, tree processor.process(tree) return processor.to_s end end - class NodeVisitor < SexpProcessor + class CachedWalker < SexpProcessor attr_accessor :symbols attr_accessor :offset def initialize(offset, tree) super() - module_hash = Hash.new {|hash, key| hash[key] = Hash.new} + module_hash = Hash.new {|hash, key| hash[key] = { assignments: [], uses: [] }} class_hash = module_hash.clone - function_hash = Hash.new {|hash, key| hash[key] = { calls: [] } } - var_hash = Hash.new {|hash, key| hash[key] = [] } + function_hash = module_hash.clone + var_hash = module_hash.clone @require_empty = false @offset = offset @@ -41,7 +41,6 @@ module Bitshift end def block_position(exp) - pos = Hash.new end_ln = (start_ln = exp.line - offset) cur_exp = exp @@ -51,9 +50,7 @@ module Bitshift break if cur_exp == nil end - pos[:coord] = { - start_ln: start_ln, - end_ln: end_ln } + pos = [start_ln, -1, end_ln, -1] return pos end @@ -61,9 +58,7 @@ module Bitshift pos = Hash.new end_ln = start_ln = exp.line - offset - pos[:coord] = { - start_ln: start_ln, - end_ln: end_ln } + pos = [start_ln, -1, end_ln, -1] return pos end @@ -71,7 +66,7 @@ module Bitshift pos = block_position(exp) exp.shift name = exp.shift - symbols[:modules][name] = pos + symbols[:modules][name][:assignments] << pos exp.each_sexp {|s| process(s)} return exp.clear end @@ -80,7 +75,7 @@ module Bitshift pos = block_position(exp) exp.shift name = exp.shift - symbols[:classes][name] = pos + symbols[:classes][name][:assignments] << pos exp.each_sexp {|s| process(s)} return exp.clear end @@ -89,7 +84,7 @@ module Bitshift pos = block_position(exp) exp.shift name = exp.shift - symbols[:functions][name][:declaration] = pos + symbols[:functions][name][:assignments] << pos exp.each_sexp {|s| process(s)} return exp.clear end @@ -99,7 +94,7 @@ module Bitshift exp.shift exp.shift name = exp.shift - symbols[:functions][name][:calls] << pos + symbols[:functions][name][:uses] << pos exp.each_sexp {|s| process(s)} return exp.clear end @@ -108,7 +103,7 @@ module Bitshift pos = statement_position(exp) exp.shift name = exp.shift - symbols[:vars][name] << pos + symbols[:vars][name][:assignments] << pos exp.each_sexp {|s| process(s)} return exp.clear end @@ -117,7 +112,7 @@ module Bitshift pos = statement_position(exp) exp.shift name = exp.shift - symbols[:vars][name] << pos + symbols[:vars][name][:assignments] << pos exp.each_sexp {|s| process(s)} return exp.clear end