From b16bc40d3f76eb512a3ff1656cae3a68fc3dd677 Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Mon, 28 Apr 2014 00:14:07 -0400 Subject: [PATCH] Consolidate parsers into __init__.py. Update python.py parser. --- bitshift/parser/__init__.py | 64 ++++++++++++++++------ bitshift/parser/java.py | 0 bitshift/parser/python.py | 52 +++++++++++------- bitshift/parser/ruby.py | 0 .../java/com/bitshift/parsing/symbols/Symbols.java | 2 +- 5 files changed, 80 insertions(+), 38 deletions(-) delete mode 100644 bitshift/parser/java.py delete mode 100644 bitshift/parser/ruby.py diff --git a/bitshift/parser/__init__.py b/bitshift/parser/__init__.py index 8d37d74..571e25d 100644 --- a/bitshift/parser/__init__.py +++ b/bitshift/parser/__init__.py @@ -1,9 +1,6 @@ -import pygments.lexers as pgl +import ast, pygments.lexers as pgl, sys, socket, struct from ..languages import LANGS from .python import parse_py -from .c import parse_c -from .java import parse_java -from .ruby import parse_ruby _all__ = ["parse"] @@ -24,28 +21,63 @@ def _lang(codelet): return LANGS.index(pgl.guess_lexer(codelet.code)) -def parse(codelet, pid): +def _recv_data(server_socket): + """ + Private function to read string response from a server. It reads a certain + amount of data based on the size it is sent from the server. + + :param server_socket: The server that the client is connected to, and will, + read from. + + :type code: socket.ServerSocket + """ + + recv_size = 8192 + total_data = []; size_data = cur_data = '' + total_size = 0; size = sys.maxint + + while total_size < size: + cur_data = server_socket.recv(recv_size) + + if not total_data: + if len(size_data) > 4: + size_data += cur_data + size = struct.unpack('>i', size_data[:4])[0] + recv_size = size + if recv_size > sys.maxint: recv_size = sys.maxint + total_data.append(size_data[4:]) + else: + size_data += cur_data + + else: + total_data.append(cur_data) + + total_size = sum([len(s) for s in total_data]) + + server_socket.close() + return ''.join(total_data); + + +def parse(codelet): """ Dispatches the codelet to the correct parser based on its language. :param codelet: The codelet object to parsed. - :param pid: The id of the current python process. :type code: Codelet - :param pid: str. """ - lang = _lang(codelet) + lang = _lang(codelet); source = codelet.code + server_socket_number = 5000 + lang - if lang == LANGS.index("Python"): + if lang == LANGS.index('Python'): parse_py(codelet) - elif lang == LANGS.index("C"): - parse_c(codelet) - - elif lang == LANGS.index("Java"): - parse_java(codelet) + else: + server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server_socket.connect(("localhost", server_socket_number)) + server_socket.send("%d\n%s" % (len(source), source)); - elif lang == LANGS.index("Ruby"): - parse_ruby(codelet) + symbols = ast.literal_eval(_recv_data(server_socket)) + codelet.symbols = symbols diff --git a/bitshift/parser/java.py b/bitshift/parser/java.py deleted file mode 100644 index e69de29..0000000 diff --git a/bitshift/parser/python.py b/bitshift/parser/python.py index 2f15cb5..ac71e29 100644 --- a/bitshift/parser/python.py +++ b/bitshift/parser/python.py @@ -62,16 +62,22 @@ class _TreeCutter(ast.NodeVisitor): if not self.accum['vars'].has_key(node.name): self.accum['vars'][node.name] = {'declaration': {}, 'uses': []} - self.accum['vars'][n.id]['declaration']['start_ln'] = line - self.accum['vars'][n.id]['declaration']['start_col'] = col - self.accum['vars'][n.id]['declaration']['end_ln'] = line - self.accum['vars'][n.id]['declaration']['end_ln'] = col + pos = {'coord': {}} + pos['coord']['start_line'] = line + pos['coord']['start_col'] = col + pos['coord']['end_line'] = line + pos['coord']['end_col'] = col + self.accum['vars'][n.id]['declaration'] = pos + else: line, col = t.lineno, t.col_offset - self.accum['vars'][t.id]['declaration']['start_ln'] = line - self.accum['vars'][t.id]['declaration']['start_col'] = col - self.accum['vars'][t.id]['declaration']['end_ln'] = line - self.accum['vars'][t.id]['declaration']['end_ln'] = col + + pos = {'coord': {}} + pos['coord']['start_line'] = line + pos['coord']['start_col'] = col + pos['coord']['end_line'] = line + pos['coord']['end_col'] = col + self.accum['vars'][t.id]['declaration'] = pos self.generic_visit(node) @@ -92,10 +98,12 @@ class _TreeCutter(ast.NodeVisitor): if not self.accum['functions'].has_key(node.name): self.accum['functions'][node.name] = {'declaration': {}, 'calls': []} - self.accum['functions'][node.name]['declaration']['start_ln'] = start_line - self.accum['functions'][node.name]['declaration']['start_col'] = start_col - self.accum['functions'][node.name]['declaration']['end_ln'] = end_line - self.accum['functions'][node.name]['declaration']['end_ln'] = end_col + pos = {'coord': {}} + pos['coord']['start_ln']= start_line + pos['coord']['start_col'] = start_col + pos['coord']['end_ln'] = end_line + pos['coord']['end_col'] = end_col + self.accum['functions'][node.name]['declaration'] = pos self.generic_visit(node) @@ -117,11 +125,11 @@ class _TreeCutter(ast.NodeVisitor): if not self.accum['functions'].has_key(node.name): self.accum['functions'][node.name] = {'declaration': {}, 'calls': []} - pos = {} - pos['start_line'] = line - pos['start_col'] = col - pos['end_line'] = line - pos['end_col'] = col + pos = {'coord': {}} + pos['coord']['start_line'] = line + pos['coord']['start_col'] = col + pos['coord']['end_line'] = line + pos['coord']['end_col'] = col self.accum['functions'][node.name]['calls'].append(pos) self.generic_visit(node) @@ -141,10 +149,12 @@ class _TreeCutter(ast.NodeVisitor): start_line, start_col, end_line, end_col = self.start_n_end(node) - self.accum['classes'][node.name]['start_ln'] = start_line - self.accum['classes'][node.name]['start_col'] = start_col - self.accum['classes'][node.name]['end_ln'] = end_line - self.accum['classes'][node.name]['end_ln'] = end_col + pos = {'coord': {}} + pos['coord']['start_ln']= start_line + pos['coord']['start_col'] = start_col + pos['coord']['end_ln'] = end_line + pos['coord']['end_col'] = end_col + self.accum['classes'][node.name] = pos self.generic_visit(node) diff --git a/bitshift/parser/ruby.py b/bitshift/parser/ruby.py deleted file mode 100644 index e69de29..0000000 diff --git a/parsers/java/src/main/java/com/bitshift/parsing/symbols/Symbols.java b/parsers/java/src/main/java/com/bitshift/parsing/symbols/Symbols.java index 8bbf44d..7d6c4ea 100644 --- a/parsers/java/src/main/java/com/bitshift/parsing/symbols/Symbols.java +++ b/parsers/java/src/main/java/com/bitshift/parsing/symbols/Symbols.java @@ -5,7 +5,7 @@ import java.util.ArrayList; public abstract class Symbols { public Symbols() { - + } public static ArrayList createCoord(Integer startLine, Integer startCol, Integer endLine, Integer endCol) {