From ed4bc75f1ee1dd2564f132d9e217342acd5da486 Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Sat, 14 Jun 2014 01:33:26 -0400 Subject: [PATCH] Update parser commands, change parsers to subprocesses rather than servers, implement ruby parser with subprocess. --- bitshift/parser/__init__.py | 90 +++------------ bitshift/parser/python.py | 2 + parsers/ruby/Rakefile | 7 +- parsers/ruby/lib/parse_server.rb | 15 +-- parsers/ruby/lib/parser.rb | 231 +++++++++++++++++++-------------------- 5 files changed, 133 insertions(+), 212 deletions(-) diff --git a/bitshift/parser/__init__.py b/bitshift/parser/__init__.py index 6620c7c..b49eb68 100644 --- a/bitshift/parser/__init__.py +++ b/bitshift/parser/__init__.py @@ -1,8 +1,4 @@ -import functools import json -import sys -import socket -import struct import subprocess from os import path @@ -13,14 +9,15 @@ from .python import parse_py __all__ = ["parse", "UnsupportedFileError", "start_parse_servers"] -PARSER_COMMANDS = [ - ('Java', ['mvn', '-f', - path.join(path.dirname(__file__), "../../parsers/java/pom.xml"), - 'exec:java', '-Dexec.args=%d']), - ('Ruby', ['rake', '-f', +# TODO: Change these +PARSER_COMMANDS = { + 'Java': ['java', '-cp', + path.join(path.dirname(__file__), "../../parsers/java/target/classes"), + 'com.bitshift.parsing.Parse'], + 'Ruby': ['rake', '-f', path.join(path.dirname(__file__), "../../parsers/ruby/Rakefile"), - 'start_server[%d]']) -] + 'parse'] +} class UnsupportedFileError(Exception): pass @@ -46,77 +43,18 @@ def _lang(codelet): except (util.ClassNotFound, KeyError): raise UnsupportedFileError(codelet.filename) -def _recv_data(server_socket): - """ - Private function to read string response from a server. It reads a certain - amount of data based on the size it is sent from the server. - - :param server_socket: The server that the client is connected to, and will, - read from. - - :type code: socket.ServerSocket - """ - - recv_size = 8192 - total_data = [] - size_data = cur_data = '' - total_size, size = 0, sys.maxint - - while total_size < size: - cur_data = server_socket.recv(recv_size) - - if not total_data: - if len(size_data) > 4: - size_data += cur_data - size = struct.unpack('>i', size_data[:4])[0] - recv_size = size - if recv_size > sys.maxint: - recv_size = sys.maxint - total_data.append(size_data[4:]) - else: - size_data += cur_data - - else: - total_data.append(cur_data) - - total_size = sum([len(s) for s in total_data]) - - server_socket.close() - return ''.join(total_data) - -def start_parse_servers(): - """ - Starts all the parse servers for languages besides python. - - :rtype: list - """ - - procs = [] - - for (lang, cmd) in PARSER_COMMANDS: - cmd[-1] = cmd[-1] % (5001 + LANGS.index(lang)) - procs.append(subprocess.Popen(cmd)) - - return procs - -def parse_via_server(codelet, buffered=True): - port = 5001 + codelet.language - server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - server_socket.connect(("localhost", port)) - server_socket.send("%d\n%s" % (len(codelet.code), codelet.code)) - - if buffered: - data = _recv_data(server_socket) - else: - data = server_socket.recv(5000) +def parse_via_proc(codelet): + proc = subprocess.Popen(PARSER_COMMANDS[LANGS[codelet.language]], + stdin=subprocess.PIPE, stdout=subprocess.PIPE) + data = proc.communicate(codelet.code)[0] symbols = json.loads(data) return symbols PARSERS = { "Python": parse_py, - "Java": functools.partial(parse_via_server, buffered=False), - "Ruby": parse_via_server, + "Java": parse_via_proc, + "Ruby": parse_via_proc, } def parse(codelet): diff --git a/bitshift/parser/python.py b/bitshift/parser/python.py index 7284bac..9d66537 100644 --- a/bitshift/parser/python.py +++ b/bitshift/parser/python.py @@ -123,6 +123,8 @@ class _TreeWalker(ast.NodeVisitor): pos = self.block_position(node) self.visit(node.func) + if not self.cache: + return name = self.cache.pop() if not self.symbols['functions'].has_key(name): diff --git a/parsers/ruby/Rakefile b/parsers/ruby/Rakefile index f8cdf64..a342b24 100644 --- a/parsers/ruby/Rakefile +++ b/parsers/ruby/Rakefile @@ -1,5 +1,6 @@ -require File.expand_path('../lib/parse_server.rb', __FILE__) +require 'pp' +require File.expand_path('../lib/parser.rb', __FILE__) -task :start_server, [:port_number] do |t, args| - start_server Integer(args[:port_number]) +task :parse do |t| + parse end diff --git a/parsers/ruby/lib/parse_server.rb b/parsers/ruby/lib/parse_server.rb index 2c87e49..b24d263 100644 --- a/parsers/ruby/lib/parse_server.rb +++ b/parsers/ruby/lib/parse_server.rb @@ -1,18 +1,6 @@ require 'socket' require File.expand_path('../parser.rb', __FILE__) -def pack_int(i) - bytes = []; mask = 255 - - while bytes.length < 4 - bytes.unshift (i & mask) - i = i >> 8 - end - - return bytes.pack('cccc') -end - - def start_server(port_number) server = TCPServer.new port_number puts "Ruby Server listening on port #{port_number}\n" @@ -23,11 +11,12 @@ def start_server(port_number) begin # Get the amount of data to be read size = (client.readline).to_i + eos = ">}e^" p = Bitshift::Parser.new client.read(size) # Get the parsed result symbols = p.parse - client.puts pack_int(symbols.length) client.puts symbols + client.puts eos ensure # Close the socket client.close diff --git a/parsers/ruby/lib/parser.rb b/parsers/ruby/lib/parser.rb index 087a317..6a95ccf 100644 --- a/parsers/ruby/lib/parser.rb +++ b/parsers/ruby/lib/parser.rb @@ -1,146 +1,137 @@ require 'ripper' -require 'pp' -module Bitshift - class Parser - def initialize(source) - @source = source - end - - def parse - # new stuff - walker = TreeWalker.new(@source) - walker.parse - return walker.to_s - end - end +def parse + source = STDIN.read + walker = TreeWalker.new(source) + walker.parse + puts walker.to_s +end - class TreeWalker < Ripper::SexpBuilder - attr_accessor :symbols +class TreeWalker < Ripper::SexpBuilder + attr_accessor :symbols - def initialize(source) - ns_hash = Hash.new { - |hash, key| - hash[key] = { - :assignments => [], :uses => [] - } + def initialize(source) + ns_hash = Hash.new { + |hash, key| + hash[key] = { + :assignments => [], :uses => [] } - class_hash = ns_hash.clone - function_hash = ns_hash.clone - var_hash = ns_hash.clone - - @symbols = { - :namespaces => ns_hash, - :classes => class_hash, - :functions => function_hash, - :vars => var_hash - } - - super(source) - end - - def block_position(node) - last_node = node[0] - while last_node.is_a? Array - sp = last_node - while not (last_el = last_node[last_node.count - 1]) or - (last_el.is_a? Array and last_el[last_el.count - 1].nil?) - last_node = last_node[0..last_node.count - 2] - end - last_node = last_el - end + } + class_hash = ns_hash.clone + function_hash = ns_hash.clone + var_hash = ns_hash.clone + + @symbols = { + :namespaces => ns_hash, + :classes => class_hash, + :functions => function_hash, + :vars => var_hash + } + + super(source) + end - last_node = node[0] - while last_node.is_a? Array - ep = last_node - while not (last_el = last_node[last_node.count - 1]) or - (last_el.is_a? Array and last_el[last_el.count - 1].nil?) - last_node = last_node[0..last_node.count - 2] - end - last_node = last_el + def block_position(node) + last_node = node[0] + while last_node.is_a? Array + sp = last_node + while not (last_el = last_node[last_node.count - 1]) or + (last_el.is_a? Array and last_el[last_el.count - 1].nil?) + last_node = last_node[0..last_node.count - 2] end + last_node = last_el + end - if sp == ep - return sp + [sp[0], -1] + last_node = node[0] + while last_node.is_a? Array + ep = last_node + while not (last_el = last_node[last_node.count - 1]) or + (last_el.is_a? Array and last_el[last_el.count - 1].nil?) + last_node = last_node[0..last_node.count - 2] end - return sp + ep + last_node = last_el end - def on_module(*node) - pos = block_position(node) - name = node[0][1][1] - symbols[:namespaces][name][:assignments] << pos - return node + if sp == ep + return sp + [sp[0], -1] end + return sp + ep + end - def on_class(*node) - pos = block_position(node) - name = node[0][1][1] - symbols[:classes][name][:assignments] << pos - return node - end + def on_module(*node) + pos = block_position(node) + name = node[0][1][1] + symbols[:namespaces][name][:assignments] << pos + return node + end - def on_def(*node) - pos = block_position(node) - name = node[0][1] - symbols[:functions][name][:assignments] << pos - return node - end + def on_class(*node) + pos = block_position(node) + name = node[0][1][1] + symbols[:classes][name][:assignments] << pos + return node + end - def on_call(*node) - pos = block_position(node) - name = node[node.count - 1][1] - symbols[:functions][name][:uses] << pos - return node - end + def on_def(*node) + pos = block_position(node) + name = node[0][1] + symbols[:functions][name][:assignments] << pos + return node + end - def on_vcall(*node) - pos = block_position(node) - name = node[0][1] - symbols[:functions][name][:uses] << pos - return node - end + def on_call(*node) + pos = block_position(node) + name = node[node.count - 1][1] + symbols[:functions][name][:uses] << pos + return node + end - def on_assign(*node) - pos = block_position(node) - return node if not node[0][0].is_a? Array - name = node[0][0][1] - symbols[:vars][name][:assignments] << pos - return node - end + def on_vcall(*node) + pos = block_position(node) + name = node[0][1] + symbols[:functions][name][:uses] << pos + return node + end - def on_var_field(*node) - pos = block_position(node) - name = node[0][1] - symbols[:vars][name][:uses] << pos - return node - end + def on_assign(*node) + pos = block_position(node) + return node if not node[0][0].is_a? Array + name = node[0][0][1] + symbols[:vars][name][:assignments] << pos + return node + end - def on_var_ref(*node) - pos = block_position(node) - name = node[0][1] - symbols[:vars][name][:uses] << pos - return node - end + def on_var_field(*node) + pos = block_position(node) + name = node[0][1] + symbols[:vars][name][:uses] << pos + return node + end - def on_command(*node) - # catch require statements - end + def on_var_ref(*node) + pos = block_position(node) + name = node[0][1] + symbols[:vars][name][:uses] << pos + return node + end - def to_s - new_symbols = Hash.new {|hash, key| hash[key] = Hash.new} + def on_command(*node) + # catch require statements + end - symbols.each do |type, sym_list| - sym_list.each do |name, sym| - new_symbols[type.to_s][name.to_s] = { - "assignments" => sym[:assignments], - "uses" => sym[:uses]} - end - end + def to_s + new_symbols = Hash.new {|hash, key| hash[key] = Hash.new} - str = new_symbols.to_s - str = str.gsub(/=>/, ":") - return str + symbols.each do |type, sym_list| + sym_list.each do |name, sym| + new_symbols[type.to_s][name.to_s] = { + "assignments" => sym[:assignments], + "uses" => sym[:uses]} + end end + + str = new_symbols.to_s + str = str.gsub(/=>/, ":") + return str end end