瀏覽代碼

Update parser commands, change parsers to subprocesses rather than servers, implement ruby parser with subprocess.

tags/v1.0^2
Benjamin Attal 10 年之前
父節點
當前提交
ed4bc75f1e
共有 5 個文件被更改,包括 133 次插入212 次删除
  1. +14
    -76
      bitshift/parser/__init__.py
  2. +2
    -0
      bitshift/parser/python.py
  3. +4
    -3
      parsers/ruby/Rakefile
  4. +2
    -13
      parsers/ruby/lib/parse_server.rb
  5. +111
    -120
      parsers/ruby/lib/parser.rb

+ 14
- 76
bitshift/parser/__init__.py 查看文件

@@ -1,8 +1,4 @@
import functools
import json
import sys
import socket
import struct
import subprocess

from os import path
@@ -13,14 +9,15 @@ from .python import parse_py

__all__ = ["parse", "UnsupportedFileError", "start_parse_servers"]

PARSER_COMMANDS = [
('Java', ['mvn', '-f',
path.join(path.dirname(__file__), "../../parsers/java/pom.xml"),
'exec:java', '-Dexec.args=%d']),
('Ruby', ['rake', '-f',
# TODO: Change these
PARSER_COMMANDS = {
'Java': ['java', '-cp',
path.join(path.dirname(__file__), "../../parsers/java/target/classes"),
'com.bitshift.parsing.Parse'],
'Ruby': ['rake', '-f',
path.join(path.dirname(__file__), "../../parsers/ruby/Rakefile"),
'start_server[%d]'])
]
'parse']
}

# Exception type for files the parser cannot handle. In the visible code it is
# raised from _lang() with the codelet's filename when the language lookup
# fails with util.ClassNotFound or KeyError.
class UnsupportedFileError(Exception):
pass
@@ -46,77 +43,18 @@ def _lang(codelet):
except (util.ClassNotFound, KeyError):
raise UnsupportedFileError(codelet.filename)

def _recv_data(server_socket):
"""
Read a size-prefixed string response from a parse server.

Data is received until the number of payload characters collected reaches
the size decoded from the first four characters of the response. The
socket is closed before the function returns.

NOTE(review): the indentation of this block is not visible in this view,
so the pairing of the two ``else`` branches with their ``if`` statements
is an assumption -- confirm against the original file.

:param server_socket: The connected socket to read the response from.

:type server_socket: socket object exposing ``recv`` and ``close`` --
TODO confirm the concrete type

:return: All received payload chunks joined into a single string.
"""

# Number of bytes requested per recv() call; replaced by the decoded size
# once the prefix has been read.
recv_size = 8192
total_data = []
size_data = cur_data = ''
# size stays at sys.maxint until the real length is decoded, which keeps the
# loop below running in the meantime.
total_size, size = 0, sys.maxint

while total_size < size:
cur_data = server_socket.recv(recv_size)

# No payload chunk stored yet: keep gathering into size_data.
if not total_data:
# The prefix is only decoded once size_data already held more than four
# characters before this read.
if len(size_data) > 4:
size_data += cur_data
# '>i' decodes the first four characters as a big-endian signed 32-bit int.
size = struct.unpack('>i', size_data[:4])[0]
recv_size = size
if recv_size > sys.maxint:
recv_size = sys.maxint
# Everything after the four-character prefix is the first payload chunk.
total_data.append(size_data[4:])
else:
size_data += cur_data

# presumably pairs with ``if not total_data`` -- later reads are stored directly.
else:
total_data.append(cur_data)

total_size = sum([len(s) for s in total_data])

# The socket is closed here, so the caller cannot reuse it afterwards.
server_socket.close()
return ''.join(total_data)

def start_parse_servers():
    """
    Starts all the parse servers for languages besides python.

    For every ``(lang, cmd)`` pair in ``PARSER_COMMANDS`` the last element of
    ``cmd`` is treated as a format template and filled in with the port
    ``5001 + LANGS.index(lang)``; the resulting command is launched with
    ``subprocess.Popen``.

    :return: The started ``subprocess.Popen`` objects, in the order the
        commands appear in ``PARSER_COMMANDS``.
    :rtype: list
    """

    procs = []

    for (lang, cmd) in PARSER_COMMANDS:
        port = 5001 + LANGS.index(lang)
        # Build a fresh argument list instead of formatting cmd[-1] in place:
        # PARSER_COMMANDS is shared module state, and overwriting the template
        # would make a second call fail with a string-formatting TypeError.
        launch_cmd = cmd[:-1] + [cmd[-1] % port]
        procs.append(subprocess.Popen(launch_cmd))

    return procs

def parse_via_server(codelet, buffered=True):
port = 5001 + codelet.language
server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server_socket.connect(("localhost", port))
server_socket.send("%d\n%s" % (len(codelet.code), codelet.code))

if buffered:
data = _recv_data(server_socket)
else:
data = server_socket.recv(5000)
def parse_via_proc(codelet):
    """
    Parse a codelet by running the external parser for its language.

    The command is looked up in ``PARSER_COMMANDS`` under the language name
    ``LANGS[codelet.language]``. ``codelet.code`` is written to the child's
    standard input and the child's standard output is decoded as JSON.

    :param codelet: Object providing ``language`` (index into ``LANGS``) and
        ``code`` (the source text sent to the parser).

    :return: The value produced by ``json.loads`` on the parser's output.
    """
    command = PARSER_COMMANDS[LANGS[codelet.language]]
    parser = subprocess.Popen(command, stdin=subprocess.PIPE,
                              stdout=subprocess.PIPE)

    # communicate() returns (stdout, stderr); only stdout is piped here.
    output = parser.communicate(codelet.code)[0]
    return json.loads(output)

PARSERS = {
"Python": parse_py,
"Java": functools.partial(parse_via_server, buffered=False),
"Ruby": parse_via_server,
"Java": parse_via_proc,
"Ruby": parse_via_proc,
}

def parse(codelet):


+ 2
- 0
bitshift/parser/python.py 查看文件

@@ -123,6 +123,8 @@ class _TreeWalker(ast.NodeVisitor):
pos = self.block_position(node)

self.visit(node.func)
if not self.cache:
return
name = self.cache.pop()

if not self.symbols['functions'].has_key(name):


+ 4
- 3
parsers/ruby/Rakefile 查看文件

@@ -1,5 +1,6 @@
require File.expand_path('../lib/parse_server.rb', __FILE__)
require 'pp'
require File.expand_path('../lib/parser.rb', __FILE__)

task :start_server, [:port_number] do |t, args|
start_server Integer(args[:port_number])
task :parse do |t|
parse
end

+ 2
- 13
parsers/ruby/lib/parse_server.rb 查看文件

@@ -1,18 +1,6 @@
require 'socket'
require File.expand_path('../parser.rb', __FILE__)

# Encodes an integer as a four-byte string, most significant byte first.
#
# Only the low 32 bits of +i+ are kept, so negative numbers come out in
# two's-complement form and values wider than 32 bits are truncated.
#
# i - the Integer to encode.
#
# Returns a 4-byte String.
def pack_int(i)
  # Mask first so out-of-range values are truncated rather than left to
  # pack's own handling; 'N' emits a 32-bit big-endian (network order) value.
  [i & 0xFFFFFFFF].pack('N')
end


def start_server(port_number)
server = TCPServer.new port_number
puts "Ruby Server listening on port #{port_number}\n"
@@ -23,11 +11,12 @@ def start_server(port_number)
begin
# Get the amount of data to be read
size = (client.readline).to_i
eos = ">}e^"
p = Bitshift::Parser.new client.read(size)
# Get the parsed result
symbols = p.parse
client.puts pack_int(symbols.length)
client.puts symbols
client.puts eos
ensure
# Close the socket
client.close


+ 111
- 120
parsers/ruby/lib/parser.rb 查看文件

@@ -1,146 +1,137 @@
require 'ripper'
require 'pp'

module Bitshift
# Thin wrapper that runs a TreeWalker over a piece of source text.
class Parser
  # source - the text handed to TreeWalker when #parse is called.
  def initialize(source)
    @source = source
  end

  # Builds a TreeWalker for the stored source, runs its parse step and
  # returns the walker's string form (the result of its #to_s).
  def parse
    TreeWalker.new(@source).tap { |tree_walker| tree_walker.parse }.to_s
  end
end
# Reads all of standard input, runs a TreeWalker over it and prints the
# walker's string form to standard output.
def parse
  tree_walker = TreeWalker.new(STDIN.read)
  tree_walker.parse
  puts(tree_walker.to_s)
end

class TreeWalker < Ripper::SexpBuilder
attr_accessor :symbols
class TreeWalker < Ripper::SexpBuilder
attr_accessor :symbols

def initialize(source)
ns_hash = Hash.new {
|hash, key|
hash[key] = {
:assignments => [], :uses => []
}
def initialize(source)
ns_hash = Hash.new {
|hash, key|
hash[key] = {
:assignments => [], :uses => []
}
class_hash = ns_hash.clone
function_hash = ns_hash.clone
var_hash = ns_hash.clone

@symbols = {
:namespaces => ns_hash,
:classes => class_hash,
:functions => function_hash,
:vars => var_hash
}

super(source)
end

def block_position(node)
last_node = node[0]
while last_node.is_a? Array
sp = last_node
while not (last_el = last_node[last_node.count - 1]) or
(last_el.is_a? Array and last_el[last_el.count - 1].nil?)
last_node = last_node[0..last_node.count - 2]
end
last_node = last_el
end
}
class_hash = ns_hash.clone
function_hash = ns_hash.clone
var_hash = ns_hash.clone

@symbols = {
:namespaces => ns_hash,
:classes => class_hash,
:functions => function_hash,
:vars => var_hash
}

super(source)
end

last_node = node[0]
while last_node.is_a? Array
ep = last_node
while not (last_el = last_node[last_node.count - 1]) or
(last_el.is_a? Array and last_el[last_el.count - 1].nil?)
last_node = last_node[0..last_node.count - 2]
end
last_node = last_el
def block_position(node)
last_node = node[0]
while last_node.is_a? Array
sp = last_node
while not (last_el = last_node[last_node.count - 1]) or
(last_el.is_a? Array and last_el[last_el.count - 1].nil?)
last_node = last_node[0..last_node.count - 2]
end
last_node = last_el
end

if sp == ep
return sp + [sp[0], -1]
last_node = node[0]
while last_node.is_a? Array
ep = last_node
while not (last_el = last_node[last_node.count - 1]) or
(last_el.is_a? Array and last_el[last_el.count - 1].nil?)
last_node = last_node[0..last_node.count - 2]
end
return sp + ep
last_node = last_el
end

def on_module(*node)
pos = block_position(node)
name = node[0][1][1]
symbols[:namespaces][name][:assignments] << pos
return node
if sp == ep
return sp + [sp[0], -1]
end
return sp + ep
end

# Records a class definition: the span from block_position is appended to
# symbols[:classes][<name>][:assignments], with <name> taken from
# node[0][1][1]. Returns the received arguments as an array.
# NOTE(review): presumably invoked by Ripper for class events - confirm.
def on_class(*node)
  span = block_position(node)
  header = node.first
  class_name = header[1][1]
  symbols[:classes][class_name][:assignments] << span
  node
end
# Records a module definition: the span from block_position is appended to
# symbols[:namespaces][<name>][:assignments], with <name> taken from
# node[0][1][1]. Returns the received arguments as an array.
# NOTE(review): presumably invoked by Ripper for module events - confirm.
def on_module(*node)
  span = block_position(node)
  header = node.first
  module_name = header[1][1]
  symbols[:namespaces][module_name][:assignments] << span
  node
end

# Records a method definition: the span from block_position is appended to
# symbols[:functions][<name>][:assignments], with <name> taken from
# node[0][1]. Returns the received arguments as an array.
# NOTE(review): presumably invoked by Ripper for def events - confirm.
def on_def(*node)
  span = block_position(node)
  method_name = node.first[1]
  symbols[:functions][method_name][:assignments] << span
  node
end
# Records a class definition: the span from block_position is appended to
# symbols[:classes][<name>][:assignments], with <name> taken from
# node[0][1][1]. Returns the received arguments as an array.
# NOTE(review): presumably invoked by Ripper for class events - confirm.
def on_class(*node)
  span = block_position(node)
  header = node.first
  class_name = header[1][1]
  symbols[:classes][class_name][:assignments] << span
  node
end

# Records a method call: the span from block_position is appended to
# symbols[:functions][<name>][:uses], with <name> taken from index 1 of the
# last argument. Returns the received arguments as an array.
# NOTE(review): presumably invoked by Ripper for call events - confirm.
def on_call(*node)
  span = block_position(node)
  callee = node.last[1]
  symbols[:functions][callee][:uses] << span
  node
end
# Records a method definition: the span from block_position is appended to
# symbols[:functions][<name>][:assignments], with <name> taken from
# node[0][1]. Returns the received arguments as an array.
# NOTE(review): presumably invoked by Ripper for def events - confirm.
def on_def(*node)
  span = block_position(node)
  method_name = node.first[1]
  symbols[:functions][method_name][:assignments] << span
  node
end

# Records a bare-identifier call: the span from block_position is appended
# to symbols[:functions][<name>][:uses], with <name> taken from node[0][1].
# Returns the received arguments as an array.
# NOTE(review): presumably invoked by Ripper for vcall events - confirm.
def on_vcall(*node)
  span = block_position(node)
  callee = node.first[1]
  symbols[:functions][callee][:uses] << span
  node
end
# Records a method call: the span from block_position is appended to
# symbols[:functions][<name>][:uses], with <name> taken from index 1 of the
# last argument. Returns the received arguments as an array.
# NOTE(review): presumably invoked by Ripper for call events - confirm.
def on_call(*node)
  span = block_position(node)
  callee = node.last[1]
  symbols[:functions][callee][:uses] << span
  node
end

# Records a variable assignment. When node[0][0] is an Array, the span from
# block_position is appended to symbols[:vars][<name>][:assignments], with
# <name> taken from node[0][0][1]; otherwise nothing is recorded.
# Returns the received arguments as an array in both cases.
# NOTE(review): presumably invoked by Ripper for assign events - confirm.
def on_assign(*node)
  span = block_position(node)
  target = node.first[0]
  return node unless target.is_a?(Array)

  symbols[:vars][target[1]][:assignments] << span
  node
end
# Records a bare-identifier call: the span from block_position is appended
# to symbols[:functions][<name>][:uses], with <name> taken from node[0][1].
# Returns the received arguments as an array.
# NOTE(review): presumably invoked by Ripper for vcall events - confirm.
def on_vcall(*node)
  span = block_position(node)
  callee = node.first[1]
  symbols[:functions][callee][:uses] << span
  node
end

# Records a variable field: the span from block_position is appended to
# symbols[:vars][<name>][:uses], with <name> taken from node[0][1].
# Returns the received arguments as an array.
# NOTE(review): presumably invoked by Ripper for var_field events - confirm.
def on_var_field(*node)
  span = block_position(node)
  var_name = node.first[1]
  symbols[:vars][var_name][:uses] << span
  node
end
# Records a variable assignment. When node[0][0] is an Array, the span from
# block_position is appended to symbols[:vars][<name>][:assignments], with
# <name> taken from node[0][0][1]; otherwise nothing is recorded.
# Returns the received arguments as an array in both cases.
# NOTE(review): presumably invoked by Ripper for assign events - confirm.
def on_assign(*node)
  span = block_position(node)
  target = node.first[0]
  return node unless target.is_a?(Array)

  symbols[:vars][target[1]][:assignments] << span
  node
end

# Records a variable reference: the span from block_position is appended to
# symbols[:vars][<name>][:uses], with <name> taken from node[0][1].
# Returns the received arguments as an array.
# NOTE(review): presumably invoked by Ripper for var_ref events - confirm.
def on_var_ref(*node)
  span = block_position(node)
  var_name = node.first[1]
  symbols[:vars][var_name][:uses] << span
  node
end
# Records a variable field: the span from block_position is appended to
# symbols[:vars][<name>][:uses], with <name> taken from node[0][1].
# Returns the received arguments as an array.
# NOTE(review): presumably invoked by Ripper for var_field events - confirm.
def on_var_field(*node)
  span = block_position(node)
  var_name = node.first[1]
  symbols[:vars][var_name][:uses] << span
  node
end

# Placeholder handler: ignores its arguments and returns nil. The note left
# in the body says it is meant to catch require statements.
def on_command(*node)
  # catch require statements
  nil
end
# Records a variable reference: the span from block_position is appended to
# symbols[:vars][<name>][:uses], with <name> taken from node[0][1].
# Returns the received arguments as an array.
# NOTE(review): presumably invoked by Ripper for var_ref events - confirm.
def on_var_ref(*node)
  span = block_position(node)
  var_name = node.first[1]
  symbols[:vars][var_name][:uses] << span
  node
end

def to_s
new_symbols = Hash.new {|hash, key| hash[key] = Hash.new}
# Placeholder handler: ignores its arguments and returns nil. The note left
# in the body says it is meant to catch require statements.
def on_command(*node)
  # catch require statements
  nil
end

symbols.each do |type, sym_list|
sym_list.each do |name, sym|
new_symbols[type.to_s][name.to_s] = {
"assignments" => sym[:assignments],
"uses" => sym[:uses]}
end
end
def to_s
new_symbols = Hash.new {|hash, key| hash[key] = Hash.new}

str = new_symbols.to_s
str = str.gsub(/=>/, ":")
return str
symbols.each do |type, sym_list|
sym_list.each do |name, sym|
new_symbols[type.to_s][name.to_s] = {
"assignments" => sym[:assignments],
"uses" => sym[:uses]}
end
end

str = new_symbols.to_s
str = str.gsub(/=>/, ":")
return str
end
end

Loading…
取消
儲存