import json
import sys
import socket
import struct
import subprocess
from os import path

from pygments import lexers as pgl, util

from ..languages import LANGS, LANGS_ALL
from .python import parse_py

__all__ = ["parse", "UnsupportedFileError", "start_parse_servers"]

# Each entry is (language name, command used to launch its parse server).
# The last element of every command holds a '%d' placeholder for the port.
PARSER_COMMANDS = [
    ('Java', ['mvn', '-f', path.join(path.dirname(__file__),
        "../../parsers/java/pom.xml"), 'exec:java', '-Dexec.args=%d']),
    ('Ruby', ['rake', '-f', path.join(path.dirname(__file__),
        "../../parsers/ruby/Rakefile"), 'start_server[%d]'])
]

# A language's parse server listens on _BASE_PORT + its index in LANGS.
_BASE_PORT = 5001

# Number of bytes requested from the socket per recv() call.
_RECV_CHUNK = 8192

# Responses start with a 4-byte big-endian signed integer giving the payload
# length.
_SIZE_HEADER = struct.Struct('>i')


class UnsupportedFileError(Exception):
    pass


def _lang(codelet):
    """
    Private function to identify the language of a codelet.

    The lexer is guessed from the filename and code when the codelet has a
    filename, and from the code alone otherwise. The lexer's name is then
    looked up in LANGS_ALL.

    :param codelet: The codelet object to be identified.
    :type codelet: Codelet

    :return: The LANGS_ALL entry for the guessed lexer's name.

    :raises UnsupportedFileError: If no lexer could be found, or the lexer's
        name is not present in LANGS_ALL.

    .. todo::
        Modify function to incorporate tags from stackoverflow.
    """
    try:
        if codelet.filename:
            lex = pgl.guess_lexer_for_filename(codelet.filename, codelet.code)
        else:
            lex = pgl.guess_lexer(codelet.code)
        return LANGS_ALL[lex.name]
    except (util.ClassNotFound, KeyError):
        raise UnsupportedFileError(codelet.filename)


def _recv_data(server_socket):
    """
    Private function to read a string response from a server.

    The response is expected to begin with a 4-byte big-endian integer giving
    the size of the payload; data is read until at least that many payload
    bytes have arrived. The socket is always closed before returning, whether
    or not the read succeeded.

    :param server_socket: The connected socket to read the response from.
    :type server_socket: socket.socket

    :return: The payload that followed the size header.
    :rtype: str

    :raises socket.error: If the server closes the connection before the
        complete response has been received.
    """
    header = b''
    payload = []
    received = 0
    expected = None  # unknown until the whole size header has arrived

    try:
        while expected is None or received < expected:
            chunk = server_socket.recv(_RECV_CHUNK)
            if not chunk:
                # recv() returns an empty string once the peer has closed the
                # connection; without this check the loop would never end.
                raise socket.error(
                    "connection closed before the full response was received")

            if expected is None:
                header += chunk
                if len(header) < _SIZE_HEADER.size:
                    continue
                expected = _SIZE_HEADER.unpack(header[:_SIZE_HEADER.size])[0]
                # Anything that arrived after the header is already payload.
                chunk = header[_SIZE_HEADER.size:]

            payload.append(chunk)
            received += len(chunk)
    finally:
        server_socket.close()

    return b''.join(payload)


def start_parse_servers():
    """
    Starts all the parse servers for languages besides python.

    Every command in PARSER_COMMANDS is launched with the port
    ``5001 + LANGS.index(language)`` substituted into its last argument.
    PARSER_COMMANDS itself is left untouched, so this function can be called
    more than once.

    :return: The started server processes, in PARSER_COMMANDS order.
    :rtype: list
    """
    procs = []
    for lang, cmd in PARSER_COMMANDS:
        port = _BASE_PORT + LANGS.index(lang)
        # Build a fresh argument list rather than formatting cmd[-1] in place:
        # in-place formatting would consume the '%d' placeholder of the shared
        # module-level command.
        launch_cmd = cmd[:-1] + [cmd[-1] % port]
        procs.append(subprocess.Popen(launch_cmd))
    return procs


def parse_via_server(codelet):
    """
    Parses a codelet by sending its code to the parse server of its language.

    The server is contacted on localhost at port ``5001 + codelet.language``.
    The request is the length of the code, a newline, then the code itself;
    the response is decoded as JSON.

    :param codelet: The codelet to parse; its ``language`` and ``code``
        attributes are read.
    :type codelet: Codelet

    :return: The decoded JSON response from the server.
    """
    port = _BASE_PORT + codelet.language
    server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        server_socket.connect(("localhost", port))
        # sendall() keeps writing until the whole request has been sent;
        # send() may stop after only part of it.
        server_socket.sendall("%d\n%s" % (len(codelet.code), codelet.code))
    except Exception:
        # _recv_data() closes the socket on the normal path; make sure it is
        # not leaked when connecting or sending fails.
        server_socket.close()
        raise

    symbols = json.loads(_recv_data(server_socket))
    return symbols


PARSERS = {
    "Python": parse_py,
    "Java": parse_via_server,
    "Ruby": parse_via_server,
}


def parse(codelet):
    """
    Dispatches the codelet to the correct parser based on its language.

    The codelet's ``language`` field is always set. If a parser is registered
    for that language, it is called with the codelet and its result is
    expected to map each key to a dict of
    ``name -> {"assignments": [...], "uses": [...]}``. That result is
    converted into ``key -> [(name, assignments, uses), ...]``, where every
    location becomes a tuple and every -1 inside a location becomes None, and
    stored as the codelet's ``symbols`` field. If no parser is registered,
    ``symbols`` is left untouched.

    :param codelet: The codelet object to be parsed.
    :type codelet: Codelet

    :raises UnsupportedFileError: If the codelet's language cannot be
        identified.
    """
    lang = _lang(codelet)
    lang_string = LANGS[lang]
    codelet.language = lang

    def loc_helper(l):
        # -1 entries in a location are replaced by None.
        for i in l:
            if i == -1:
                yield None
            else:
                yield i

    def convert_locs(locs):
        return [tuple(loc_helper(loc)) for loc in locs]

    if lang_string in PARSERS:
        symbols = PARSERS[lang_string](codelet)
        symbols = {
            key: [(name,
                   convert_locs(syms[name]["assignments"]),
                   convert_locs(syms[name]["uses"]))
                  for name in syms]
            for key, syms in symbols.items()}
        codelet.symbols = symbols