diff --git a/.gitignore b/.gitignore index 7e00121..319057d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +*.swp .sass-cache .DS_Store .my.cnf @@ -39,3 +40,13 @@ nosetests.xml .mr.developer.cfg .project .pydevproject + +# Maven +target + +# Ruby +!parsers/ruby/lib + +# Ctags +*/tags +log diff --git a/README.md b/README.md index 8ca31d7..96a93bc 100644 --- a/README.md +++ b/README.md @@ -32,3 +32,9 @@ root. Note that this will revert any custom changes made to the files in `docs/source/api`, so you might want to update them by hand instead. [SASS]: http://sass-lang.com/guide + +Releasing +--------- + +- Update `__version__` in `bitshift/__init__.py`, `version` in `setup.py`, and + `version` and `release` in `docs/source/conf.py`. diff --git a/bitshift/__init__.py b/bitshift/__init__.py index 78ca5e9..0bd031c 100644 --- a/bitshift/__init__.py +++ b/bitshift/__init__.py @@ -1 +1,8 @@ -from . import assets, codelet, config, database, parser, query, crawler +# -*- coding: utf-8 -*- + +__author__ = "Benjamin Attal, Ben Kurtovic, Severyn Kozak" +__copyright__ = "Copyright (c) 2014 Benjamin Attal, Ben Kurtovic, Severyn Kozak" +__license__ = "MIT License" +__version__ = "0.1.dev" + +from . import assets, codelet, config, crawler, database, parser, query diff --git a/bitshift/codelet.py b/bitshift/codelet.py index 453ace0..acaa52b 100644 --- a/bitshift/codelet.py +++ b/bitshift/codelet.py @@ -18,6 +18,8 @@ class Codelet(object): code was last modified. :ivar rank: (float) A quanitification of the source code's quality, as per available ratings (stars, forks, upvotes, etc.). + :ivar symbols: (dict) Dictionary containing dictionaries of functions, classes, + variable definitions, etc. 
""" def __init__(self, name, code, filename, language, authors, code_url, diff --git a/bitshift/languages.py b/bitshift/languages.py new file mode 100644 index 0000000..b04c094 --- /dev/null +++ b/bitshift/languages.py @@ -0,0 +1,2 @@ + +LANGS = ["Python", "C", "Java", "Ruby"] diff --git a/bitshift/parser/__init__.py b/bitshift/parser/__init__.py index e69de29..55c76e1 100644 --- a/bitshift/parser/__init__.py +++ b/bitshift/parser/__init__.py @@ -0,0 +1,93 @@ +import json, pygments.lexers as pgl, sys, socket, struct +from ..languages import LANGS +from .python import parse_py + +__all__ = ["parse"] + +class UnsupportedFileError(Exception): + pass + +def _lang(codelet): + """ + Private function to identify the language of a codelet, as an index into LANGS. + + :param codelet: The codelet object to be identified. + + :type codelet: Codelet + + .. todo:: + Modify function to incorporate tags from stackoverflow. + """ + + if codelet.filename is not None: + try: + return LANGS.index(pgl.guess_lexer_for_filename(codelet.filename, '').name) + except ValueError: + raise UnsupportedFileError('Could not find a lexer for the codelet\'s filename') + + return LANGS.index(pgl.guess_lexer(codelet.code).name) + +def _recv_data(server_socket): + """ + Private function to read string response from a server. It reads a certain + amount of data based on the size it is sent from the server. + + :param server_socket: The server that the client is connected to, and will + read from. 
+ + :type server_socket: socket.socket + """ + + recv_size = 8192 + total_data = []; size_data = cur_data = '' + total_size = 0; size = sys.maxint + + while total_size < size: + cur_data = server_socket.recv(recv_size) + + if not total_data: + if len(size_data) > 4: + size_data += cur_data + size = struct.unpack('>i', size_data[:4])[0] + recv_size = size + if recv_size > sys.maxint: recv_size = sys.maxint + total_data.append(size_data[4:]) + else: + size_data += cur_data + + else: + total_data.append(cur_data) + + total_size = sum([len(s) for s in total_data]) + + server_socket.close() + return ''.join(total_data); + + +def parse(codelet): + """ + Dispatches the codelet to the correct parser based on its language. + It is the job of the respective parsers to accumulate data about the + code and to convert it into a string representing a python dict. + The codelet is then given the dict as its 'symbols' field. + + :param codelet: The codelet object to be parsed. + + :type codelet: Codelet + """ + + lang = _lang(codelet); source = codelet.code + codelet.language = lang + server_socket_number = 5000 + lang + + if lang == LANGS.index('Python'): + parse_py(codelet) + + else: + server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server_socket.connect(("localhost", server_socket_number)) + server_socket.send("%d\n%s" % (len(source), source)); + + symbols = json.loads(_recv_data(server_socket)) + codelet.symbols = symbols + diff --git a/bitshift/parser/c.py b/bitshift/parser/c.py new file mode 100644 index 0000000..50ee6eb --- /dev/null +++ b/bitshift/parser/c.py @@ -0,0 +1,106 @@ +from pycparser import c_parser, c_ast + +class _TreeCutter(c_ast.NodeVisitor): + """ + Local node visitor for c abstract syntax trees. + + :ivar accum: (dict) Information on variables, functions, and structs + accumulated from an abstract syntax tree. + + :ivar cache: (dict or None) Information stored about parent nodes. Added + to accum when node reaches the lowest possible level. + + .. 
todo:: + Add visit function for c_ast.ID to record all uses of a variable. + + Use self.cache to store extra information about variables. + """ + + def __init__(self): + """ + Create a _TreeCutter instance. + """ + + self.accum = {'vars': {}, 'functions': {}, 'structs': {}} + self.cache = None + + def start_n_end(self, node): + pass + + def visit_FuncDecl(self, node): + """ + Visits FuncDecl nodes in a tree. Adds relevant data about them to accum + after visiting all of its children as well. + + :param node: The current node. + + :type node: c_ast.FuncDecl + + .. todo:: + Add other relevant information about functions like parameters and + return type. + """ + + self.cache['group'] = 'functions' + self.cache['meta']['end_ln'] = node.coord.line + self.cache['meta']['end_col'] = node.coord.column + + self.generic_visit(node) + + def visit_Struct(self, node): + """ + Visits Struct nodes in a tree. Adds relevant data about them to accum + after visiting all of its children as well. + + :param node: The current node. + + :type node: c_ast.Struct + + .. todo:: + Find other relevant information to add about structs. + """ + + self.cache['group'] = 'structs' + self.cache['meta']['end_ln'] = node.coord.line + self.cache['meta']['end_col'] = node.coord.column + + self.generic_visit(node) + + def visit_Decl(self, node): + """ + Visits Decl nodes in a tree. Adds relevant data about them to accum + after visiting all of its children as well. + + :param node: The current node. + + :type node: c_ast.Decl + """ + + self.cache = {'group': 'vars', 'meta': {}} + + self.cache['meta']['start_ln'] = node.coord.line + self.cache['meta']['start_col'] = node.coord.column + self.cache['meta']['end_ln'] = node.coord.line + self.cache['meta']['end_col'] = node.coord.column + + self.generic_visit(node) + + self.accum[self.cache['group']][node.name] = self.cache['meta'] + self.cache = None + +def parse_c(codelet): + """ + Adds 'symbols' field to the codelet after parsing the c code. 
+ + :param codelet: The codelet object to be parsed. + + :type codelet: Codelet + + .. todo:: + Preprocess c code so that no ParseErrors are thrown. + """ + + tree = c_parser.CParser().parse(codelet.code) + cutter = _TreeCutter() + cutter.visit(tree) + codelet.symbols = cutter.accum diff --git a/bitshift/parser/python.py b/bitshift/parser/python.py new file mode 100644 index 0000000..d0cd7d3 --- /dev/null +++ b/bitshift/parser/python.py @@ -0,0 +1,160 @@ +import ast + +class _CachedWalker(ast.NodeVisitor): + """ + Local node visitor for python abstract syntax trees. + + :ivar accum: (dict) Information on variables, functions, and classes + accumulated from an abstract syntax tree. + + :ivar cache: (list) Information stored about parent nodes. Added + to accum when node reaches the lowest possible level. + + .. todo:: + Add visit function for ast.Name to record all uses of a variable. + + Use self.cache to store extra information about nodes. + """ + + def __init__(self): + """ + Create a _CachedWalker instance. + """ + + self.accum = {'vars': {}, 'functions': {}, 'classes': {}} + self.cache = [] + + def block_position(self, node): + """ + Helper function to get the start and end lines of an AST node. + + :param node: The node. + + :type node: ast.FunctionDef or ast.ClassDef or ast.Module + """ + + start_line, start_col = node.lineno, node.col_offset + + temp_node = node + while 'body' in temp_node.__dict__: + temp_node = temp_node.body[-1] + + end_line, end_col = temp_node.lineno, temp_node.col_offset + return (start_line, start_col, end_line, end_col) + + def visit_Assign(self, node): + """ + Visits Assign nodes in a tree. Adds relevant data about them to accum. + + :param node: The current node. + + :type node: ast.Assign + + .. todo:: + Add value and type metadata to accum. 
+ """ + + line, col = node.lineno, node.col_offset + pos = (line, col, -1, -1) + + self.cache.append({'nodes': []}) + self.generic_visit(node) + last = self.cache.pop() + + for name in last['nodes']: + if not self.accum['vars'].has_key(name): + self.accum['vars'][name] = {'assignments': [], 'uses': []} + + self.accum['vars'][name]['assignments'].append(pos) + + + def visit_FunctionDef(self, node): + """ + Visits FunctionDef nodes in a tree. Adds relevant data about them to accum. + + :param node: The current node. + + :type node: ast.FunctionDef + + .. todo:: + Add arguments and decorators metadata to accum. + """ + + start_line, start_col, end_line, end_col = self.block_position(node) + + if not self.accum['functions'].has_key(node.name): + self.accum['functions'][node.name] = {'assignments': [], 'uses': []} + + pos = (start_line, start_col, end_line, end_col) + self.accum['functions'][node.name]['assignments'].append(pos) + + self.generic_visit(node) + + def visit_Call(self, node): + """ + Visits Function Call nodes in a tree. Adds relevant data about them + in the functions section for accum. + + :param node: The current node. + + :type node: ast.Call + + .. todo:: + Add arguments and decorators metadata to accum. + """ + + line, col = node.lineno, node.col_offset + pos = (line, col, -1, -1) + + if isinstance(node.func, ast.Name): + name = node.func.id + else: + name = node.func.attr + + if not self.accum['functions'].has_key(name): + self.accum['functions'][name] = {'assignments': [], 'uses': []} + + self.accum['functions'][name]['uses'].append(pos) + + def visit_ClassDef(self, node): + """ + Visits ClassDef nodes in a tree. Adds relevant data about them to accum. + + :param node: The current node. + + :type node: ast.ClassDef + + .. todo:: + Add arguments, inherits, and decorators metadata to accum. 
+ """ + + start_line, start_col, end_line, end_col = self.block_position(node) + + pos = (start_line, start_col, end_line, end_col) + self.accum['classes'][node.name] = pos + + self.generic_visit(node) + + def visit_Name(self, node): + if self.cache: + last = self.cache[-1] + last['nodes'].append(node.id) + + def visit_Attribute(self, node): + if self.cache: + last = self.cache[-1] + last['nodes'].append(node.attr) + +def parse_py(codelet): + """ + Adds 'symbols' field to the codelet after parsing the python code. + + :param codelet: The codelet object to be parsed. + + :type codelet: Codelet + """ + + tree = ast.parse(codelet.code) + cutter = _CachedWalker() + cutter.visit(tree) + codelet.symbols = cutter.accum diff --git a/docs/source/api/bitshift.crawler.rst b/docs/source/api/bitshift.crawler.rst new file mode 100644 index 0000000..2add004 --- /dev/null +++ b/docs/source/api/bitshift.crawler.rst @@ -0,0 +1,27 @@ +crawler Package +=============== + +:mod:`crawler` Package +---------------------- + +.. automodule:: bitshift.crawler + :members: + :undoc-members: + :show-inheritance: + +:mod:`crawler` Module +--------------------- + +.. automodule:: bitshift.crawler.crawler + :members: + :undoc-members: + :show-inheritance: + +:mod:`indexer` Module +--------------------- + +.. automodule:: bitshift.crawler.indexer + :members: + :undoc-members: + :show-inheritance: + diff --git a/docs/source/api/bitshift.database.rst b/docs/source/api/bitshift.database.rst new file mode 100644 index 0000000..38e20b6 --- /dev/null +++ b/docs/source/api/bitshift.database.rst @@ -0,0 +1,19 @@ +database Package +================ + +:mod:`database` Package +----------------------- + +.. automodule:: bitshift.database + :members: + :undoc-members: + :show-inheritance: + +:mod:`migration` Module +----------------------- + +.. 
automodule:: bitshift.database.migration + :members: + :undoc-members: + :show-inheritance: + diff --git a/docs/source/api/bitshift.rst b/docs/source/api/bitshift.rst index 1b1c703..388ac71 100644 --- a/docs/source/api/bitshift.rst +++ b/docs/source/api/bitshift.rst @@ -33,19 +33,13 @@ bitshift Package :undoc-members: :show-inheritance: -:mod:`database` Module ----------------------- - -.. automodule:: bitshift.database - :members: - :undoc-members: - :show-inheritance: - Subpackages ----------- .. toctree:: + bitshift.crawler + bitshift.database bitshift.parser bitshift.query diff --git a/docs/source/conf.py b/docs/source/conf.py index 5aee357..1f9d1be 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -59,7 +59,7 @@ copyright = u'2014, Benjamin Attal, Ben Kurtovic, Severyn Kozak' # The short X.Y version. version = '0.1' # The full version, including alpha/beta/rc tags. -release = '0.1' +release = '0.1.dev' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/parsers/java/pom.xml b/parsers/java/pom.xml new file mode 100644 index 0000000..c2191b0 --- /dev/null +++ b/parsers/java/pom.xml @@ -0,0 +1,40 @@ + + 4.0.0 + + com.bitshift.parsing + parsing + jar + 1.0-SNAPSHOT + parsing + http://maven.apache.org + + + + junit + junit + 4.11 + + + org.eclipse.jdt + org.eclipse.jdt.core + 3.7.1 + + + + + + + org.codehaus.mojo + exec-maven-plugin + 1.2.1 + + com.bitshift.parsing.Parse + + + + + + + + diff --git a/parsers/java/src/main/java/com/bitshift/parsing/Parse.java b/parsers/java/src/main/java/com/bitshift/parsing/Parse.java new file mode 100644 index 0000000..fc1d36f --- /dev/null +++ b/parsers/java/src/main/java/com/bitshift/parsing/Parse.java @@ -0,0 +1,33 @@ +package com.bitshift.parsing; + +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.io.PrintWriter; +import java.io.IOException; + +import java.net.ServerSocket; +import java.net.Socket; + +import com.bitshift.parsing.parsers.JavaParser; + +public class Parse { + + public static void main(String[] args) { + String fromClient; + String toClient; + + try { + ServerSocket server = new ServerSocket(5002); + + while(true) { + Socket clientSocket = server.accept(); + + JavaParser parser = new JavaParser(clientSocket); + Thread parserTask = new Thread(parser); + parserTask.start(); + } + } catch (IOException ex) { + } + } + +} diff --git a/parsers/java/src/main/java/com/bitshift/parsing/parsers/CParser.java b/parsers/java/src/main/java/com/bitshift/parsing/parsers/CParser.java new file mode 100644 index 0000000..dbe93fb --- /dev/null +++ b/parsers/java/src/main/java/com/bitshift/parsing/parsers/CParser.java @@ -0,0 +1,3 @@ +package com.bitshift.parsing.parsers; + +import com.bitshift.parsing.parsers.Parser; diff --git a/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java b/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java new file mode 100644 index 0000000..4ba3623 --- /dev/null +++ 
b/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java @@ -0,0 +1,221 @@ +package com.bitshift.parsing.parsers; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Stack; + +import java.net.Socket; + +import org.eclipse.jdt.core.JavaCore; +import org.eclipse.jdt.core.dom.AST; +import org.eclipse.jdt.core.dom.ASTParser; +import org.eclipse.jdt.core.dom.ASTVisitor; +import org.eclipse.jdt.core.dom.CompilationUnit; +import org.eclipse.jdt.core.dom.ClassInstanceCreation; +import org.eclipse.jdt.core.dom.FieldDeclaration; +import org.eclipse.jdt.core.dom.MethodDeclaration; +import org.eclipse.jdt.core.dom.MethodInvocation; +import org.eclipse.jdt.core.dom.Name; +import org.eclipse.jdt.core.dom.PackageDeclaration; +import org.eclipse.jdt.core.dom.QualifiedName; +import org.eclipse.jdt.core.dom.SimpleName; +import org.eclipse.jdt.core.dom.Statement; +import org.eclipse.jdt.core.dom.TypeDeclaration; +import org.eclipse.jdt.core.dom.VariableDeclarationFragment; + +import com.bitshift.parsing.parsers.Parser; +import com.bitshift.parsing.symbols.Symbols; +import com.bitshift.parsing.symbols.JavaSymbols; + +/*TODO: Work on parsing partial java code.*/ +public class JavaParser extends Parser { + + public JavaParser(Socket clientSocket) { + super(clientSocket); + } + + @Override + protected Symbols genSymbols() { + char[] source = this.readFromClient().toCharArray(); + + ASTParser parser = ASTParser.newParser(AST.JLS3); + parser.setSource(source); + + Map options = JavaCore.getOptions(); + parser.setCompilerOptions(options); + + CompilationUnit root = (CompilationUnit) parser.createAST(null); + + NodeVisitor visitor = new NodeVisitor(root); + root.accept(visitor); + + return visitor.symbols; + } + + @Override + public void run() { + JavaSymbols symbols = (JavaSymbols) this.genSymbols(); + writeToClient(symbols.toString()); + } + + class NodeVisitor extends ASTVisitor { + + protected CompilationUnit root; + protected 
JavaSymbols symbols; + private Stack> _cache; + + public NodeVisitor(CompilationUnit root) { + this.root = root; + this.symbols = new JavaSymbols(); + this._cache = new Stack>(); + } + + public boolean visit(FieldDeclaration node) { + HashMap data = new HashMap(); + int sl = this.root.getLineNumber(node.getStartPosition()); + int sc = this.root.getColumnNumber(node.getStartPosition()); + + data.put("coord", Symbols.createCoord(sl, sc, -1, -1)); + this._cache.push(data); + return true; + } + + public void endVisit(FieldDeclaration node) { + HashMap data = this._cache.pop(); + String name = (String)data.remove("name"); + this.symbols.insertFieldDeclaration(name, data); + } + + public boolean visit(MethodDeclaration node) { + HashMap data = new HashMap(); + Name nameObj = node.getName(); + String name = nameObj.isQualifiedName() ? + ((QualifiedName) nameObj).getFullyQualifiedName() : + ((SimpleName) nameObj).getIdentifier(); + List statements = node.getBody().statements(); + + int sl = this.root.getLineNumber(node.getStartPosition()); + int sc = this.root.getColumnNumber(node.getStartPosition()); + Integer el = -1; + Integer ec = -1; + + if (statements.size() > 0) { + Statement last = statements.get(statements.size() - 1); + el = this.root.getLineNumber(last.getStartPosition()); + ec = this.root.getColumnNumber(last.getStartPosition()); + } + + data.put("coord", Symbols.createCoord(sl, sc, el, ec)); + data.put("name", name); + this._cache.push(data); + return true; + } + + public void endVisit(MethodDeclaration node) { + HashMap data = this._cache.pop(); + String name = (String)data.remove("name"); + this.symbols.insertMethodDeclaration(name, data); + } + + public boolean visit(MethodInvocation node) { + HashMap data = new HashMap(); + Name nameObj = node.getName(); + String name = nameObj.isQualifiedName() ? 
+ ((QualifiedName) nameObj).getFullyQualifiedName() : + ((SimpleName) nameObj).getIdentifier(); + int sl = this.root.getLineNumber(node.getStartPosition()); + int sc = this.root.getColumnNumber(node.getStartPosition()); + + data.put("coord", Symbols.createCoord(sl, sc, -1, -1)); + data.put("name", name); + this._cache.push(data); + return true; + } + + public void endVisit(MethodInvocation node) { + HashMap data = this._cache.pop(); + String name = (String)data.remove("name"); + this.symbols.insertMethodInvocation(name, data); + } + + public boolean visit(PackageDeclaration node) { + HashMap data = new HashMap(); + this._cache.push(data); + return true; + } + + public void endVisit(PackageDeclaration node) { + HashMap data = this._cache.pop(); + String name = (String)data.remove("name"); + this.symbols.setPackage(name); + } + + public boolean visit(TypeDeclaration node) { + HashMap data = new HashMap(); + + int sl = this.root.getLineNumber(node.getStartPosition()); + int sc = this.root.getColumnNumber(node.getStartPosition()); + + data.put("coord", Symbols.createCoord(sl, sc, -1, -1)); + this._cache.push(data); + return true; + } + + public void endVisit(TypeDeclaration node) { + HashMap data = this._cache.pop(); + String name = (String)data.remove("name"); + + if (node.isInterface()) { + this.symbols.insertInterfaceDeclaration(name, data); + } else { + this.symbols.insertClassDeclaration(name, data); + } + } + + public boolean visit(VariableDeclarationFragment node) { + HashMap data = new HashMap(); + int sl = this.root.getLineNumber(node.getStartPosition()); + int sc = this.root.getColumnNumber(node.getStartPosition()); + + data.put("coord", Symbols.createCoord(sl, sc, -1, -1)); + this._cache.push(data); + return true; + } + + public void endVisit(VariableDeclarationFragment node) { + HashMap data = this._cache.pop(); + String name = (String)data.remove("name"); + this.symbols.insertVariableDeclaration(name, data); + } + + public boolean visit(QualifiedName node) 
{ + if (!this._cache.empty()) { + HashMap data = this._cache.pop(); + + if(!data.containsKey("name")) { + String name = node.getFullyQualifiedName(); + data.put("name", name); + } + + this._cache.push(data); + } + return true; + } + + public boolean visit(SimpleName node) { + if (!this._cache.empty()) { + HashMap data = this._cache.pop(); + + if(!data.containsKey("name")) { + String name = node.getIdentifier(); + data.put("name", name); + } + + this._cache.push(data); + } + return true; + } + + } +} diff --git a/parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java b/parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java new file mode 100644 index 0000000..9d00954 --- /dev/null +++ b/parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java @@ -0,0 +1,64 @@ +package com.bitshift.parsing.parsers; + +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.io.PrintWriter; +import java.io.IOException; + +import java.net.Socket; + +import com.bitshift.parsing.symbols.Symbols; +import com.bitshift.parsing.utils.PackableMemory; + +public abstract class Parser implements Runnable { + + protected Socket clientSocket; + + public Parser(Socket clientSocket) { + this.clientSocket = clientSocket; + } + + protected String readFromClient() { + String fromClient = ""; + + try { + BufferedReader clientReader = new BufferedReader( + new InputStreamReader(this.clientSocket.getInputStream())); + + int bytes = Integer.parseInt(clientReader.readLine()); + + StringBuilder builder = new StringBuilder(); + int i = 0; + + while(i < bytes) { + char aux = (char)clientReader.read(); + builder.append(aux); + i++; + } + + fromClient = builder.toString(); + + } catch (IOException ex) { + } + + return fromClient; + } + + protected void writeToClient(String toClient) { + try { + PrintWriter clientWriter = new PrintWriter( + this.clientSocket.getOutputStream(), true); + + PackableMemory mem = new PackableMemory(toClient.length()); + String 
dataSize = new String(mem.mem); + clientWriter.println(dataSize + toClient); + } catch (IOException ex) { + } + } + + protected abstract Symbols genSymbols(); + + public abstract void run(); + +} + diff --git a/parsers/java/src/main/java/com/bitshift/parsing/symbols/CSymbols.java b/parsers/java/src/main/java/com/bitshift/parsing/symbols/CSymbols.java new file mode 100644 index 0000000..9abd60d --- /dev/null +++ b/parsers/java/src/main/java/com/bitshift/parsing/symbols/CSymbols.java @@ -0,0 +1 @@ +package com.bitshift.parsing.symbols; diff --git a/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java b/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java new file mode 100644 index 0000000..5419d5a --- /dev/null +++ b/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java @@ -0,0 +1,147 @@ +package com.bitshift.parsing.symbols; + +import java.util.HashMap; +import java.util.ArrayList; +import com.bitshift.parsing.symbols.Symbols; + +/*TODO: Overwrite toString.*/ +public class JavaSymbols extends Symbols { + + private String _packageName; + private HashMap> _classes; + private HashMap> _interfaces; + private HashMap> _methods; + private HashMap> _fields; + private HashMap> _vars; + + public JavaSymbols() { + _packageName = null; + _classes = new HashMap>(); + _interfaces = new HashMap>(); + _methods = new HashMap>(); + _fields = new HashMap>(); + _vars = new HashMap>(); + } + + public boolean setPackage(String name) { + _packageName = name; + return true; + } + + public boolean insertClassDeclaration(String name, HashMap data) { + ArrayList assignments = new ArrayList(10); + ArrayList uses = new ArrayList(10); + HashMap klass = new HashMap(); + + assignments.add(data.get("coord")); + klass.put("assignments", assignments); + klass.put("uses", uses); + + this._classes.put(name, klass); + return true; + } + + public boolean insertInterfaceDeclaration(String name, HashMap data) { + this._interfaces.put(name, data); 
+ return true; + } + + public boolean insertMethodDeclaration(String name, HashMap data) { + HashMap method = this._methods.get(name); + if (method == null) { + method = new HashMap(); + ArrayList assignments = new ArrayList(10); + ArrayList uses = new ArrayList(10); + + assignments.add(data.get("coord")); + method.put("assignments", assignments); + method.put("uses", uses); + } else { + ArrayList assignments = (ArrayList)method.get("assignments"); + + assignments.add(data.get("coord")); + method.put("assignments", assignments); + } + + this._methods.put(name, method); + return true; + } + public boolean insertMethodInvocation(String name, HashMap data) { + HashMap method = this._methods.get(name); + if (method == null) { + method = new HashMap(); + ArrayList assignments = new ArrayList(10); + ArrayList uses = new ArrayList(10); + + uses.add(data.get("coord")); + method.put("assignments", assignments); + method.put("uses", uses); + } else { + ArrayList uses = (ArrayList)method.get("uses"); + + uses.add(data.get("coord")); + method.put("uses", uses); + } + + this._methods.put(name, method); + return true; + } + + public boolean insertFieldDeclaration(String name, HashMap data) { + this._fields.put(name, data); + return true; + } + + public boolean insertVariableDeclaration(String name, HashMap data) { + HashMap var = this._vars.get(name); + if (var == null) { + var = new HashMap(); + ArrayList assignments = new ArrayList(10); + ArrayList uses = new ArrayList(10); + + assignments.add(data.get("coord")); + var.put("assignments", assignments); + var.put("uses", uses); + } else { + ArrayList assignments = (ArrayList)var.get("assignments"); + + assignments.add(data.get("coord")); + var.put("assignments", assignments); + } + + this._vars.put(name, var); + return true; + } + public boolean insertVariableAccess(String name, HashMap data) { + HashMap var = this._vars.get(name); + if (var == null) { + var = new HashMap(); + ArrayList assignments = new ArrayList(10); + 
ArrayList uses = new ArrayList(10); + + uses.add(data.get("coord")); + var.put("assignments", assignments); + var.put("uses", uses); + } else { + ArrayList uses = (ArrayList)var.get("uses"); + + uses.add(data.get("coord")); + var.put("uses", uses); + } + + this._vars.put(name, var); + return true; + } + + public String toString() { + StringBuilder builder = new StringBuilder(); + builder.append("classes:" + this._classes + ","); + builder.append("interfaces:" + this._interfaces + ","); + builder.append("methods:" + this._methods + ","); + builder.append("fields:" + this._fields + ","); + builder.append("vars:" + this._vars + ","); + + return "{" + builder.toString() + "}"; + } +} + diff --git a/parsers/java/src/main/java/com/bitshift/parsing/symbols/Symbols.java b/parsers/java/src/main/java/com/bitshift/parsing/symbols/Symbols.java new file mode 100644 index 0000000..7d6c4ea --- /dev/null +++ b/parsers/java/src/main/java/com/bitshift/parsing/symbols/Symbols.java @@ -0,0 +1,17 @@ +package com.bitshift.parsing.symbols; + +import java.util.ArrayList; + +public abstract class Symbols { + + public Symbols() { + + } + + public static ArrayList createCoord(Integer startLine, Integer startCol, Integer endLine, Integer endCol) { + ArrayList coord = new ArrayList(4); + coord.add(startLine); coord.add(startCol); coord.add(endLine); coord.add(endCol); + return coord; + } + +} diff --git a/parsers/java/src/main/java/com/bitshift/parsing/utils/PackableMemory.java b/parsers/java/src/main/java/com/bitshift/parsing/utils/PackableMemory.java new file mode 100644 index 0000000..24d883c --- /dev/null +++ b/parsers/java/src/main/java/com/bitshift/parsing/utils/PackableMemory.java @@ -0,0 +1,89 @@ +package com.bitshift.parsing.utils; + +//This class contains implementations of methods to +// -- pack an integer into 4 consecutive bytes of a byte array +// -- unpack an integer from 4 consecutive bytes of a byte array +// -- exhaustively test the pack and unpack methods. 
+// +// This file should be saved as PackableMemory.java. Once it has been +// compiled, the tester can be invoked by typing "java PackableMemory" + +public class PackableMemory { + int size; + public byte mem[] = null; + + public PackableMemory(int size) + { + this.size = size; + this.mem = new byte[size]; + } + + // Pack the 4-byte integer val into the four bytes mem[loc]...mem[loc+3]. + // The most significant portion of the integer is stored in mem[loc]. + // Bytes are masked out of the integer and stored in the array, working + // from right (least significant) to left (most significant). + void pack(int val, int loc) + { + final int MASK = 0xff; + for (int i = 3; i >= 0; i--) + { + mem[loc+i] = (byte)(val & MASK); + val = val >> 8; + } + } + + // Unpack the four bytes mem[loc]...mem[loc+3] into a 4-byte integer, + // and return the resulting integer value. + // The most significant portion of the integer is stored in mem[loc]. + // Bytes are 'OR'ed into the integer, working from left (most significant) + // to right (least significant) + int unpack(int loc) + { + final int MASK = 0xff; + int v = (int)mem[loc] & MASK; + for (int i = 1; i < 4; i++) + { + v = v << 8; + v = v | ((int)mem[loc+i] & MASK); + } + return v; + } + + + + // Test the above pack and unpack methods by iterating the following + // over all possible 4-byte integers: pack the integer, + // then unpack it, and then verify that the unpacked integer equals the + // original integer. It tests all nonnegative numbers in ascending order + // and then all negative numbers in ascending order. The transition from + // positive to negative numbers happens implicitly due to integer overflow. 
+ public void packTest() + { + + int i = 0; + long k = 0; + do + { + this.pack(i,4); + int j = this.unpack(4); + if (j != i) + { + System.out.printf("pack/unpack test failed: i = %d, j = %d\n",i,j); + System.exit(0); + } + i++; k++; + } + while (i != 0); + System.out.printf("pack/unpack test successful, %d iterations\n",k); + } + + // main routine to test the PackableMemory class by running the + // packTest() method. + public static void main(String[] args) + { + PackableMemory pm = new PackableMemory(100); + pm.packTest(); + System.exit(0); + } +} + diff --git a/parsers/ruby/Gemfile b/parsers/ruby/Gemfile new file mode 100644 index 0000000..cfb76e2 --- /dev/null +++ b/parsers/ruby/Gemfile @@ -0,0 +1,4 @@ +source 'https://rubygems.org' + +gem 'ruby_parser' +gem 'sexp_processor' diff --git a/parsers/ruby/Rakefile b/parsers/ruby/Rakefile new file mode 100644 index 0000000..e66f695 --- /dev/null +++ b/parsers/ruby/Rakefile @@ -0,0 +1,5 @@ +require File.expand_path('../lib/parse_server.rb', __FILE__) + +task :start_server do |t| + start_server +end diff --git a/parsers/ruby/lib/parse_server.rb b/parsers/ruby/lib/parse_server.rb new file mode 100644 index 0000000..916f434 --- /dev/null +++ b/parsers/ruby/lib/parse_server.rb @@ -0,0 +1,36 @@ +require 'socket' +require File.expand_path('../parser.rb', __FILE__) + +def pack_int(i) + bytes = []; mask = 255 + + while bytes.length < 4 + bytes.unshift (i & mask) + i = i >> 8 + end + + return bytes.pack('cccc') +end + + +def start_server + server = TCPServer.new 5003 + + loop do + # Start a new thread for each client accepted + Thread.start(server.accept) do |client| + begin + # Get the amount of data to be read + size = (client.readline).to_i + p = Bitshift::Parser.new client.read(size) + # Get the parsed result + symbols = p.parse + client.puts pack_int(symbols.length) + client.puts symbols + ensure + # Close the socket + client.close + end + end + end +end diff --git a/parsers/ruby/lib/parser.rb b/parsers/ruby/lib/parser.rb 
new file mode 100644
index 0000000..c757fa0
--- /dev/null
+++ b/parsers/ruby/lib/parser.rb
@@ -0,0 +1,132 @@
+require 'json'
+require 'socket'
+require 'ruby_parser'
+require 'sexp_processor'
+
+module Bitshift
+  # Collects the symbols defined and used by a piece of Ruby source code.
+  class Parser
+    # source - String containing the Ruby source to analyse.
+    def initialize(source)
+      @source = source
+    end
+
+    # Parse the source and walk the resulting syntax tree.
+    #
+    # Returns the symbol table serialized as a JSON String.
+    def parse
+      tree = RubyParser.new.parse(@source)
+      # Without a tree (RubyParser can yield nil, e.g. for empty source)
+      # there is nothing to walk: report an empty symbol table.
+      return CachedWalker.new(0, tree).to_s if tree.nil?
+
+      processor = CachedWalker.new tree.line - 1, tree
+      processor.process(tree)
+      return processor.to_s
+    end
+  end
+
+  # SexpProcessor that records the line positions at which modules, classes,
+  # functions and variables are assigned or used.
+  #
+  # Every position is an Array of [start_line, -1, end_line, -1]; the -1
+  # entries are placeholders (presumably for columns, which are not tracked).
+  class CachedWalker < SexpProcessor
+    attr_accessor :symbols
+    attr_accessor :offset
+
+    # offset - Integer subtracted from each node's line number.
+    # tree   - Accepted for compatibility with existing callers; unused.
+    def initialize(offset, tree)
+      super()
+
+      @require_empty = false
+      @offset = offset
+      @symbols = {
+        modules: new_symbol_table,
+        classes: new_symbol_table,
+        functions: new_symbol_table,
+        vars: new_symbol_table
+      }
+    end
+
+    # Position of a multi-line construct (module, class or method).
+    #
+    # The end line is found by following the last element of each nested
+    # node for as long as that element is itself a Sexp.
+    def block_position(exp)
+      start_ln = end_ln = exp.line - offset
+      cur_exp = exp
+
+      while cur_exp.is_a? Sexp
+        # Nodes lacking line information keep the last known end line.
+        end_ln = cur_exp.line - offset unless cur_exp.line.nil?
+        cur_exp = cur_exp.last
+      end
+
+      return [start_ln, -1, end_ln, -1]
+    end
+
+    # Position of a single-line construct (call or assignment).
+    def statement_position(exp)
+      line = exp.line - offset
+      return [line, -1, line, -1]
+    end
+
+    def process_module(exp)
+      return record_symbol(exp, :modules, :assignments, block_position(exp))
+    end
+
+    def process_class(exp)
+      return record_symbol(exp, :classes, :assignments, block_position(exp))
+    end
+
+    def process_defn(exp)
+      return record_symbol(exp, :functions, :assignments, block_position(exp))
+    end
+
+    def process_call(exp)
+      pos = statement_position(exp)
+      exp.shift # node type
+      receiver = exp.shift
+      name = exp.shift
+      symbols[:functions][name][:uses] << pos
+      # Calls chained in the receiver (a.b.c) are uses too; walk them
+      # instead of discarding the receiver.
+      process(receiver) if receiver.is_a? Sexp
+      exp.each_sexp {|s| process(s)}
+      return exp.clear
+    end
+
+    def process_iasgn(exp)
+      return record_symbol(exp, :vars, :assignments, statement_position(exp))
+    end
+
+    def process_lasgn(exp)
+      return record_symbol(exp, :vars, :assignments, statement_position(exp))
+    end
+
+    # Serialize the symbol table as JSON. A real JSON encoder is used because
+    # rewriting Hash#inspect output cannot quote names such as :empty? or :<<.
+    def to_s
+      return JSON.generate(symbols)
+    end
+
+    private
+
+    # Hash that lazily creates an empty entry for every new symbol name.
+    def new_symbol_table
+      return Hash.new {|hash, key| hash[key] = { assignments: [], uses: [] }}
+    end
+
+    # Shared body of the definition/assignment handlers: store +pos+ under
+    # the node's name, then process the node's children.
+    def record_symbol(exp, table, kind, pos)
+      exp.shift # node type
+      name = exp.shift
+      symbols[table][name][kind] << pos
+      exp.each_sexp {|s| process(s)}
+      return exp.clear
+    end
+  end
+end
diff --git a/setup.py b/setup.py
index 47508e9..48d4c42 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 
 setup(
     name = "bitshift",
-    version = "0.1",
+    version = "0.1.dev",
     packages = find_packages(),
     install_requires = [
         "Flask>=0.10.1", "pygments>=1.6", "requests>=2.2.0",
diff --git a/test/parser_test.py b/test/parser_test.py
new file mode 100644
index 0000000..a1cfad3
--- /dev/null
+++ 
b/test/parser_test.py
@@ -0,0 +1,76 @@
+import socket, sys, struct
+
+file_name = 'resources/.c'
+server_socket_number = 5001
+recv_size = 8192
+
+# Source file and server port used to exercise each parser.
+PARSERS = {
+    'c': (file_name, server_socket_number),
+    'java': ("resources/Matrix.java", 5002),
+    'ruby': ("resources/parser.rb", 5003),
+}
+
+
+def recv_response(server_socket):
+    """
+    Read one response from a parse server.
+
+    The first four bytes are a big-endian integer giving the payload size;
+    reading stops once that many payload bytes have arrived or the server
+    closes the connection.
+
+    :param server_socket: Connected socket to read from.
+
+    :return: Everything received after the four-byte size header.
+    """
+
+    received = b''
+    size = None
+
+    while size is None or len(received) - 4 < size:
+        cur_data = server_socket.recv(recv_size)
+        if not cur_data:
+            # The server closed the connection; recv() would keep returning
+            # an empty string, so stop instead of looping forever.
+            break
+
+        received += cur_data
+        if size is None and len(received) >= 4:
+            size = struct.unpack('>i', received[:4])[0]
+
+    return received[4:]
+
+
+if __name__ == '__main__':
+    if len(sys.argv) == 1:
+        print("Please input a parser to test.")
+
+    elif len(sys.argv) > 2:
+        print("Too many arguments.")
+
+    elif sys.argv[1] not in PARSERS:
+        print("Unknown parser: %s" % sys.argv[1])
+
+    else:
+        file_name, server_socket_number = PARSERS[sys.argv[1]]
+
+        # Read the source as bytes so the announced size matches what is sent.
+        with open(file_name, "rb") as source_file:
+            source = source_file.read()
+        header = ("%d\n" % len(source)).encode('ascii')
+
+        server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        try:
+            server_socket.connect(("localhost", server_socket_number))
+            # sendall() keeps writing until the whole request is sent; send()
+            # may return after a partial write.
+            server_socket.sendall(header + source)
+            response = recv_response(server_socket)
+        finally:
+            server_socket.close()
+
+        if not isinstance(response, str):
+            # Python 3 receives bytes; decode them for printing.
+            response = response.decode('utf-8')
+        print(response)
diff --git a/test/resources/Matrix.java b/test/resources/Matrix.java
new file mode 100644
index 0000000..5d641e0
--- /dev/null
+++ b/test/resources/Matrix.java
@@ -0,0 +1,218 @@
+package battlechap;
+
+import java.io.PrintStream;
+
+public class Matrix {
+    private Object[][] _datmatrix;
+
+    public Matrix(int paramInt){
+        this._datmatrix = new Object[paramInt][paramInt];
+    }
+
+    public int size() {
+        return this._datmatrix.length;
+    }
+
+    public Object get(int paramInt1, int paramInt2) {
+        return this._datmatrix[paramInt1][paramInt2];
+    }
+
+    public boolean isEmpty(int paramInt1, int paramInt2) {
+        return 
this._datmatrix[paramInt1][paramInt2] == null; + } + + public boolean equals(Object paramObject) { + boolean bool = true; + if ((paramObject instanceof Matrix)) { + Matrix localMatrix = (Matrix)paramObject; + if (localMatrix.size() == size()) { + for (int i = 0; i < size(); i++) { + for (int j = 0; j < size(); j++) { + if (!localMatrix.get(i, j).equals(get(i, j))) { + bool = false; + break; + } + } + if (!bool) + break; + } + } + else + bool = false; + } + else + { + bool = false; + } + return bool; + } + + public Object set(int paramInt1, int paramInt2, Object paramObject) { + Object localObject = this._datmatrix[paramInt1][paramInt2]; + this._datmatrix[paramInt1][paramInt2] = paramObject; + return localObject; + } + + public void transpose() { + int i = 0; + for (int j = 0; j < size(); j++) { + for (int k = i; k < size(); k++) { + set(j, k, set(k, j, get(j, k))); + } + i++; + } + } + + public static void swapRows(int paramInt1, int paramInt2, Object[][] paramArrayOfObject) { + for (int i = 0; i < paramArrayOfObject[paramInt1].length; i++) { + Object localObject = paramArrayOfObject[paramInt1][i]; + paramArrayOfObject[paramInt1][i] = paramArrayOfObject[paramInt2][i]; + paramArrayOfObject[paramInt2][i] = localObject; + } + } + + public static void swapCols(int paramInt1, int paramInt2, Object[][] paramArrayOfObject) { + for (int i = 0; i < paramArrayOfObject.length; i++) { + Object localObject = paramArrayOfObject[i][paramInt1]; + paramArrayOfObject[i][paramInt1] = paramArrayOfObject[i][paramInt2]; + paramArrayOfObject[i][paramInt2] = localObject; + } + } + + public Object[] getRow(int paramInt) { + Object[] arrayOfObject = new Object[this._datmatrix[paramInt].length]; + for (int i = 0; i < arrayOfObject.length; i++) { + arrayOfObject[i] = this._datmatrix[paramInt][i]; + } + return arrayOfObject; + } + + public Object[] getCol(int paramInt) { + Object[] arrayOfObject = new Object[this._datmatrix[paramInt].length]; + for (int i = 0; i < arrayOfObject.length; i++) { 
+ arrayOfObject[i] = this._datmatrix[i][paramInt]; + } + return arrayOfObject; + } + + public Object[] setRow(int paramInt, Object[] paramArrayOfObject) { + Object[] arrayOfObject = getRow(paramInt); + + for (int i = 0; i < size(); i++) { + set(paramInt, i, paramArrayOfObject[i]); + } + + return arrayOfObject; + } + + public Object[] setCol(int paramInt, Object[] paramArrayOfObject) { + Object[] arrayOfObject = getCol(paramInt); + + for (int i = 0; i < size(); i++) { + set(i, paramInt, paramArrayOfObject[i]); + } + + return arrayOfObject; + } + + public String toString() + { + String str1 = ""; + for (int i = 0; i < this._datmatrix.length; i++) { + if (i < 9) + str1 = str1 + (i + 1) + ": "; + else + str1 = str1 + (i + 1) + ":"; + for (int j = 0; j < this._datmatrix[i].length; j++) { + int k = (this._datmatrix[i][j] + "").length(); + String str2 = " ".substring(k); + str1 = str1 + this._datmatrix[i][j] + str2; + } + str1 = str1 + "\n"; + } + return str1; + } + + public static void print(Object[][] paramArrayOfObject) { + for (int i = 0; i < paramArrayOfObject.length; i++) { + for (int j = 0; j < paramArrayOfObject[i].length; j++) { + int k = (paramArrayOfObject[i][j] + "").length(); + String str = " ".substring(k); + System.out.print(paramArrayOfObject[i][j] + str); + } + System.out.print("\n"); + } + } + + public static void printArray(Object[] paramArrayOfObject) { + for (int i = 0; i < paramArrayOfObject.length; i++) { + int j = (paramArrayOfObject[i] + "").length(); + String str = " ".substring(j); + System.out.print(paramArrayOfObject[i] + str); + } + System.out.print("\n"); + } + + public static void main(String[] paramArrayOfString) { + Matrix localMatrix1 = new Matrix(5); + Matrix localMatrix2 = new Matrix(5); + for (int i = 0; i < localMatrix1.size(); i++) { + for (int j = 0; j < localMatrix1.size(); j++) { + Integer localInteger1 = new Integer((int)(Math.random() * 20.0D)); + localMatrix1.set(i, j, localInteger1); + localMatrix2.set(i, j, localInteger1); + 
} + } + + System.out.println("\nDemonstrating equals method (should be true)\t" + localMatrix2.equals(localMatrix1) + "\n"); + + System.out.println("Demonstrating get method\n" + localMatrix1.get(0, 0) + "\n"); + System.out.println("Demonstrating is empty method\n" + localMatrix1.isEmpty(1, 0) + "\n"); + System.out.println("Demonstrating size method \n" + localMatrix1.size() + "\n"); + System.out.println("Demonstrating toString method\n" + localMatrix1 + "\n"); + localMatrix1.transpose(); + System.out.println("Blop has been transposed\n" + localMatrix1 + "\n"); + + Object[][] arrayOfObject = new Object[4][4]; + for (int j = 0; j < arrayOfObject.length; j++) { + for (int k = 0; k < arrayOfObject[j].length; k++) { + Integer localInteger2 = new Integer((int)(Math.random() * 20.0D)); + arrayOfObject[j][k] = localInteger2; + } + } + System.out.println("\n\n**Swapping Rows Demo**"); + print(arrayOfObject); + System.out.println("\nRows 1 and 2 have been Swapped \n"); + swapRows(1, 2, arrayOfObject); + print(arrayOfObject); + + System.out.println("\n**Swapping Columns Demo**"); + print(arrayOfObject); + System.out.println("\n\nColumns 1 and 2 have been Swapped \n"); + swapCols(1, 2, arrayOfObject); + print(arrayOfObject); + + System.out.println("\n**Getting rows demo (from blop)**"); + System.out.println(localMatrix1); + System.out.println("\nGetting row 1\n"); + printArray(localMatrix1.getRow(1)); + + System.out.println("\n**Getting cols demo (from blop)**"); + System.out.println(localMatrix1); + System.out.println("\nGetting col 1\n"); + printArray(localMatrix1.getCol(1)); + + System.out.println("\n**Demonstrating set row method**"); + System.out.println(localMatrix1); + System.out.println("\nSwitching row 1 of blop to 1st column of blop\n"); + localMatrix1.setRow(1, localMatrix1.getCol(1)); + System.out.println(localMatrix1 + "\n"); + + System.out.println("\n**Demonstrating set col method**"); + System.out.println(localMatrix1); + System.out.println("\nSwitching col 1 
of blop to 2nd row of blop\n"); + localMatrix1.setCol(1, localMatrix1.getRow(2)); + System.out.println(localMatrix1 + "\n"); + } +} + diff --git a/test/resources/parser.rb b/test/resources/parser.rb new file mode 100644 index 0000000..01d934b --- /dev/null +++ b/test/resources/parser.rb @@ -0,0 +1,126 @@ +require 'socket' +require 'ruby_parser' +require 'sexp_processor' + +module Bitshift + class Parser + def initialize(source) + @source = source + end + + def parse + parser = RubyParser.new + tree = parser.parse(@source) + puts tree.inspect + offset = tree.line - 1 + processor = NodeVisitor.new offset + processor.process tree + return processor.symbols + end + end + + class NodeVisitor < SexpProcessor + attr_accessor :symbols + attr_accessor :offset + + def initialize(offset) + super() + @require_empty = false + @offset = offset + + module_hash = Hash.new {|hash, key| hash[key] = Hash.new} + class_hash = module_hash.clone + function_hash = Hash.new {|hash, key| hash[key] = { calls: [] } } + var_hash = Hash.new {|hash, key| hash[key] = [] } + + @symbols = { + modules: module_hash, + classes: class_hash, + functions: function_hash, + vars: var_hash + } + end + + def block_position(exp) + pos = Hash.new + end_ln = (start_ln = exp.line - offset) + cur_exp = exp + + while cur_exp.is_a? 
Sexp + end_ln = cur_exp.line - offset + cur_exp = cur_exp.last + break if cur_exp == nil + end + + pos[:coord] = { + start_ln: start_ln, + end_ln: end_ln } + return pos + end + + def statement_position(exp) + pos = Hash.new + end_ln = start_ln = exp.line - offset + + pos[:coord] = { + start_ln: start_ln, + end_ln: end_ln } + return pos + end + + def process_module(exp) + pos = block_position exp + exp.shift + name = exp.shift + symbols[:modules][name] = pos + exp.each_sexp {|s| process(s)} + return exp.clear + end + + def process_class(exp) + pos = block_position exp + exp.shift + name = exp.shift + symbols[:classes][name] = pos + exp.each_sexp {|s| process(s)} + return exp.clear + end + + def process_defn(exp) + pos = block_position exp + exp.shift + name = exp.shift + symbols[:functions][name][:declaration] = pos + exp.each_sexp {|s| process(s)} + return exp.clear + end + + def process_call(exp) + pos = statement_position exp + exp.shift + exp.shift + name = exp.shift + symbols[:functions][name][:calls] << pos + exp.each_sexp {|s| process(s)} + return exp.clear + end + + def process_iasgn(exp) + pos = statement_position exp + exp.shift + name = exp.shift + symbols[:vars][name] << pos + exp.each_sexp {|s| process(s)} + return exp.clear + end + + def process_lasgn(exp) + pos = statement_position exp + exp.shift + name = exp.shift + symbols[:vars][name] << pos + exp.each_sexp {|s| process(s)} + return exp.clear + end + end +end