瀏覽代碼

Merge branch 'feature/parser' into feature/query_parser

tags/v1.0^2
Ben Kurtovic 10 年之前
父節點
當前提交
7b54df6335
共有 29 個檔案被更改,包括 1624 行新增10 行删除
  1. +11
    -0
      .gitignore
  2. +6
    -0
      README.md
  3. +8
    -1
      bitshift/__init__.py
  4. +2
    -0
      bitshift/codelet.py
  5. +2
    -0
      bitshift/languages.py
  6. +93
    -0
      bitshift/parser/__init__.py
  7. +106
    -0
      bitshift/parser/c.py
  8. +160
    -0
      bitshift/parser/python.py
  9. +27
    -0
      docs/source/api/bitshift.crawler.rst
  10. +19
    -0
      docs/source/api/bitshift.database.rst
  11. +2
    -7
      docs/source/api/bitshift.rst
  12. +1
    -1
      docs/source/conf.py
  13. +40
    -0
      parsers/java/pom.xml
  14. +33
    -0
      parsers/java/src/main/java/com/bitshift/parsing/Parse.java
  15. +3
    -0
      parsers/java/src/main/java/com/bitshift/parsing/parsers/CParser.java
  16. +221
    -0
      parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java
  17. +64
    -0
      parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java
  18. +1
    -0
      parsers/java/src/main/java/com/bitshift/parsing/symbols/CSymbols.java
  19. +147
    -0
      parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java
  20. +17
    -0
      parsers/java/src/main/java/com/bitshift/parsing/symbols/Symbols.java
  21. +89
    -0
      parsers/java/src/main/java/com/bitshift/parsing/utils/PackableMemory.java
  22. +4
    -0
      parsers/ruby/Gemfile
  23. +5
    -0
      parsers/ruby/Rakefile
  24. +36
    -0
      parsers/ruby/lib/parse_server.rb
  25. +126
    -0
      parsers/ruby/lib/parser.rb
  26. +1
    -1
      setup.py
  27. +56
    -0
      test/parser_test.py
  28. +218
    -0
      test/resources/Matrix.java
  29. +126
    -0
      test/resources/parser.rb

+ 11
- 0
.gitignore 查看文件

@@ -1,3 +1,4 @@
*.swp
.sass-cache
.DS_Store
.my.cnf
@@ -39,3 +40,13 @@ nosetests.xml
.mr.developer.cfg
.project
.pydevproject

# Maven
target

# Ruby
!parsers/ruby/lib

# Ctags
*/tags
log

+ 6
- 0
README.md 查看文件

@@ -32,3 +32,9 @@ root. Note that this will revert any custom changes made to the files in
`docs/source/api`, so you might want to update them by hand instead.

[SASS]: http://sass-lang.com/guide

Releasing
---------

- Update `__version__` in `bitshift/__init__.py`, `version` in `setup.py`, and
`version` and `release` in `docs/source/conf.py`.

+ 8
- 1
bitshift/__init__.py 查看文件

@@ -1 +1,8 @@
from . import assets, codelet, config, database, parser, query, crawler
# -*- coding: utf-8 -*-

__author__ = "Benjamin Attal, Ben Kurtovic, Severyn Kozak"
__copyright__ = "Copyright (c) 2014 Benjamin Attal, Ben Kurtovic, Severyn Kozak"
__license__ = "MIT License"
__version__ = "0.1.dev"

from . import assets, codelet, config, crawler, database, parser, query

+ 2
- 0
bitshift/codelet.py 查看文件

@@ -18,6 +18,8 @@ class Codelet(object):
code was last modified.
:ivar rank: (float) A quantification of the source code's quality, as
per available ratings (stars, forks, upvotes, etc.).
:ivar symbols: (dict) Dictionary containing dictionaries of functions, classes,
variable definitions, etc.
"""

def __init__(self, name, code, filename, language, authors, code_url,


+ 2
- 0
bitshift/languages.py 查看文件

@@ -0,0 +1,2 @@

LANGS = ["Python", "C", "Java", "Ruby"]

+ 93
- 0
bitshift/parser/__init__.py 查看文件

@@ -0,0 +1,93 @@
import json, pygments.lexers as pgl, sys, socket, struct
from ..languages import LANGS
from .python import parse_py

# Public API of the parser package (the name must be ``__all__`` to take effect).
__all__ = ["parse"]

class UnsupportedFileError(Exception):
    """Raised when no lexer can be found for a codelet's filename."""

def _lang(codelet):
    """
    Private function to identify the language of a codelet.

    The lexer is guessed from the codelet's filename when one is set, and
    from its code otherwise. Both paths return the same kind of value: the
    position of the lexer's name in ``LANGS``.

    :param codelet: The codelet object to be identified.

    :type codelet: Codelet

    :return: The index of the codelet's language in ``LANGS``.
    :rtype: int

    :raises UnsupportedFileError: If no lexer matches the filename, or the
        detected language is not listed in ``LANGS``.

    .. todo::
        Modify function to incorporate tags from stackoverflow.
    """

    if codelet.filename is not None:
        try:
            lexer = pgl.guess_lexer_for_filename(codelet.filename, '')
        except Exception:
            raise UnsupportedFileError('Could not find a lexer for the codelet\'s filename')
    else:
        lexer = pgl.guess_lexer(codelet.code)

    # LANGS holds language names, so look up the lexer's name, not the lexer.
    name = lexer.name
    if name not in LANGS:
        raise UnsupportedFileError('Unsupported language: %s' % name)

    return LANGS.index(name)

def _recv_data(server_socket):
"""
Private function to read string response from a server. It reads a certain
amount of data based on the size it is sent from the server.

:param server_socket: The server that the client is connected to, and will,
read from.

:type code: socket.ServerSocket
"""

recv_size = 8192
total_data = []; size_data = cur_data = ''
total_size = 0; size = sys.maxint

while total_size < size:
cur_data = server_socket.recv(recv_size)

if not total_data:
if len(size_data) > 4:
size_data += cur_data
size = struct.unpack('>i', size_data[:4])[0]
recv_size = size
if recv_size > sys.maxint: recv_size = sys.maxint
total_data.append(size_data[4:])
else:
size_data += cur_data

else:
total_data.append(cur_data)

total_size = sum([len(s) for s in total_data])

server_socket.close()
return ''.join(total_data);


def parse(codelet):
    """
    Dispatches the codelet to the correct parser based on its language.
    It is the job of the respective parsers to accumulate data about the
    code and to convert it into a string representing a python dict.
    The codelet is then given the dict as its 'symbols' field.

    Python codelets are parsed in-process by ``parse_py``. Every other
    language is sent to a parse server on localhost, port ``5000 + lang``,
    as the source length, a newline, then the source itself; the JSON reply
    becomes the codelet's symbols.

    :param codelet: The codelet object to be parsed.

    :type codelet: Codelet
    """

    lang = _lang(codelet)
    source = codelet.code
    codelet.language = lang
    server_socket_number = 5000 + lang

    if lang == LANGS.index('Python'):
        parse_py(codelet)
        return

    server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        server_socket.connect(("localhost", server_socket_number))
        # sendall() keeps writing until the whole request is out; send() may
        # stop after a partial write on large sources.
        server_socket.sendall("%d\n%s" % (len(source), source))
    except Exception:
        # Do not leak the socket when the server is unreachable.
        server_socket.close()
        raise

    symbols = json.loads(_recv_data(server_socket))
    codelet.symbols = symbols


+ 106
- 0
bitshift/parser/c.py 查看文件

@@ -0,0 +1,106 @@
from pycparser import c_parser, c_ast

class _TreeCutter(c_ast.NodeVisitor):
    """
    Local node visitor for c abstract syntax trees.

    :ivar accum: (dict) Information on variables, functions, and structs
        accumulated from an abstract syntax tree.

    :ivar cache: (dict or None) Information stored about the Decl node that is
        currently being visited. Added to accum once all of that node's
        children have been visited; None outside of any Decl.

    .. todo::
        Add visit function for c_ast.ID to record all uses of a variable.

        Use self.cache to store extra information about variables.
    """

    def __init__(self):
        """
        Create a _TreeCutter instance.
        """

        self.accum = {'vars': {}, 'functions': {}, 'structs': {}}
        self.cache = None

    def start_n_end(self, node):
        # Placeholder; not implemented yet.
        pass

    def _reclassify(self, group, node):
        """
        Mark the declaration currently being visited as belonging to `group`
        and move its end position to `node`. Does nothing when the node is
        not nested inside a Decl.
        """

        if self.cache is None:
            return

        self.cache['group'] = group
        self.cache['meta']['end_ln'] = node.coord.line
        self.cache['meta']['end_col'] = node.coord.column

    def visit_FuncDecl(self, node):
        """
        Visits FuncDecl nodes in a tree. Marks the enclosing declaration as a
        function, then visits all of the node's children.

        :param node: The current node.

        :type node: c_ast.FuncDecl

        .. todo::
            Add other relevant information about functions like parameters and
            return type.
        """

        self._reclassify('functions', node)
        self.generic_visit(node)

    def visit_Struct(self, node):
        """
        Visits Struct nodes in a tree. Marks the enclosing declaration as a
        struct, then visits all of the node's children.

        :param node: The current node.

        :type node: c_ast.Struct

        .. todo::
            Find other relevant information to add about structs.
        """

        self._reclassify('structs', node)
        self.generic_visit(node)

    def visit_Decl(self, node):
        """
        Visits Decl nodes in a tree. Adds relevant data about them to accum
        after visiting all of its children as well.

        :param node: The current node.

        :type node: c_ast.Decl
        """

        # Declarations nest (function parameters, struct members), so keep the
        # enclosing declaration's cache and put it back once this one is done.
        outer_cache = self.cache
        self.cache = {'group': 'vars', 'meta': {}}

        self.cache['meta']['start_ln'] = node.coord.line
        self.cache['meta']['start_col'] = node.coord.column
        self.cache['meta']['end_ln'] = node.coord.line
        self.cache['meta']['end_col'] = node.coord.column

        try:
            self.generic_visit(node)
            self.accum[self.cache['group']][node.name] = self.cache['meta']
        finally:
            self.cache = outer_cache

def parse_c(codelet):
    """
    Parses the codelet's c code and stores the symbols found in it as the
    codelet's 'symbols' field.

    :param codelet: The codelet object to be parsed.

    :type codelet: Codelet

    .. todo::
        Preprocess c code so that no ParseErrors are thrown.
    """

    parser = c_parser.CParser()
    syntax_tree = parser.parse(codelet.code)

    visitor = _TreeCutter()
    visitor.visit(syntax_tree)

    codelet.symbols = visitor.accum

+ 160
- 0
bitshift/parser/python.py 查看文件

@@ -0,0 +1,160 @@
import ast

class _CachedWalker(ast.NodeVisitor):
"""
Local node visitor for python abstract syntax trees.

:ivar accum: (dict) Information on variables, functions, and classes
accumulated from an abstract syntax tree.

:ivar cache: (dict or None) Information stored about parent nodes. Added
to accum when node reaches the lowest possible level.

.. todo::
Add visit funciton for ast.Name to record all uses of a variable.

Use self.cache to store extra information about nodes.
"""

def __init__(self):
"""
Create a _TreeCutter instance.
"""

self.accum = {'vars': {}, 'functions': {}, 'classes': {}}
self.cache = []

def block_position(self, node):
"""
Helper function to get the start and end lines of an AST node.

:param node: The node.

:type node: ast.FunctionDef or ast.ClassDef or ast.Module
"""

start_line, start_col = node.lineno, node.col_offset

temp_node = node
while 'body' in temp_node.__dict__:
temp_node = temp_node.body[-1]

end_line, end_col = temp_node.lineno, temp_node.col_offset
return (start_line, start_col, end_line, end_col)

def visit_Assign(self, node):
"""
Visits Assign nodes in a tree. Adds relevant data about them to accum.

:param node: The current node.

:type node: ast.Assign

.. todo::
Add value and type metadata to accum.
"""

line, col = node.lineno, node.col_offset
pos = (line, col, -1, -1)

self.cache.append({'nodes': []})
self.generic_visit(node)
last = self.cache.pop()

for name in last['nodes']:
if not self.accum['vars'].has_key(name):
self.accum['vars'][name] = {'assignments': [], 'uses': []}

self.accum['vars'][name]['assignments'].append(pos)


def visit_FunctionDef(self, node):
"""
Visits FunctionDef nodes in a tree. Adds relevant data about them to accum.

:param node: The current node.

:type node: ast.FunctionDef

.. todo::
Add arguments and decorators metadata to accum.
"""

start_line, start_col, end_line, end_col = self.block_position(node)

if not self.accum['functions'].has_key(node.name):
self.accum['functions'][node.name] = {'assignments': [], 'uses': []}

pos = (start_line, start_col, end_line, end_col)
self.accum['functions'][node.name]['assignments'].append(pos)

self.generic_visit(node)

def visit_Call(self, node):
"""
Visits Function Call nodes in a tree. Adds relevant data about them
in the functions section for accum.

:param node: The current node.

:type node: ast.Call

.. todo::
Add arguments and decorators metadata to accum.
"""

line, col = node.lineno, node.col_offset
pos = (line, col, -1, -1)

if isinstance(node.func, ast.Name):
name = node.func.id
else:
name = node.func.attr

if not self.accum['functions'].has_key(name):
self.accum['functions'][name] = {'assignments': [], 'uses': []}

self.accum['functions'][name]['uses'].append(pos)

def visit_ClassDef(self, node):
"""
Visits ClassDef nodes in a tree. Adds relevant data about them to accum.

:param node: The current node.

:type node: ast.ClassDef

.. todo::
Add arguments, inherits, and decorators metadata to accum.
"""

start_line, start_col, end_line, end_col = self.block_position(node)

pos = (start_line, start_col, end_line, end_col)
self.accum['classes'][node.name] = pos

self.generic_visit(node)

def visit_Name(self, node):
if self.cache:
last = self.cache[-1]
last['nodes'].append(node.id)

def visit_Attribute(self, node):
if self.cache:
last = self.cache[-1]
last['nodes'].append(node.attr)

def parse_py(codelet):
    """
    Parses the codelet's python code and stores the symbols found in it as
    the codelet's 'symbols' field.

    :param codelet: The codelet object to be parsed.

    :type codelet: Codelet
    """

    syntax_tree = ast.parse(codelet.code)

    walker = _CachedWalker()
    walker.visit(syntax_tree)

    codelet.symbols = walker.accum

+ 27
- 0
docs/source/api/bitshift.crawler.rst 查看文件

@@ -0,0 +1,27 @@
crawler Package
===============

:mod:`crawler` Package
----------------------

.. automodule:: bitshift.crawler
:members:
:undoc-members:
:show-inheritance:

:mod:`crawler` Module
---------------------

.. automodule:: bitshift.crawler.crawler
:members:
:undoc-members:
:show-inheritance:

:mod:`indexer` Module
---------------------

.. automodule:: bitshift.crawler.indexer
:members:
:undoc-members:
:show-inheritance:


+ 19
- 0
docs/source/api/bitshift.database.rst 查看文件

@@ -0,0 +1,19 @@
database Package
================

:mod:`database` Package
-----------------------

.. automodule:: bitshift.database
:members:
:undoc-members:
:show-inheritance:

:mod:`migration` Module
-----------------------

.. automodule:: bitshift.database.migration
:members:
:undoc-members:
:show-inheritance:


+ 2
- 7
docs/source/api/bitshift.rst 查看文件

@@ -33,19 +33,13 @@ bitshift Package
:undoc-members:
:show-inheritance:

:mod:`database` Module

.. automodule:: bitshift.database
:members:
:undoc-members:
:show-inheritance:

Subpackages
-----------

.. toctree::

bitshift.crawler
bitshift.database
bitshift.parser
bitshift.query


+ 1
- 1
docs/source/conf.py 查看文件

@@ -59,7 +59,7 @@ copyright = u'2014, Benjamin Attal, Ben Kurtovic, Severyn Kozak'
# The short X.Y version.
version = '0.1'
# The full version, including alpha/beta/rc tags.
release = '0.1'
release = '0.1.dev'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.


+ 40
- 0
parsers/java/pom.xml 查看文件

@@ -0,0 +1,40 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>com.bitshift.parsing</groupId>
<artifactId>parsing</artifactId>
<packaging>jar</packaging>
<version>1.0-SNAPSHOT</version>
<name>parsing</name>
<url>http://maven.apache.org</url>

<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
</dependency>
<dependency>
<groupId>org.eclipse.jdt</groupId>
<artifactId>org.eclipse.jdt.core</artifactId>
<version>3.7.1</version>
</dependency>
</dependencies>

<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>1.2.1</version>
<configuration>
<mainClass>com.bitshift.parsing.Parse</mainClass>
<arguments>
</arguments>
</configuration>
</plugin>
</plugins>
</build>

</project>

+ 33
- 0
parsers/java/src/main/java/com/bitshift/parsing/Parse.java 查看文件

@@ -0,0 +1,33 @@
package com.bitshift.parsing;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.IOException;

import java.net.ServerSocket;
import java.net.Socket;

import com.bitshift.parsing.parsers.JavaParser;

/**
 * Entry point of the Java parse server. Listens on port 5002 and hands every
 * accepted connection to a {@link JavaParser} running on its own thread.
 */
public class Parse {

    /**
     * Accepts clients until the server socket fails.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        ServerSocket server = null;

        try {
            server = new ServerSocket(5002);

            while (true) {
                Socket clientSocket = server.accept();

                JavaParser parser = new JavaParser(clientSocket);
                Thread parserTask = new Thread(parser);
                parserTask.start();
            }
        } catch (IOException ex) {
            // Report why the server stopped instead of exiting silently.
            System.err.println("Java parse server stopped: " + ex.getMessage());
        } finally {
            if (server != null) {
                try {
                    server.close();
                } catch (IOException ex) {
                    // Nothing left to do; the process is shutting down.
                }
            }
        }
    }

}

+ 3
- 0
parsers/java/src/main/java/com/bitshift/parsing/parsers/CParser.java 查看文件

@@ -0,0 +1,3 @@
package com.bitshift.parsing.parsers;

import com.bitshift.parsing.parsers.Parser;

+ 221
- 0
parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java 查看文件

@@ -0,0 +1,221 @@
package com.bitshift.parsing.parsers;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;

import java.net.Socket;

import org.eclipse.jdt.core.JavaCore;
import org.eclipse.jdt.core.dom.AST;
import org.eclipse.jdt.core.dom.ASTParser;
import org.eclipse.jdt.core.dom.ASTVisitor;
import org.eclipse.jdt.core.dom.CompilationUnit;
import org.eclipse.jdt.core.dom.ClassInstanceCreation;
import org.eclipse.jdt.core.dom.FieldDeclaration;
import org.eclipse.jdt.core.dom.MethodDeclaration;
import org.eclipse.jdt.core.dom.MethodInvocation;
import org.eclipse.jdt.core.dom.Name;
import org.eclipse.jdt.core.dom.PackageDeclaration;
import org.eclipse.jdt.core.dom.QualifiedName;
import org.eclipse.jdt.core.dom.SimpleName;
import org.eclipse.jdt.core.dom.Statement;
import org.eclipse.jdt.core.dom.TypeDeclaration;
import org.eclipse.jdt.core.dom.VariableDeclarationFragment;

import com.bitshift.parsing.parsers.Parser;
import com.bitshift.parsing.symbols.Symbols;
import com.bitshift.parsing.symbols.JavaSymbols;

/*TODO: Work on parsing partial java code.*/
/**
 * Parser task for one client: reads Java source from the client socket,
 * walks its AST and writes the collected symbols back.
 */
public class JavaParser extends Parser {

    /**
     * @param clientSocket socket of the client whose source is parsed
     */
    public JavaParser(Socket clientSocket) {
        super(clientSocket);
    }

    /**
     * Reads the client's source, builds a JLS3 AST from it and collects the
     * symbols found by a {@link NodeVisitor}.
     *
     * @return the collected symbols (a {@link JavaSymbols})
     */
    @Override
    protected Symbols genSymbols() {
        char[] source = this.readFromClient().toCharArray();

        ASTParser parser = ASTParser.newParser(AST.JLS3);
        parser.setSource(source);

        Map options = JavaCore.getOptions();
        parser.setCompilerOptions(options);

        CompilationUnit root = (CompilationUnit) parser.createAST(null);

        NodeVisitor visitor = new NodeVisitor(root);
        root.accept(visitor);

        return visitor.symbols;
    }

    /**
     * Parses the client's source, sends the symbols back and closes the
     * client socket, whether or not parsing succeeded.
     */
    @Override
    public void run() {
        try {
            JavaSymbols symbols = (JavaSymbols) this.genSymbols();
            writeToClient(symbols.toString());
        } finally {
            try {
                this.clientSocket.close();
            } catch (java.io.IOException ex) {
                // Best effort: the reply (if any) has already been written.
            }
        }
    }

    /**
     * AST visitor that fills a {@link JavaSymbols}. Each visited declaration
     * or invocation pushes a data map on a stack in visit() and pops it in
     * the matching endVisit(), where it is inserted into the symbols.
     */
    class NodeVisitor extends ASTVisitor {

        protected CompilationUnit root;
        protected JavaSymbols symbols;
        private Stack<HashMap<String, Object>> _cache;

        public NodeVisitor(CompilationUnit root) {
            this.root = root;
            this.symbols = new JavaSymbols();
            this._cache = new Stack<HashMap<String, Object>>();
        }

        /**
         * Creates a data map whose "coord" starts at the given source
         * position and ends at the given line/column (-1 when unknown).
         */
        private HashMap<String, Object> entryAt(int startPosition, int endLine, int endCol) {
            HashMap<String, Object> data = new HashMap<String, Object>();
            int sl = this.root.getLineNumber(startPosition);
            int sc = this.root.getColumnNumber(startPosition);

            data.put("coord", Symbols.createCoord(sl, sc, endLine, endCol));
            return data;
        }

        /** Pops the current data map and returns it; its name is removed into nameOut[0]. */
        private HashMap<String, Object> popEntry(String[] nameOut) {
            HashMap<String, Object> data = this._cache.pop();
            nameOut[0] = (String) data.remove("name");
            return data;
        }

        /** Gives the innermost open data map a name, unless it already has one. */
        private void nameTopEntry(String name) {
            if (!this._cache.empty()) {
                HashMap<String, Object> data = this._cache.peek();

                if (!data.containsKey("name")) {
                    data.put("name", name);
                }
            }
        }

        public boolean visit(FieldDeclaration node) {
            HashMap<String, Object> data = entryAt(node.getStartPosition(), -1, -1);

            // Name the field after its first declared variable. Leaving the
            // name to the generic name visitors would pick up the field's
            // type name (or nothing at all for primitive types).
            VariableDeclarationFragment first =
                    (VariableDeclarationFragment) node.fragments().get(0);
            data.put("name", first.getName().getIdentifier());

            this._cache.push(data);
            return true;
        }

        public void endVisit(FieldDeclaration node) {
            String[] name = new String[1];
            HashMap<String, Object> data = popEntry(name);
            this.symbols.insertFieldDeclaration(name[0], data);
        }

        public boolean visit(MethodDeclaration node) {
            int el = -1;
            int ec = -1;

            // Abstract, native and interface methods have no body.
            if (node.getBody() != null) {
                List<?> statements = node.getBody().statements();

                if (!statements.isEmpty()) {
                    Statement last = (Statement) statements.get(statements.size() - 1);
                    el = this.root.getLineNumber(last.getStartPosition());
                    ec = this.root.getColumnNumber(last.getStartPosition());
                }
            }

            HashMap<String, Object> data = entryAt(node.getStartPosition(), el, ec);
            data.put("name", node.getName().getFullyQualifiedName());
            this._cache.push(data);
            return true;
        }

        public void endVisit(MethodDeclaration node) {
            String[] name = new String[1];
            HashMap<String, Object> data = popEntry(name);
            this.symbols.insertMethodDeclaration(name[0], data);
        }

        public boolean visit(MethodInvocation node) {
            HashMap<String, Object> data = entryAt(node.getStartPosition(), -1, -1);
            data.put("name", node.getName().getFullyQualifiedName());
            this._cache.push(data);
            return true;
        }

        public void endVisit(MethodInvocation node) {
            String[] name = new String[1];
            HashMap<String, Object> data = popEntry(name);
            this.symbols.insertMethodInvocation(name[0], data);
        }

        public boolean visit(PackageDeclaration node) {
            // No coordinates are kept for the package; only its name is used.
            HashMap<String, Object> data = new HashMap<String, Object>();
            this._cache.push(data);
            return true;
        }

        public void endVisit(PackageDeclaration node) {
            String[] name = new String[1];
            popEntry(name);
            this.symbols.setPackage(name[0]);
        }

        public boolean visit(TypeDeclaration node) {
            this._cache.push(entryAt(node.getStartPosition(), -1, -1));
            return true;
        }

        public void endVisit(TypeDeclaration node) {
            String[] name = new String[1];
            HashMap<String, Object> data = popEntry(name);

            if (node.isInterface()) {
                this.symbols.insertInterfaceDeclaration(name[0], data);
            } else {
                this.symbols.insertClassDeclaration(name[0], data);
            }
        }

        public boolean visit(VariableDeclarationFragment node) {
            this._cache.push(entryAt(node.getStartPosition(), -1, -1));
            return true;
        }

        public void endVisit(VariableDeclarationFragment node) {
            String[] name = new String[1];
            HashMap<String, Object> data = popEntry(name);
            this.symbols.insertVariableDeclaration(name[0], data);
        }

        public boolean visit(QualifiedName node) {
            nameTopEntry(node.getFullyQualifiedName());
            return true;
        }

        public boolean visit(SimpleName node) {
            nameTopEntry(node.getIdentifier());
            return true;
        }

    }
}

+ 64
- 0
parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java 查看文件

@@ -0,0 +1,64 @@
package com.bitshift.parsing.parsers;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.IOException;

import java.net.Socket;

import com.bitshift.parsing.symbols.Symbols;
import com.bitshift.parsing.utils.PackableMemory;

public abstract class Parser implements Runnable {

protected Socket clientSocket;

public Parser(Socket clientSocket) {
this.clientSocket = clientSocket;
}

protected String readFromClient() {
String fromClient = "";

try {
BufferedReader clientReader = new BufferedReader(
new InputStreamReader(this.clientSocket.getInputStream()));

int bytes = Integer.parseInt(clientReader.readLine());

StringBuilder builder = new StringBuilder();
int i = 0;

while(i < bytes) {
char aux = (char)clientReader.read();
builder.append(aux);
i++;
}

fromClient = builder.toString();

} catch (IOException ex) {
}

return fromClient;
}

protected void writeToClient(String toClient) {
try {
PrintWriter clientWriter = new PrintWriter(
this.clientSocket.getOutputStream(), true);

PackableMemory mem = new PackableMemory(toClient.length());
String dataSize = new String(mem.mem);
clientWriter.println(dataSize + toClient);
} catch (IOException ex) {
}
}

protected abstract Symbols genSymbols();

public abstract void run();

}


+ 1
- 0
parsers/java/src/main/java/com/bitshift/parsing/symbols/CSymbols.java 查看文件

@@ -0,0 +1 @@
package com.bitshift.parsing.symbols;

+ 147
- 0
parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java 查看文件

@@ -0,0 +1,147 @@
package com.bitshift.parsing.symbols;

import java.util.HashMap;
import java.util.ArrayList;
import com.bitshift.parsing.symbols.Symbols;

/*TODO: Overwrite toString.*/
/**
 * Symbol table for one parsed Java source: classes, interfaces, methods,
 * fields and variables, keyed by name. {@link #toString()} renders the table
 * as JSON.
 */
public class JavaSymbols extends Symbols {

    private String _packageName;
    private HashMap<String, HashMap<String, Object>> _classes;
    private HashMap<String, HashMap<String, Object>> _interfaces;
    private HashMap<String, HashMap<String, Object>> _methods;
    private HashMap<String, HashMap<String, Object>> _fields;
    private HashMap<String, HashMap<String, Object>> _vars;

    public JavaSymbols() {
        _packageName = null;
        _classes = new HashMap<String, HashMap<String, Object>>();
        _interfaces = new HashMap<String, HashMap<String, Object>>();
        _methods = new HashMap<String, HashMap<String, Object>>();
        _fields = new HashMap<String, HashMap<String, Object>>();
        _vars = new HashMap<String, HashMap<String, Object>>();
    }

    /** Creates an entry with empty "assignments" and "uses" lists. */
    private static HashMap<String, Object> newEntry() {
        HashMap<String, Object> entry = new HashMap<String, Object>();
        entry.put("assignments", new ArrayList<Object>(10));
        entry.put("uses", new ArrayList<Object>(10));
        return entry;
    }

    /**
     * Appends coord to the "assignments" or "uses" list (chosen by kind) of
     * the entry for name, creating the entry first when it does not exist.
     */
    @SuppressWarnings("unchecked")
    private static void record(HashMap<String, HashMap<String, Object>> table,
            String name, String kind, Object coord) {
        HashMap<String, Object> entry = table.get(name);
        if (entry == null) {
            entry = newEntry();
            table.put(name, entry);
        }

        ((ArrayList<Object>) entry.get(kind)).add(coord);
    }

    public boolean setPackage(String name) {
        _packageName = name;
        return true;
    }

    /**
     * Records a class declaration. A later declaration with the same name
     * replaces the earlier entry.
     */
    public boolean insertClassDeclaration(String name, HashMap<String, Object> data) {
        this._classes.remove(name);
        record(this._classes, name, "assignments", data.get("coord"));
        return true;
    }

    /** Stores the interface's data map as given, under its name. */
    public boolean insertInterfaceDeclaration(String name, HashMap<String, Object> data) {
        this._interfaces.put(name, data);
        return true;
    }

    public boolean insertMethodDeclaration(String name, HashMap<String, Object> data) {
        record(this._methods, name, "assignments", data.get("coord"));
        return true;
    }

    public boolean insertMethodInvocation(String name, HashMap<String, Object> data) {
        record(this._methods, name, "uses", data.get("coord"));
        return true;
    }

    /** Stores the field's data map as given, under its name. */
    public boolean insertFieldDeclaration(String name, HashMap<String, Object> data) {
        this._fields.put(name, data);
        return true;
    }

    public boolean insertVariableDeclaration(String name, HashMap<String, Object> data) {
        record(this._vars, name, "assignments", data.get("coord"));
        return true;
    }

    public boolean insertVariableAccess(String name, HashMap<String, Object> data) {
        record(this._vars, name, "uses", data.get("coord"));
        return true;
    }

    /** Appends s to out as a JSON string literal. */
    private static void appendJsonString(StringBuilder out, String s) {
        out.append('"');
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
                case '"':  out.append("\\\""); break;
                case '\\': out.append("\\\\"); break;
                case '\n': out.append("\\n"); break;
                case '\r': out.append("\\r"); break;
                case '\t': out.append("\\t"); break;
                default:
                    if (c < 0x20) {
                        out.append(String.format("\\u%04x", (int) c));
                    } else {
                        out.append(c);
                    }
            }
        }
        out.append('"');
    }

    /**
     * Appends value to out as JSON. Maps become objects, iterables become
     * arrays, numbers and booleans are written as-is, null becomes null and
     * everything else is written as a string.
     */
    private static void appendJson(StringBuilder out, Object value) {
        if (value == null) {
            out.append("null");
        } else if (value instanceof java.util.Map) {
            out.append('{');
            boolean first = true;
            for (java.util.Map.Entry<?, ?> e : ((java.util.Map<?, ?>) value).entrySet()) {
                if (!first) {
                    out.append(',');
                }
                first = false;
                appendJsonString(out, String.valueOf(e.getKey()));
                out.append(':');
                appendJson(out, e.getValue());
            }
            out.append('}');
        } else if (value instanceof Iterable) {
            out.append('[');
            boolean first = true;
            for (Object item : (Iterable<?>) value) {
                if (!first) {
                    out.append(',');
                }
                first = false;
                appendJson(out, item);
            }
            out.append(']');
        } else if (value instanceof Number || value instanceof Boolean) {
            out.append(value);
        } else {
            appendJsonString(out, value.toString());
        }
    }

    /**
     * @return the symbol table as a JSON object with the keys "classes",
     *         "interfaces", "methods", "fields" and "vars"
     */
    public String toString() {
        StringBuilder builder = new StringBuilder("{");
        builder.append("\"classes\":");
        appendJson(builder, this._classes);
        builder.append(",\"interfaces\":");
        appendJson(builder, this._interfaces);
        builder.append(",\"methods\":");
        appendJson(builder, this._methods);
        builder.append(",\"fields\":");
        appendJson(builder, this._fields);
        builder.append(",\"vars\":");
        appendJson(builder, this._vars);

        return builder.append("}").toString();
    }
}


+ 17
- 0
parsers/java/src/main/java/com/bitshift/parsing/symbols/Symbols.java 查看文件

@@ -0,0 +1,17 @@
package com.bitshift.parsing.symbols;

import java.util.ArrayList;

/**
 * Base class of the per-language symbol tables.
 */
public abstract class Symbols {

    public Symbols() {
    }

    /**
     * Builds a coordinate list in the order start line, start column,
     * end line, end column.
     *
     * @return a new four-element list holding the arguments in that order
     */
    public static ArrayList<Integer> createCoord(Integer startLine, Integer startCol, Integer endLine, Integer endCol) {
        Integer[] parts = { startLine, startCol, endLine, endCol };
        ArrayList<Integer> coord = new ArrayList<Integer>(parts.length);

        for (Integer part : parts) {
            coord.add(part);
        }

        return coord;
    }

}

+ 89
- 0
parsers/java/src/main/java/com/bitshift/parsing/utils/PackableMemory.java 查看文件

@@ -0,0 +1,89 @@
package com.bitshift.parsing.utils;

//This class contains implementations of methods to
// -- pack an integer into 4 consecutive bytes of a byte array
// -- unpack an integer from 4 consecutive bytes of a byte array
// -- exhaustively test the pack and unpack methods.
//
// This file should be saved as PackableMemory.java. Once it has been
// compiled, the tester can be invoked by typing "java PackableMemory"

/**
 * A byte buffer with helpers to pack an int into, and unpack an int from,
 * four consecutive bytes in big-endian order.
 */
public class PackableMemory {
    int size;
    public byte[] mem = null;

    /**
     * @param size number of bytes in the buffer
     */
    public PackableMemory(int size)
    {
        this.size = size;
        this.mem = new byte[size];
    }

    // Pack the 4-byte integer val into the four bytes mem[loc]...mem[loc+3].
    // The most significant portion of the integer is stored in mem[loc].
    // Bytes are masked out of the integer and stored in the array, working
    // from right (least significant) to left (most significant).
    void pack(int val, int loc)
    {
        final int MASK = 0xff;
        for (int i = 3; i >= 0; i--)
        {
            mem[loc+i] = (byte)(val & MASK);
            val = val >> 8;
        }
    }

    // Unpack the four bytes mem[loc]...mem[loc+3] into a 4-byte integer,
    // and return the resulting integer value.
    // The most significant portion of the integer is stored in mem[loc].
    // Bytes are 'OR'ed into the integer, working from left (most significant)
    // to right (least significant).
    int unpack(int loc)
    {
        final int MASK = 0xff;
        int v = (int)mem[loc] & MASK;
        for (int i = 1; i < 4; i++)
        {
            v = v << 8;
            v = v | ((int)mem[loc+i] & MASK);
        }
        return v;
    }

    // Test the above pack and unpack methods by iterating the following
    // over all possible 4-byte integers: pack the integer,
    // then unpack it, and then verify that the unpacked integer equals the
    // original integer. It tests all nonnegative numbers in ascending order
    // and then all negative numbers in ascending order. The transition from
    // positive to negative numbers happens implicitly due to integer overflow.
    // Exits the JVM with status 1 on the first mismatch.
    public void packTest()
    {
        int i = 0;
        long k = 0;
        do
        {
            this.pack(i,4);
            int j = this.unpack(4);
            if (j != i)
            {
                System.out.printf("pack/unpack test failed: i = %d, j = %d\n",i,j);
                // A failed test must not report success to the caller.
                System.exit(1);
            }
            i++; k++;
        }
        while (i != 0);
        System.out.printf("pack/unpack test successful, %d iterations\n",k);
    }

    // main routine to test the PackableMemory class by running the
    // packTest() method.
    public static void main(String[] args)
    {
        PackableMemory pm = new PackableMemory(100);
        pm.packTest();
        System.exit(0);
    }
}


+ 4
- 0
parsers/ruby/Gemfile 查看文件

@@ -0,0 +1,4 @@
source 'https://rubygems.org'

gem 'ruby_parser'
gem 'sexp_processor'

+ 5
- 0
parsers/ruby/Rakefile 查看文件

@@ -0,0 +1,5 @@
require File.expand_path('../lib/parse_server.rb', __FILE__)

# Rake task that runs the Ruby parse server defined in lib/parse_server.rb.
# The task does not finish on its own: start_server loops forever.
task :start_server do |t|
start_server
end

+ 36
- 0
parsers/ruby/lib/parse_server.rb 查看文件

@@ -0,0 +1,36 @@
require 'socket'
require File.expand_path('../parser.rb', __FILE__)

# Packs the low 32 bits of an integer into a 4-byte binary string, most
# significant byte first.
def pack_int(i)
  [i & 0xFFFFFFFF].pack('N')
end


# Runs the Ruby parse server on port 5003, serving each client on its own
# thread. A request is a line holding the size of the source followed by the
# source itself; the reply is the byte size of the result packed by pack_int,
# followed by the result.
def start_server
  server = TCPServer.new 5003

  loop do
    # Start a new thread for each client accepted
    Thread.start(server.accept) do |client|
      begin
        # Get the amount of data to be read
        size = client.readline.to_i
        parser = Bitshift::Parser.new client.read(size)
        # Get the parsed result
        symbols = parser.parse
        # write, not puts: puts would add newlines inside the binary framing.
        # The header counts bytes, so use bytesize rather than length.
        client.write pack_int(symbols.bytesize)
        client.write symbols
      ensure
        # Close the socket
        client.close
      end
    end
  end
end

+ 126
- 0
parsers/ruby/lib/parser.rb 查看文件

@@ -0,0 +1,126 @@
require 'socket'
require 'ruby_parser'
require 'sexp_processor'

module Bitshift
class Parser
def initialize(source)
@source = source
end

def parse
parser = RubyParser.new
tree = parser.parse(@source)
offset = tree.line - 1
processor = CachedWalker.new offset, tree
processor.process(tree)
return processor.to_s
end
end

# Walks a RubyParser syntax tree and records the lines on which modules,
# classes, functions and variables are assigned or used.
#
# symbols has the shape
#   { modules: {...}, classes: {...}, functions: {...}, vars: {...} }
# where each inner hash maps a name to
#   { assignments: [position, ...], uses: [position, ...] }
# and a position is [start_line, -1, end_line, -1]. The -1 entries are
# fixed placeholders (presumably column numbers, which are not tracked
# here -- TODO confirm against the consumer of this data).
class CachedWalker < SexpProcessor
  attr_accessor :symbols
  attr_accessor :offset

  # offset - number subtracted from every node line before it is recorded.
  # tree   - root of the tree; kept for interface compatibility, not used.
  def initialize(offset, tree)
    super()

    # SexpProcessor setting. NOTE(review): presumably relaxes the check
    # that a handler leaves its node empty -- confirm in sexp_processor.
    @require_empty = false
    @offset = offset
    @symbols = {
      modules: new_symbol_table,
      classes: new_symbol_table,
      functions: new_symbol_table,
      vars: new_symbol_table
    }
  end

  # Line span of a block-like node (module, class or method definition).
  #
  # The end line is found by descending into the last child for as long as
  # it is a Sexp and taking the line of the deepest such node.
  #
  # Returns [start_line, -1, end_line, -1].
  def block_position(exp)
    start_ln = end_ln = exp.line - offset
    node = exp

    while node.is_a? Sexp
      end_ln = node.line - offset
      node = node.last
    end

    [start_ln, -1, end_ln, -1]
  end

  # Position of a single-line node: start and end are the node's own line.
  #
  # Returns [line, -1, line, -1].
  def statement_position(exp)
    line = exp.line - offset
    [line, -1, line, -1]
  end

  # Records a module definition and processes its body.
  def process_module(exp)
    pos = block_position(exp)
    exp.shift # drop the node type
    record_symbol(exp, :modules, :assignments, pos)
  end

  # Records a class definition and processes its body.
  def process_class(exp)
    pos = block_position(exp)
    exp.shift # drop the node type
    record_symbol(exp, :classes, :assignments, pos)
  end

  # Records a method definition and processes its body.
  def process_defn(exp)
    pos = block_position(exp)
    exp.shift # drop the node type
    record_symbol(exp, :functions, :assignments, pos)
  end

  # Records a method call as a use of that function name.
  def process_call(exp)
    pos = statement_position(exp)
    exp.shift # drop the node type
    # The element after the type is discarded without being processed.
    # NOTE(review): if that element is the call's receiver, calls nested in
    # the receiver are never recorded -- confirm whether this is intended.
    exp.shift
    record_symbol(exp, :functions, :uses, pos)
  end

  # Records an assignment handled under the :iasgn node type.
  def process_iasgn(exp)
    pos = statement_position(exp)
    exp.shift # drop the node type
    record_symbol(exp, :vars, :assignments, pos)
  end

  # Records an assignment handled under the :lasgn node type.
  def process_lasgn(exp)
    pos = statement_position(exp)
    exp.shift # drop the node type
    record_symbol(exp, :vars, :assignments, pos)
  end

  # Serialises the collected symbols as a JSON object.
  #
  # The previous implementation rewrote Hash#to_s output with a regular
  # expression that only recognised keys made of word characters, so names
  # such as :[], :<< or :empty? (all of which process_call records) were
  # left in Ruby syntax. It also depended on the "=>" inspect format.
  # JSON.generate converts every key to a string and escapes it correctly.
  #
  # Returns a String.
  def to_s
    JSON.generate(symbols)
  end

  private

  # Builds one symbol table: a hash that creates an empty
  # { assignments: [], uses: [] } entry the first time a name is looked up.
  def new_symbol_table
    Hash.new { |table, name| table[name] = { assignments: [], uses: [] } }
  end

  # Shared tail of every handler: takes the name from the front of exp,
  # appends pos to symbols[table][name][kind], processes the remaining child
  # nodes and returns the emptied node.
  def record_symbol(exp, table, kind, pos)
    name = exp.shift
    symbols[table][name][kind] << pos
    exp.each_sexp { |child| process(child) }
    exp.clear
  end
end
end

+ 1
- 1
setup.py 查看文件

@@ -2,7 +2,7 @@ from setuptools import setup, find_packages

setup(
name = "bitshift",
version = "0.1",
version = "0.1.dev",
packages = find_packages(),
install_requires = [
"Flask>=0.10.1", "pygments>=1.6", "requests>=2.2.0",


+ 56
- 0
test/parser_test.py 查看文件

@@ -0,0 +1,56 @@
import socket, sys, struct

file_name = 'resources/<name>.c'
server_socket_number = 5001
recv_size = 8192

# Parser name -> (sample source file, port of that parser's server).
# The C entry keeps the placeholder path and port defined above.
PARSERS = {
    'c': (file_name, server_socket_number),
    'java': ("resources/Matrix.java", 5002),
    'ruby': ("resources/parser.rb", 5003),
}


def recv_response(sock, chunk_size=recv_size):
    """Read one length-prefixed response from a parser server.

    The response starts with a 4-byte big-endian signed size, followed by
    the payload. Data is read in chunks until at least `size` payload bytes
    have arrived or the peer closes the connection.

    :param sock: connected socket to read from.
    :param chunk_size: maximum number of bytes requested per recv call.
    :return: the payload bytes received after the header. Empty if the
        connection closed before a complete header arrived; shorter than
        announced if the peer closed early.
    """
    header_len = 4
    buf = b''

    # Collect the size header; it may arrive split across several reads.
    while len(buf) < header_len:
        chunk = sock.recv(chunk_size)
        if not chunk:
            # Peer closed the connection: recv would keep returning an
            # empty string, so looping on would never terminate.
            return b''
        buf += chunk

    size = struct.unpack('>i', buf[:header_len])[0]
    chunks = [buf[header_len:]]
    received = len(chunks[0])

    while received < size:
        chunk = sock.recv(chunk_size)
        if not chunk:
            break
        chunks.append(chunk)
        received += len(chunk)

    return b''.join(chunks)


if __name__ == '__main__':
    if len(sys.argv) == 1:
        print("Please input a parser to test.")

    elif len(sys.argv) > 2:
        print("Too many arguments.")

    elif sys.argv[1] not in PARSERS:
        # Previously an unknown name silently fell back to the C settings.
        print("Unknown parser: %s" % sys.argv[1])

    else:
        file_name, server_socket_number = PARSERS[sys.argv[1]]

        # Read bytes so the announced length is the number of bytes sent.
        with open(file_name, "rb") as source_file:
            source = source_file.read()

        server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            server_socket.connect(("localhost", server_socket_number))
            # sendall: plain send may transmit only part of the request.
            header = ("%d\n" % len(source)).encode("ascii")
            server_socket.sendall(header + source)
            data = recv_response(server_socket)
        finally:
            server_socket.close()

        # On Python 2 the payload is already a str; on Python 3 decode it.
        if not isinstance(data, str):
            data = data.decode("utf-8", "replace")
        print(data)

+ 218
- 0
test/resources/Matrix.java 查看文件

@@ -0,0 +1,218 @@
package battlechap;

import java.io.PrintStream;

// Square matrix of Objects backed by a two-dimensional array.
// This file is the sample input that test/parser_test.py sends to the Java
// parser server.
public class Matrix {
private Object[][] _datmatrix;

// Creates a paramInt x paramInt matrix; all cells start out null.
public Matrix(int paramInt){
this._datmatrix = new Object[paramInt][paramInt];
}

// Returns the length of the first dimension of the backing array.
public int size() {
return this._datmatrix.length;
}

// Returns the object stored at [paramInt1][paramInt2].
public Object get(int paramInt1, int paramInt2) {
return this._datmatrix[paramInt1][paramInt2];
}

// Returns true when the cell at [paramInt1][paramInt2] is null.
public boolean isEmpty(int paramInt1, int paramInt2) {
return this._datmatrix[paramInt1][paramInt2] == null;
}

// Returns true only when paramObject is a Matrix of the same size whose
// cells all equal this matrix's cells according to Object.equals.
// NOTE(review): equals is invoked on the other matrix's cell, so a null
// cell there throws NullPointerException. No hashCode override is visible
// in this class.
public boolean equals(Object paramObject) {
boolean bool = true;
if ((paramObject instanceof Matrix)) {
Matrix localMatrix = (Matrix)paramObject;
if (localMatrix.size() == size()) {
for (int i = 0; i < size(); i++) {
for (int j = 0; j < size(); j++) {
if (!localMatrix.get(i, j).equals(get(i, j))) {
bool = false;
break;
}
}
// Leave the outer loop as well once a mismatch has been found.
if (!bool)
break;
}
}
else
bool = false;
}
else
{
bool = false;
}
return bool;
}

// Stores paramObject at [paramInt1][paramInt2] and returns the value that
// was there before.
public Object set(int paramInt1, int paramInt2, Object paramObject) {
Object localObject = this._datmatrix[paramInt1][paramInt2];
this._datmatrix[paramInt1][paramInt2] = paramObject;
return localObject;
}

// Transposes the matrix in place by swapping [j][k] with [k][j].
// i is incremented together with j, so the inner loop starts at k = j.
public void transpose() {
int i = 0;
for (int j = 0; j < size(); j++) {
for (int k = i; k < size(); k++) {
// The inner set returns the old [k][j] value, which the outer set stores
// at [j][k].
set(j, k, set(k, j, get(j, k)));
}
i++;
}
}

// Swaps rows paramInt1 and paramInt2 of paramArrayOfObject element by
// element, iterating over the length of row paramInt1.
public static void swapRows(int paramInt1, int paramInt2, Object[][] paramArrayOfObject) {
for (int i = 0; i < paramArrayOfObject[paramInt1].length; i++) {
Object localObject = paramArrayOfObject[paramInt1][i];
paramArrayOfObject[paramInt1][i] = paramArrayOfObject[paramInt2][i];
paramArrayOfObject[paramInt2][i] = localObject;
}
}

// Swaps columns paramInt1 and paramInt2 in every row of paramArrayOfObject.
public static void swapCols(int paramInt1, int paramInt2, Object[][] paramArrayOfObject) {
for (int i = 0; i < paramArrayOfObject.length; i++) {
Object localObject = paramArrayOfObject[i][paramInt1];
paramArrayOfObject[i][paramInt1] = paramArrayOfObject[i][paramInt2];
paramArrayOfObject[i][paramInt2] = localObject;
}
}

// Returns a new array holding a copy of row paramInt.
public Object[] getRow(int paramInt) {
Object[] arrayOfObject = new Object[this._datmatrix[paramInt].length];
for (int i = 0; i < arrayOfObject.length; i++) {
arrayOfObject[i] = this._datmatrix[paramInt][i];
}
return arrayOfObject;
}

// Returns a new array holding a copy of column paramInt.
// The result is sized from the length of row paramInt, which equals the
// number of rows because the constructor builds a square array.
public Object[] getCol(int paramInt) {
Object[] arrayOfObject = new Object[this._datmatrix[paramInt].length];
for (int i = 0; i < arrayOfObject.length; i++) {
arrayOfObject[i] = this._datmatrix[i][paramInt];
}
return arrayOfObject;
}

// Replaces row paramInt with the first size() elements of
// paramArrayOfObject and returns a copy of the row's previous contents.
public Object[] setRow(int paramInt, Object[] paramArrayOfObject) {
Object[] arrayOfObject = getRow(paramInt);

for (int i = 0; i < size(); i++) {
set(paramInt, i, paramArrayOfObject[i]);
}

return arrayOfObject;
}

// Replaces column paramInt with the first size() elements of
// paramArrayOfObject and returns a copy of the column's previous contents.
public Object[] setCol(int paramInt, Object[] paramArrayOfObject) {
Object[] arrayOfObject = getCol(paramInt);

for (int i = 0; i < size(); i++) {
set(i, paramInt, paramArrayOfObject[i]);
}

return arrayOfObject;
}

// Builds a text rendering with one line per row, each prefixed with its
// 1-based row number and a colon.
// NOTE(review): substring(k) on the pad literal throws when k is larger
// than the literal's length -- confirm the pad width is what was intended.
public String toString()
{
String str1 = "";
for (int i = 0; i < this._datmatrix.length; i++) {
if (i < 9)
str1 = str1 + (i + 1) + ": ";
else
str1 = str1 + (i + 1) + ":";
for (int j = 0; j < this._datmatrix[i].length; j++) {
// k is the length of the cell's string form; str2 is the padding that
// follows the cell.
int k = (this._datmatrix[i][j] + "").length();
String str2 = " ".substring(k);
str1 = str1 + this._datmatrix[i][j] + str2;
}
str1 = str1 + "\n";
}
return str1;
}

// Prints a two-dimensional array to standard output, one row per line,
// padding every cell the same way toString does.
public static void print(Object[][] paramArrayOfObject) {
for (int i = 0; i < paramArrayOfObject.length; i++) {
for (int j = 0; j < paramArrayOfObject[i].length; j++) {
int k = (paramArrayOfObject[i][j] + "").length();
String str = " ".substring(k);
System.out.print(paramArrayOfObject[i][j] + str);
}
System.out.print("\n");
}
}

// Prints a one-dimensional array on a single line of standard output.
public static void printArray(Object[] paramArrayOfObject) {
for (int i = 0; i < paramArrayOfObject.length; i++) {
int j = (paramArrayOfObject[i] + "").length();
String str = " ".substring(j);
System.out.print(paramArrayOfObject[i] + str);
}
System.out.print("\n");
}

// Demo driver: fills two 5x5 matrices with the same random Integers, then
// exercises the methods of this class and prints the results.
public static void main(String[] paramArrayOfString) {
Matrix localMatrix1 = new Matrix(5);
Matrix localMatrix2 = new Matrix(5);
for (int i = 0; i < localMatrix1.size(); i++) {
for (int j = 0; j < localMatrix1.size(); j++) {
// The same Integer instance is stored in both matrices.
Integer localInteger1 = new Integer((int)(Math.random() * 20.0D));
localMatrix1.set(i, j, localInteger1);
localMatrix2.set(i, j, localInteger1);
}
}

System.out.println("\nDemonstrating equals method (should be true)\t" + localMatrix2.equals(localMatrix1) + "\n");

System.out.println("Demonstrating get method\n" + localMatrix1.get(0, 0) + "\n");
System.out.println("Demonstrating is empty method\n" + localMatrix1.isEmpty(1, 0) + "\n");
System.out.println("Demonstrating size method \n" + localMatrix1.size() + "\n");
System.out.println("Demonstrating toString method\n" + localMatrix1 + "\n");
localMatrix1.transpose();
System.out.println("Blop has been transposed\n" + localMatrix1 + "\n");

// A separate 4x4 array of random Integers for the static swap demos.
Object[][] arrayOfObject = new Object[4][4];
for (int j = 0; j < arrayOfObject.length; j++) {
for (int k = 0; k < arrayOfObject[j].length; k++) {
Integer localInteger2 = new Integer((int)(Math.random() * 20.0D));
arrayOfObject[j][k] = localInteger2;
}
}
System.out.println("\n\n**Swapping Rows Demo**");
print(arrayOfObject);
System.out.println("\nRows 1 and 2 have been Swapped \n");
swapRows(1, 2, arrayOfObject);
print(arrayOfObject);

System.out.println("\n**Swapping Columns Demo**");
print(arrayOfObject);
System.out.println("\n\nColumns 1 and 2 have been Swapped \n");
swapCols(1, 2, arrayOfObject);
print(arrayOfObject);

System.out.println("\n**Getting rows demo (from blop)**");
System.out.println(localMatrix1);
System.out.println("\nGetting row 1\n");
printArray(localMatrix1.getRow(1));

System.out.println("\n**Getting cols demo (from blop)**");
System.out.println(localMatrix1);
System.out.println("\nGetting col 1\n");
printArray(localMatrix1.getCol(1));

System.out.println("\n**Demonstrating set row method**");
System.out.println(localMatrix1);
System.out.println("\nSwitching row 1 of blop to 1st column of blop\n");
localMatrix1.setRow(1, localMatrix1.getCol(1));
System.out.println(localMatrix1 + "\n");

System.out.println("\n**Demonstrating set col method**");
System.out.println(localMatrix1);
System.out.println("\nSwitching col 1 of blop to 2nd row of blop\n");
localMatrix1.setCol(1, localMatrix1.getRow(2));
System.out.println(localMatrix1 + "\n");
}
}


+ 126
- 0
test/resources/parser.rb 查看文件

@@ -0,0 +1,126 @@
require 'socket'
require 'ruby_parser'
require 'sexp_processor'

module Bitshift
# Parses Ruby source text and returns the symbols found in it.
# (This file is the sample input used by test/parser_test.py for the Ruby
# parser.)
class Parser
# source - the Ruby source text to parse.
def initialize(source)
@source = source
end

# Parses @source with RubyParser and walks the tree with a NodeVisitor.
#
# Returns the visitor's symbols hash.
def parse
parser = RubyParser.new
tree = parser.parse(@source)
# Writes the inspected syntax tree to standard output.
puts tree.inspect
# Root node line minus one; NodeVisitor subtracts it from every line.
offset = tree.line - 1
processor = NodeVisitor.new offset
processor.process tree
return processor.symbols
end
end

# SexpProcessor subclass that records where modules, classes, functions and
# variables appear in a syntax tree.
#
# symbols layout:
#   modules / classes - name => position hash
#   functions         - name => { calls: [positions], declaration: position }
#   vars              - name => [positions]
# A position hash is { coord: { start_ln: ..., end_ln: ... } }.
class NodeVisitor < SexpProcessor
attr_accessor :symbols
attr_accessor :offset

# offset - number subtracted from every node line before it is recorded.
def initialize(offset)
super()
# SexpProcessor setting. NOTE(review): presumably relaxes the check that a
# handler leaves its node empty -- confirm in sexp_processor.
@require_empty = false
@offset = offset

# Each table creates its default entry the first time a name is looked up.
module_hash = Hash.new {|hash, key| hash[key] = Hash.new}
class_hash = module_hash.clone
function_hash = Hash.new {|hash, key| hash[key] = { calls: [] } }
var_hash = Hash.new {|hash, key| hash[key] = [] }

@symbols = {
modules: module_hash,
classes: class_hash,
functions: function_hash,
vars: var_hash
}
end

# Line span of a block-like node. The end line is taken from the deepest
# node reached by repeatedly following the last child while it is a Sexp.
#
# Returns { coord: { start_ln:, end_ln: } }.
def block_position(exp)
pos = Hash.new
end_ln = (start_ln = exp.line - offset)
cur_exp = exp

while cur_exp.is_a? Sexp
end_ln = cur_exp.line - offset
cur_exp = cur_exp.last
break if cur_exp == nil
end

pos[:coord] = {
start_ln: start_ln,
end_ln: end_ln }
return pos
end

# Position of a single-line node: start and end are the node's own line.
#
# Returns { coord: { start_ln:, end_ln: } }.
def statement_position(exp)
pos = Hash.new
end_ln = start_ln = exp.line - offset

pos[:coord] = {
start_ln: start_ln,
end_ln: end_ln }
return pos
end

# Stores the position of a module definition under its name (replacing any
# earlier entry for that name), then processes the child nodes.
def process_module(exp)
pos = block_position exp
# First shift drops the node type; the second yields the name.
exp.shift
name = exp.shift
symbols[:modules][name] = pos
exp.each_sexp {|s| process(s)}
return exp.clear
end

# Stores the position of a class definition under its name (replacing any
# earlier entry for that name), then processes the child nodes.
def process_class(exp)
pos = block_position exp
exp.shift
name = exp.shift
symbols[:classes][name] = pos
exp.each_sexp {|s| process(s)}
return exp.clear
end

# Stores the position of a method definition as the function's
# :declaration, then processes the child nodes.
def process_defn(exp)
pos = block_position exp
exp.shift
name = exp.shift
symbols[:functions][name][:declaration] = pos
exp.each_sexp {|s| process(s)}
return exp.clear
end

# Appends the position of a method call to the function's :calls list.
def process_call(exp)
pos = statement_position exp
# Drops the node type and the element after it (not processed); the third
# element is used as the name.
exp.shift
exp.shift
name = exp.shift
symbols[:functions][name][:calls] << pos
exp.each_sexp {|s| process(s)}
return exp.clear
end

# Appends the position of an :iasgn assignment to the variable's list.
def process_iasgn(exp)
pos = statement_position exp
exp.shift
name = exp.shift
symbols[:vars][name] << pos
exp.each_sexp {|s| process(s)}
return exp.clear
end

# Appends the position of an :lasgn assignment to the variable's list.
def process_lasgn(exp)
pos = statement_position exp
exp.shift
name = exp.shift
symbols[:vars][name] << pos
exp.each_sexp {|s| process(s)}
return exp.clear
end
end
end

Loading…
取消
儲存