@@ -14,7 +14,6 @@ from threading import Event | |||||
from .crawler import GitHubCrawler, BitbucketCrawler | from .crawler import GitHubCrawler, BitbucketCrawler | ||||
from .indexer import GitIndexer, GitRepository | from .indexer import GitIndexer, GitRepository | ||||
from ..parser import start_parse_servers | |||||
__all__ = ["crawl"] | __all__ = ["crawl"] | ||||
@@ -31,7 +30,6 @@ def crawl(): | |||||
""" | """ | ||||
_configure_logging() | _configure_logging() | ||||
parse_servers = start_parse_servers() | |||||
time.sleep(5) | time.sleep(5) | ||||
repo_clone_queue = Queue.Queue(maxsize=MAX_URL_QUEUE_SIZE) | repo_clone_queue = Queue.Queue(maxsize=MAX_URL_QUEUE_SIZE) | ||||
@@ -62,8 +60,6 @@ def crawl(): | |||||
repo_clone_queue.queue.clear() | repo_clone_queue.queue.clear() | ||||
for thread in threads: | for thread in threads: | ||||
thread.join() | thread.join() | ||||
for server in parse_servers: | |||||
server.terminate() | |||||
def _configure_logging(): | def _configure_logging(): | ||||
# This isn't ideal, since it means the bitshift python package must be kept | # This isn't ideal, since it means the bitshift python package must be kept | ||||
@@ -1,39 +1,71 @@ | |||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | |||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> | |||||
<modelVersion>4.0.0</modelVersion> | |||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | |||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> | |||||
<modelVersion>4.0.0</modelVersion> | |||||
<groupId>com.bitshift.parsing</groupId> | |||||
<artifactId>parsing</artifactId> | |||||
<packaging>jar</packaging> | |||||
<version>1.0-SNAPSHOT</version> | |||||
<name>parsing</name> | |||||
<url>http://maven.apache.org</url> | |||||
<groupId>com.bitshift.parsing</groupId> | |||||
<artifactId>parsing</artifactId> | |||||
<packaging>jar</packaging> | |||||
<version>1.0-SNAPSHOT</version> | |||||
<name>parsing</name> | |||||
<url>http://maven.apache.org</url> | |||||
<dependencies> | |||||
<dependency> | |||||
<groupId>junit</groupId> | |||||
<artifactId>junit</artifactId> | |||||
<version>4.11</version> | |||||
</dependency> | |||||
<dependency> | |||||
<groupId>org.eclipse.jdt</groupId> | |||||
<artifactId>org.eclipse.jdt.core</artifactId> | |||||
<version>3.7.1</version> | |||||
</dependency> | |||||
</dependencies> | |||||
<dependencies> | |||||
<dependency> | |||||
<groupId>junit</groupId> | |||||
<artifactId>junit</artifactId> | |||||
<version>4.11</version> | |||||
</dependency> | |||||
<dependency> | |||||
<groupId>org.eclipse.jdt</groupId> | |||||
<artifactId>org.eclipse.jdt.core</artifactId> | |||||
<version>3.7.1</version> | |||||
</dependency> | |||||
<dependency> | |||||
<groupId>com.google.guava</groupId> | |||||
<artifactId>guava</artifactId> | |||||
<version>17.0</version> | |||||
</dependency> | |||||
</dependencies> | |||||
<build> | |||||
<plugins> | |||||
<plugin> | |||||
<groupId>org.codehaus.mojo</groupId> | |||||
<artifactId>exec-maven-plugin</artifactId> | |||||
<version>1.2.1</version> | |||||
<configuration> | |||||
<mainClass>com.bitshift.parsing.Parse</mainClass> | |||||
<arguments> | |||||
</arguments> | |||||
</configuration> | |||||
</plugin> | |||||
<build> | |||||
<plugins> | |||||
<plugin> | |||||
<groupId>org.codehaus.mojo</groupId> | |||||
<artifactId>exec-maven-plugin</artifactId> | |||||
<version>1.2.1</version> | |||||
<configuration> | |||||
<mainClass>com.bitshift.parsing.Parse</mainClass> | |||||
<arguments> | |||||
</arguments> | |||||
</configuration> | |||||
</plugin> | |||||
<plugin> | |||||
<artifactId>maven-assembly-plugin</artifactId> | |||||
<version>2.4</version> | |||||
<executions> | |||||
<execution> | |||||
<id>make-assembly</id> | |||||
<phase>package</phase> | |||||
<goals> | |||||
<goal>single</goal> | |||||
</goals> | |||||
<configuration> | |||||
<archive> | |||||
<manifest> | |||||
<addClasspath>true</addClasspath> | |||||
<mainClass>com.bitshift.parsing.Parse</mainClass> | |||||
</manifest> | |||||
</archive> | |||||
<descriptorRefs> | |||||
<descriptorRef>jar-with-dependencies</descriptorRef> | |||||
</descriptorRefs> | |||||
<outputDirectory>${project.basedir}</outputDirectory> | |||||
<finalName>${project.artifactId}</finalName> | |||||
<appendAssemblyId>false</appendAssemblyId> | |||||
</configuration> | |||||
</execution> | |||||
</executions> | |||||
</plugin> | |||||
</plugins> | </plugins> | ||||
</build> | </build> | ||||
@@ -1,13 +1,35 @@ | |||||
package com.bitshift.parsing; | package com.bitshift.parsing; | ||||
import com.bitshift.parsing.utils.ParseServer; | |||||
import java.io.BufferedReader; | |||||
import java.io.BufferedWriter; | |||||
import java.io.IOException; | |||||
import java.io.InputStreamReader; | |||||
import java.io.OutputStreamWriter; | |||||
import com.bitshift.parsing.parsers.JavaParser; | |||||
// Command-line entry point of the Java parser.
// NOTE(review): this span interleaves two variants of main() from a diff:
// a ParseServer-based one (first three statements) and a stdin/stdout one
// (the try block). Confirm which variant is live before changing code.
public class Parse { | public class Parse { | ||||
public static void main(String[] args) { | public static void main(String[] args) { | ||||
// Server variant: args[0] is parsed as the port for a ParseServer, which is
// then run on its own thread.
ParseServer server = new ParseServer(Integer.parseInt(args[0])); | |||||
System.out.println("Java Server listening on port " + args[0]); | |||||
new Thread(server).start(); | |||||
// Stream variant: read all of standard input line by line, re-joining the
// lines with "\n" (so the text handed to the parser always ends in "\n").
try { | |||||
BufferedReader br = new BufferedReader( | |||||
new InputStreamReader(System.in)); | |||||
String str = ""; | |||||
StringBuilder source = new StringBuilder(); | |||||
while ((str = br.readLine()) != null) { | |||||
source.append(str + "\n"); | |||||
} | |||||
// Parse the collected source and write the resulting symbol string to
// standard output, flushing explicitly because the writer is never closed.
String symbols = (new JavaParser(source.toString())).parse(); | |||||
BufferedWriter bw = new BufferedWriter( | |||||
new OutputStreamWriter(System.out)); | |||||
bw.write(symbols); | |||||
bw.flush(); | |||||
// NOTE(review): an IOException is silently ignored here; nothing is logged
// and the process exit status is unaffected.
} catch (IOException e) { | |||||
} | |||||
} | } | ||||
} | } |
@@ -1,3 +0,0 @@ | |||||
package com.bitshift.parsing.parsers; | |||||
import com.bitshift.parsing.parsers.Parser; |
@@ -5,8 +5,9 @@ import java.util.List; | |||||
import java.util.ArrayList; | import java.util.ArrayList; | ||||
import java.util.Map; | import java.util.Map; | ||||
import java.util.Stack; | import java.util.Stack; | ||||
import java.util.Arrays; | |||||
import java.net.Socket; | |||||
import com.google.common.base.Joiner; | |||||
import org.eclipse.jdt.core.JavaCore; | import org.eclipse.jdt.core.JavaCore; | ||||
import org.eclipse.jdt.core.dom.AST; | import org.eclipse.jdt.core.dom.AST; | ||||
@@ -26,23 +27,20 @@ import org.eclipse.jdt.core.dom.Statement; | |||||
import org.eclipse.jdt.core.dom.TypeDeclaration; | import org.eclipse.jdt.core.dom.TypeDeclaration; | ||||
import org.eclipse.jdt.core.dom.VariableDeclarationFragment; | import org.eclipse.jdt.core.dom.VariableDeclarationFragment; | ||||
import com.bitshift.parsing.parsers.Parser; | |||||
import com.bitshift.parsing.symbols.Symbols; | import com.bitshift.parsing.symbols.Symbols; | ||||
import com.bitshift.parsing.symbols.JavaSymbols; | import com.bitshift.parsing.symbols.JavaSymbols; | ||||
/*TODO: Work on parsing partial java code.*/ | /*TODO: Work on parsing partial java code.*/ | ||||
public class JavaParser extends Parser { | |||||
public class JavaParser { | |||||
private String source; | |||||
public JavaParser(Socket clientSocket) { | |||||
super(clientSocket); | |||||
public JavaParser(String source) { | |||||
this.source = source; | |||||
} | } | ||||
@Override | |||||
protected Symbols genSymbols() { | |||||
char[] source = this.readFromClient().toCharArray(); | |||||
private Symbols genSymbols() { | |||||
ASTParser parser = ASTParser.newParser(AST.JLS3); | ASTParser parser = ASTParser.newParser(AST.JLS3); | ||||
parser.setSource(source); | |||||
parser.setSource(this.source.toCharArray()); | |||||
Map options = JavaCore.getOptions(); | Map options = JavaCore.getOptions(); | ||||
parser.setCompilerOptions(options); | parser.setCompilerOptions(options); | ||||
@@ -55,10 +53,9 @@ public class JavaParser extends Parser { | |||||
return visitor.symbols; | return visitor.symbols; | ||||
} | } | ||||
@Override | |||||
public void run() { | |||||
public String parse() { | |||||
JavaSymbols symbols = (JavaSymbols) this.genSymbols(); | JavaSymbols symbols = (JavaSymbols) this.genSymbols(); | ||||
writeToClient(symbols.toString()); | |||||
return symbols.toString(); | |||||
} | } | ||||
class NodeVisitor extends ASTVisitor { | class NodeVisitor extends ASTVisitor { | ||||
@@ -76,8 +73,10 @@ public class JavaParser extends Parser { | |||||
// Compute the source coordinates of an AST node as
// (start line, start column, end line, end column) and wrap them with
// Symbols.createCoord. Columns returned by root.getColumnNumber are shifted
// by +1 before being stored.
// NOTE(review): two variants of the end-coordinate computation are
// interleaved below (diff residue): one looks up offset start + length, the
// other start + length - 1.
public ArrayList<Integer> blockPosition(ASTNode node) { | public ArrayList<Integer> blockPosition(ASTNode node) { | ||||
// Start line and (shifted) start column, taken from the node's start offset.
int sl = this.root.getLineNumber(node.getStartPosition()); | int sl = this.root.getLineNumber(node.getStartPosition()); | ||||
int sc = this.root.getColumnNumber(node.getStartPosition()) + 1; | int sc = this.root.getColumnNumber(node.getStartPosition()) + 1; | ||||
// Variant A: end coordinates looked up at offset start + length.
int el = this.root.getLineNumber(node.getStartPosition() + node.getLength()); | |||||
int ec = this.root.getColumnNumber(node.getStartPosition() + node.getLength()) + 1; | |||||
// Variant B: end coordinates looked up at offset start + length - 1.
int el = this.root.getLineNumber(node.getStartPosition() | |||||
+ node.getLength() - 1); | |||||
int ec = this.root.getColumnNumber(node.getStartPosition() | |||||
+ node.getLength() - 1) + 1; | |||||
return Symbols.createCoord(sl, sc, el, ec); | return Symbols.createCoord(sl, sc, el, ec); | ||||
} | } | ||||
@@ -204,7 +203,12 @@ public class JavaParser extends Parser { | |||||
// Finish visiting an import declaration: pop the entry on top of _cache,
// take its "name" value out of the map and record the import in symbols
// together with the remaining data.
// NOTE(review): two variants are interleaved below (diff residue): a single
// insert of the full name, and a loop that inserts several dotted prefixes.
public void endVisit(ImportDeclaration node) { | public void endVisit(ImportDeclaration node) { | ||||
HashMap<String, Object> data = this._cache.pop(); | HashMap<String, Object> data = this._cache.pop(); | ||||
String name = (String)data.remove("name"); | String name = (String)data.remove("name"); | ||||
// Variant A: record the full import name once, wrapped in double quotes.
this.symbols.insertImportStatement("\"" + name + "\"", data); | |||||
// Variant B: split the name on '.' and record every dotted prefix, from the
// full name down to the first two segments (the loop stops at i == 2), each
// wrapped in double quotes and sharing the same data map. A name without
// any '.' yields a single part, so the loop body never runs for it.
String[] parts = name.split("\\."); | |||||
for(int i = parts.length; i > 1; i--) { | |||||
String pkg = Joiner.on(".").join(Arrays.copyOfRange(parts, 0, i)); | |||||
this.symbols.insertImportStatement("\"" + pkg + "\"", data); | |||||
} | |||||
} | } | ||||
} | } | ||||
} | } |
@@ -8,14 +8,16 @@ import java.io.InputStreamReader; | |||||
import java.io.OutputStreamWriter; | import java.io.OutputStreamWriter; | ||||
import java.io.IOException; | import java.io.IOException; | ||||
import java.nio.ByteBuffer; | |||||
import java.net.Socket; | import java.net.Socket; | ||||
import com.bitshift.parsing.symbols.Symbols; | import com.bitshift.parsing.symbols.Symbols; | ||||
import com.bitshift.parsing.utils.PackableMemory; | |||||
public abstract class Parser implements Runnable { | public abstract class Parser implements Runnable { | ||||
protected Socket clientSocket; | protected Socket clientSocket; | ||||
private String eos; | |||||
public Parser(Socket clientSocket) { | public Parser(Socket clientSocket) { | ||||
this.clientSocket = clientSocket; | this.clientSocket = clientSocket; | ||||
@@ -29,6 +31,7 @@ public abstract class Parser implements Runnable { | |||||
new InputStreamReader(this.clientSocket.getInputStream())); | new InputStreamReader(this.clientSocket.getInputStream())); | ||||
int bytes = Integer.parseInt(clientReader.readLine()); | int bytes = Integer.parseInt(clientReader.readLine()); | ||||
this.eos = clientReader.readLine(); | |||||
StringBuilder builder = new StringBuilder(); | StringBuilder builder = new StringBuilder(); | ||||
int i = 0; | int i = 0; | ||||
@@ -47,29 +50,13 @@ public abstract class Parser implements Runnable { | |||||
return fromClient; | return fromClient; | ||||
} | } | ||||
public String escapeUnicode(String input) { | |||||
StringBuilder b = new StringBuilder(input.length()); | |||||
Formatter f = new Formatter(b); | |||||
for (char c : input.toCharArray()) { | |||||
if (c < 128) { | |||||
b.append(c); | |||||
} else { | |||||
f.format("\\u%04x", (int) c); | |||||
} | |||||
} | |||||
return b.toString(); | |||||
} | |||||
protected void writeToClient(String toClient) { | protected void writeToClient(String toClient) { | ||||
try { | try { | ||||
BufferedWriter clientWriter = new BufferedWriter( | BufferedWriter clientWriter = new BufferedWriter( | ||||
new OutputStreamWriter(this.clientSocket.getOutputStream())); | new OutputStreamWriter(this.clientSocket.getOutputStream())); | ||||
PackableMemory mem = new PackableMemory(4); | |||||
mem.pack(toClient.length(), 0); | |||||
String dataSize = new String(mem.mem); | |||||
clientWriter.write(toClient); | clientWriter.write(toClient); | ||||
clientWriter.write(eos); | |||||
clientWriter.flush(); | clientWriter.flush(); | ||||
this.clientSocket.close(); | this.clientSocket.close(); | ||||
} catch (IOException ex) { | } catch (IOException ex) { | ||||
@@ -1 +0,0 @@ | |||||
package com.bitshift.parsing.symbols; |
@@ -1,89 +0,0 @@ | |||||
package com.bitshift.parsing.utils; | |||||
//This class contains implementations of methods to | |||||
// -- pack an integer into 4 consecutive bytes of a byte array | |||||
// -- unpack an integer from 4 consecutive bytes of a byte array | |||||
// -- exhaustively test the pack and unpack methods. | |||||
// | |||||
// This file should be saved as PackableMemory.java. Once it has been | |||||
// compiled, the tester can be invoked by typing "java PackableMemory" | |||||
/**
 * A fixed-size byte buffer into which 32-bit integers can be packed and
 * from which they can be unpacked, most significant byte first.
 */
public class PackableMemory {
    /** Number of bytes occupied by one packed int. */
    private static final int INT_BYTES = 4;
    /** Mask selecting the low-order byte of an int. */
    private static final int BYTE_MASK = 0xff;

    // Capacity requested at construction time.
    int size;
    // Backing storage; public so callers can read the packed bytes directly.
    public byte mem[] = null;

    /**
     * Allocates a zero-filled buffer.
     *
     * @param size number of bytes to allocate
     * @throws NegativeArraySizeException if {@code size} is negative
     */
    public PackableMemory(int size)
    {
        this.size = size;
        this.mem = new byte[size];
    }

    /**
     * Packs the 4-byte integer {@code val} into mem[loc]...mem[loc+3].
     * The most significant portion of the integer is stored in mem[loc].
     * The range is validated up front so that a bad offset never leaves the
     * buffer partially overwritten.
     *
     * @param val the value to store
     * @param loc index of the first (most significant) byte
     * @throws ArrayIndexOutOfBoundsException if the four bytes do not fit
     */
    public void pack(int val, int loc)
    {
        checkRange(loc);
        // Work from the least significant byte (right) to the most
        // significant byte (left).
        for (int i = INT_BYTES - 1; i >= 0; i--)
        {
            mem[loc + i] = (byte) (val & BYTE_MASK);
            val = val >> 8;
        }
    }

    /**
     * Unpacks the four bytes mem[loc]...mem[loc+3] into a 4-byte integer.
     * The most significant portion of the integer is read from mem[loc].
     *
     * @param loc index of the first (most significant) byte
     * @return the reassembled integer
     * @throws ArrayIndexOutOfBoundsException if the four bytes do not fit
     */
    public int unpack(int loc)
    {
        checkRange(loc);
        int v = 0;
        // Bytes are OR'ed in from left (most significant) to right; the mask
        // undoes the sign extension of the byte-to-int conversion.
        for (int i = 0; i < INT_BYTES; i++)
        {
            v = (v << 8) | (mem[loc + i] & BYTE_MASK);
        }
        return v;
    }

    // Rejects offsets for which mem[loc]...mem[loc+3] is not fully in bounds.
    private void checkRange(int loc)
    {
        if (loc < 0 || loc > mem.length - INT_BYTES)
        {
            throw new ArrayIndexOutOfBoundsException(
                "need " + INT_BYTES + " bytes at offset " + loc
                + " but buffer length is " + mem.length);
        }
    }

    /**
     * Exhaustively tests pack and unpack: every possible 4-byte integer is
     * packed, unpacked and compared with the original. Nonnegative values
     * are visited in ascending order, then negative values; the transition
     * happens implicitly through integer overflow. Terminates the JVM with
     * a non-zero status on the first mismatch.
     */
    public void packTest()
    {
        int i = 0;
        long k = 0;
        do
        {
            // Offset 0 keeps the test usable on any buffer of 4+ bytes.
            this.pack(i, 0);
            int j = this.unpack(0);
            if (j != i)
            {
                System.out.printf("pack/unpack test failed: i = %d, j = %d\n", i, j);
                // A failed self-test must not report success to the caller.
                System.exit(1);
            }
            i++; k++;
        }
        while (i != 0);
        System.out.printf("pack/unpack test successful, %d iterations\n", k);
    }

    /**
     * Runs the exhaustive self-test on a 100-byte buffer.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        PackableMemory pm = new PackableMemory(100);
        pm.packTest();
        System.exit(0);
    }
}
@@ -1,65 +0,0 @@ | |||||
/* Code for multithreaded server taken from Jakob Jenkov */ | |||||
package com.bitshift.parsing.utils; | |||||
import java.net.ServerSocket; | |||||
import java.net.Socket; | |||||
import java.io.IOException; | |||||
import com.bitshift.parsing.parsers.JavaParser; | |||||
public class ParseServer implements Runnable{ | |||||
protected int serverPort = 8080; | |||||
protected ServerSocket serverSocket = null; | |||||
protected boolean isStopped = false; | |||||
protected Thread runningThread= null; | |||||
public ParseServer(int port){ | |||||
this.serverPort = port; | |||||
} | |||||
public void run(){ | |||||
synchronized(this){ | |||||
this.runningThread = Thread.currentThread(); | |||||
} | |||||
openServerSocket(); | |||||
while(! isStopped()){ | |||||
Socket clientSocket = null; | |||||
try { | |||||
clientSocket = this.serverSocket.accept(); | |||||
} catch (IOException e) { | |||||
if(isStopped()) { | |||||
System.out.println("Server Stopped.") ; | |||||
return; | |||||
} | |||||
throw new RuntimeException( | |||||
"Error accepting client connection", e); | |||||
} | |||||
new Thread(new JavaParser(clientSocket)).start(); | |||||
} | |||||
System.out.println("Server Stopped.") ; | |||||
} | |||||
private synchronized boolean isStopped() { | |||||
return this.isStopped; | |||||
} | |||||
public synchronized void stop(){ | |||||
this.isStopped = true; | |||||
try { | |||||
this.serverSocket.close(); | |||||
} catch (IOException e) { | |||||
throw new RuntimeException("Error closing server", e); | |||||
} | |||||
} | |||||
private void openServerSocket() { | |||||
try { | |||||
this.serverSocket = new ServerSocket(this.serverPort); | |||||
} catch (IOException e) { | |||||
throw new RuntimeException("Cannot open port 8080", e); | |||||
} | |||||
} | |||||
} |
@@ -1,23 +0,0 @@ | |||||
package com.bitshift.parsing.utils; | |||||
import java.util.List; | |||||
import java.util.Arrays; | |||||
/**
 * Fixed sequence of values whose string form is the values joined by
 * commas and wrapped in parentheses, e.g. {@code (1,2,3)}.
 *
 * @param <T> element type
 */
public class Tuple<T> {
    // Fixed-size list view over the constructor's argument array.
    private List<T> _objects;

    /**
     * @param args the tuple's elements, in order; may be empty
     */
    public Tuple(T... args) {
        _objects = Arrays.asList(args);
    }

    /**
     * @return the elements separated by "," inside parentheses; "()" for an
     *         empty tuple
     */
    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder("(");
        String separator = "";
        for (T o : this._objects) {
            // Emit the separator before every element but the first, so
            // there is no trailing comma to trim (trimming failed when the
            // tuple was empty).
            builder.append(separator).append(o);
            separator = ",";
        }
        return builder.append(")").toString();
    }
}
@@ -1,26 +0,0 @@ | |||||
require 'socket' | |||||
require File.expand_path('../parser.rb', __FILE__) | |||||
# Listen on +port_number+ and serve parse requests forever, one thread per
# accepted connection.
#
# Per connection: the first line is read as the payload size, that many
# bytes are read and handed to Bitshift::Parser, and the parse result is
# written back followed by the end-of-stream marker. The connection is
# closed even when reading or parsing raises.
def start_server(port_number)
  listener = TCPServer.new(port_number)
  puts "Ruby Server listening on port #{port_number}\n"

  loop do
    Thread.start(listener.accept) do |conn|
      begin
        # Size header: payload length on a line of its own
        payload_size = conn.readline.to_i
        end_marker = ">}e^"

        source_parser = Bitshift::Parser.new(conn.read(payload_size))

        # Reply with the symbols, then the marker
        conn.puts source_parser.parse
        conn.puts end_marker
      ensure
        conn.close
      end
    end
  end
end