@@ -14,7 +14,6 @@ from threading import Event | |||
from .crawler import GitHubCrawler, BitbucketCrawler | |||
from .indexer import GitIndexer, GitRepository | |||
from ..parser import start_parse_servers | |||
__all__ = ["crawl"] | |||
@@ -31,7 +30,6 @@ def crawl(): | |||
""" | |||
_configure_logging() | |||
parse_servers = start_parse_servers() | |||
time.sleep(5) | |||
repo_clone_queue = Queue.Queue(maxsize=MAX_URL_QUEUE_SIZE) | |||
@@ -62,8 +60,6 @@ def crawl(): | |||
repo_clone_queue.queue.clear() | |||
for thread in threads: | |||
thread.join() | |||
for server in parse_servers: | |||
server.terminate() | |||
def _configure_logging(): | |||
# This isn't ideal, since it means the bitshift python package must be kept | |||
@@ -1,39 +1,71 @@ | |||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | |||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> | |||
<modelVersion>4.0.0</modelVersion> | |||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | |||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> | |||
<modelVersion>4.0.0</modelVersion> | |||
<groupId>com.bitshift.parsing</groupId> | |||
<artifactId>parsing</artifactId> | |||
<packaging>jar</packaging> | |||
<version>1.0-SNAPSHOT</version> | |||
<name>parsing</name> | |||
<url>http://maven.apache.org</url> | |||
<groupId>com.bitshift.parsing</groupId> | |||
<artifactId>parsing</artifactId> | |||
<packaging>jar</packaging> | |||
<version>1.0-SNAPSHOT</version> | |||
<name>parsing</name> | |||
<url>http://maven.apache.org</url> | |||
<dependencies> | |||
<dependency> | |||
<groupId>junit</groupId> | |||
<artifactId>junit</artifactId> | |||
<version>4.11</version> | |||
</dependency> | |||
<dependency> | |||
<groupId>org.eclipse.jdt</groupId> | |||
<artifactId>org.eclipse.jdt.core</artifactId> | |||
<version>3.7.1</version> | |||
</dependency> | |||
</dependencies> | |||
<dependencies> | |||
<dependency> | |||
<groupId>junit</groupId> | |||
<artifactId>junit</artifactId> | |||
<version>4.11</version> | |||
</dependency> | |||
<dependency> | |||
<groupId>org.eclipse.jdt</groupId> | |||
<artifactId>org.eclipse.jdt.core</artifactId> | |||
<version>3.7.1</version> | |||
</dependency> | |||
<dependency> | |||
<groupId>com.google.guava</groupId> | |||
<artifactId>guava</artifactId> | |||
<version>17.0</version> | |||
</dependency> | |||
</dependencies> | |||
<build> | |||
<plugins> | |||
<plugin> | |||
<groupId>org.codehaus.mojo</groupId> | |||
<artifactId>exec-maven-plugin</artifactId> | |||
<version>1.2.1</version> | |||
<configuration> | |||
<mainClass>com.bitshift.parsing.Parse</mainClass> | |||
<arguments> | |||
</arguments> | |||
</configuration> | |||
</plugin> | |||
<build> | |||
<plugins> | |||
<plugin> | |||
<groupId>org.codehaus.mojo</groupId> | |||
<artifactId>exec-maven-plugin</artifactId> | |||
<version>1.2.1</version> | |||
<configuration> | |||
<mainClass>com.bitshift.parsing.Parse</mainClass> | |||
<arguments> | |||
</arguments> | |||
</configuration> | |||
</plugin> | |||
<plugin> | |||
<artifactId>maven-assembly-plugin</artifactId> | |||
<version>2.4</version> | |||
<executions> | |||
<execution> | |||
<id>make-assembly</id> | |||
<phase>package</phase> | |||
<goals> | |||
<goal>single</goal> | |||
</goals> | |||
<configuration> | |||
<archive> | |||
<manifest> | |||
<addClasspath>true</addClasspath> | |||
<mainClass>com.bitshift.parsing.Parse</mainClass> | |||
</manifest> | |||
</archive> | |||
<descriptorRefs> | |||
<descriptorRef>jar-with-dependencies</descriptorRef> | |||
</descriptorRefs> | |||
<outputDirectory>${project.basedir}</outputDirectory> | |||
<finalName>${project.artifactId}</finalName> | |||
<appendAssemblyId>false</appendAssemblyId> | |||
</configuration> | |||
</execution> | |||
</executions> | |||
</plugin> | |||
</plugins> | |||
</build> | |||
@@ -1,13 +1,35 @@ | |||
package com.bitshift.parsing; | |||
import com.bitshift.parsing.utils.ParseServer; | |||
import java.io.BufferedReader; | |||
import java.io.BufferedWriter; | |||
import java.io.IOException; | |||
import java.io.InputStreamReader; | |||
import java.io.OutputStreamWriter; | |||
import com.bitshift.parsing.parsers.JavaParser; | |||
public class Parse { | |||
public static void main(String[] args) { | |||
ParseServer server = new ParseServer(Integer.parseInt(args[0])); | |||
System.out.println("Java Server listening on port " + args[0]); | |||
new Thread(server).start(); | |||
try { | |||
BufferedReader br = new BufferedReader( | |||
new InputStreamReader(System.in)); | |||
String str = ""; | |||
StringBuilder source = new StringBuilder(); | |||
while ((str = br.readLine()) != null) { | |||
source.append(str + "\n"); | |||
} | |||
String symbols = (new JavaParser(source.toString())).parse(); | |||
BufferedWriter bw = new BufferedWriter( | |||
new OutputStreamWriter(System.out)); | |||
bw.write(symbols); | |||
bw.flush(); | |||
} catch (IOException e) { | |||
} | |||
} | |||
} |
@@ -1,3 +0,0 @@ | |||
package com.bitshift.parsing.parsers; | |||
import com.bitshift.parsing.parsers.Parser; |
@@ -5,8 +5,9 @@ import java.util.List; | |||
import java.util.ArrayList; | |||
import java.util.Map; | |||
import java.util.Stack; | |||
import java.util.Arrays; | |||
import java.net.Socket; | |||
import com.google.common.base.Joiner; | |||
import org.eclipse.jdt.core.JavaCore; | |||
import org.eclipse.jdt.core.dom.AST; | |||
@@ -26,23 +27,20 @@ import org.eclipse.jdt.core.dom.Statement; | |||
import org.eclipse.jdt.core.dom.TypeDeclaration; | |||
import org.eclipse.jdt.core.dom.VariableDeclarationFragment; | |||
import com.bitshift.parsing.parsers.Parser; | |||
import com.bitshift.parsing.symbols.Symbols; | |||
import com.bitshift.parsing.symbols.JavaSymbols; | |||
/*TODO: Work on parsing partial java code.*/ | |||
public class JavaParser extends Parser { | |||
public class JavaParser { | |||
private String source; | |||
public JavaParser(Socket clientSocket) { | |||
super(clientSocket); | |||
public JavaParser(String source) { | |||
this.source = source; | |||
} | |||
@Override | |||
protected Symbols genSymbols() { | |||
char[] source = this.readFromClient().toCharArray(); | |||
private Symbols genSymbols() { | |||
ASTParser parser = ASTParser.newParser(AST.JLS3); | |||
parser.setSource(source); | |||
parser.setSource(this.source.toCharArray()); | |||
Map options = JavaCore.getOptions(); | |||
parser.setCompilerOptions(options); | |||
@@ -55,10 +53,9 @@ public class JavaParser extends Parser { | |||
return visitor.symbols; | |||
} | |||
@Override | |||
public void run() { | |||
public String parse() { | |||
JavaSymbols symbols = (JavaSymbols) this.genSymbols(); | |||
writeToClient(symbols.toString()); | |||
return symbols.toString(); | |||
} | |||
class NodeVisitor extends ASTVisitor { | |||
@@ -76,8 +73,10 @@ public class JavaParser extends Parser { | |||
public ArrayList<Integer> blockPosition(ASTNode node) { | |||
int sl = this.root.getLineNumber(node.getStartPosition()); | |||
int sc = this.root.getColumnNumber(node.getStartPosition()) + 1; | |||
int el = this.root.getLineNumber(node.getStartPosition() + node.getLength()); | |||
int ec = this.root.getColumnNumber(node.getStartPosition() + node.getLength()) + 1; | |||
int el = this.root.getLineNumber(node.getStartPosition() | |||
+ node.getLength() - 1); | |||
int ec = this.root.getColumnNumber(node.getStartPosition() | |||
+ node.getLength() - 1) + 1; | |||
return Symbols.createCoord(sl, sc, el, ec); | |||
} | |||
@@ -204,7 +203,12 @@ public class JavaParser extends Parser { | |||
public void endVisit(ImportDeclaration node) { | |||
HashMap<String, Object> data = this._cache.pop(); | |||
String name = (String)data.remove("name"); | |||
this.symbols.insertImportStatement("\"" + name + "\"", data); | |||
String[] parts = name.split("\\."); | |||
for(int i = parts.length; i > 1; i--) { | |||
String pkg = Joiner.on(".").join(Arrays.copyOfRange(parts, 0, i)); | |||
this.symbols.insertImportStatement("\"" + pkg + "\"", data); | |||
} | |||
} | |||
} | |||
} |
@@ -8,14 +8,16 @@ import java.io.InputStreamReader; | |||
import java.io.OutputStreamWriter; | |||
import java.io.IOException; | |||
import java.nio.ByteBuffer; | |||
import java.net.Socket; | |||
import com.bitshift.parsing.symbols.Symbols; | |||
import com.bitshift.parsing.utils.PackableMemory; | |||
public abstract class Parser implements Runnable { | |||
protected Socket clientSocket; | |||
private String eos; | |||
public Parser(Socket clientSocket) { | |||
this.clientSocket = clientSocket; | |||
@@ -29,6 +31,7 @@ public abstract class Parser implements Runnable { | |||
new InputStreamReader(this.clientSocket.getInputStream())); | |||
int bytes = Integer.parseInt(clientReader.readLine()); | |||
this.eos = clientReader.readLine(); | |||
StringBuilder builder = new StringBuilder(); | |||
int i = 0; | |||
@@ -47,29 +50,13 @@ public abstract class Parser implements Runnable { | |||
return fromClient; | |||
} | |||
public String escapeUnicode(String input) { | |||
StringBuilder b = new StringBuilder(input.length()); | |||
Formatter f = new Formatter(b); | |||
for (char c : input.toCharArray()) { | |||
if (c < 128) { | |||
b.append(c); | |||
} else { | |||
f.format("\\u%04x", (int) c); | |||
} | |||
} | |||
return b.toString(); | |||
} | |||
protected void writeToClient(String toClient) { | |||
try { | |||
BufferedWriter clientWriter = new BufferedWriter( | |||
new OutputStreamWriter(this.clientSocket.getOutputStream())); | |||
PackableMemory mem = new PackableMemory(4); | |||
mem.pack(toClient.length(), 0); | |||
String dataSize = new String(mem.mem); | |||
clientWriter.write(toClient); | |||
clientWriter.write(eos); | |||
clientWriter.flush(); | |||
this.clientSocket.close(); | |||
} catch (IOException ex) { | |||
@@ -1 +0,0 @@ | |||
package com.bitshift.parsing.symbols; |
@@ -1,89 +0,0 @@ | |||
package com.bitshift.parsing.utils; | |||
//This class contains implementations of methods to | |||
// -- pack an integer into 4 consecutive bytes of a byte array | |||
// -- unpack an integer from 4 consecutive bytes of a byte array | |||
// -- exhaustively test the pack and unpack methods. | |||
// | |||
// This file should be saved as PackableMemory.java. Once it has been | |||
// compiled, the tester can be invoked by typing "java PackableMemory" | |||
public class PackableMemory {
    // Size in bytes requested when the buffer was constructed.
    int size;
    // Backing byte buffer; public so callers can read the packed bytes.
    public byte mem[] = null;

    // Allocate a zero-filled buffer of `size` bytes.
    public PackableMemory(int size)
    {
        this.size = size;
        this.mem = new byte[size];
    }

    // Pack the 4-byte integer val into the four bytes mem[loc]...mem[loc+3].
    // The most significant portion of the integer is stored in mem[loc].
    // Bytes are masked out of the integer and stored in the array, working
    // from right (least significant) to left (most significant).
    //
    // Throws ArrayIndexOutOfBoundsException when the four bytes do not fit.
    // The range is checked before anything is written, so a rejected call
    // never leaves the buffer partially overwritten.
    public void pack(int val, int loc)
    {
        checkRange(loc);
        final int MASK = 0xff;
        for (int i = 3; i >= 0; i--)
        {
            mem[loc+i] = (byte)(val & MASK);
            val = val >> 8;
        }
    }

    // Unpack the four bytes mem[loc]...mem[loc+3] into a 4-byte integer,
    // and return the resulting integer value.
    // The most significant portion of the integer is stored in mem[loc].
    // Bytes are 'OR'ed into the integer, working from left (most significant)
    // to right (least significant).
    //
    // Throws ArrayIndexOutOfBoundsException when mem[loc]...mem[loc+3] is
    // not entirely inside the buffer.
    public int unpack(int loc)
    {
        checkRange(loc);
        final int MASK = 0xff;
        int v = (int)mem[loc] & MASK;
        for (int i = 1; i < 4; i++)
        {
            v = v << 8;
            v = v | ((int)mem[loc+i] & MASK);
        }
        return v;
    }

    // Reject any offset for which mem[loc]...mem[loc+3] is not fully inside
    // the buffer. Written as `loc > length - 4` so a huge loc cannot
    // overflow the comparison.
    private void checkRange(int loc)
    {
        if (loc < 0 || loc > mem.length - 4)
        {
            throw new ArrayIndexOutOfBoundsException(
                "need 4 bytes at offset " + loc
                + " but buffer length is " + mem.length);
        }
    }

    // Test the above pack and unpack methods by iterating the following
    // over all possible 4-byte integers: pack the integer,
    // then unpack it, and then verify that the unpacked integer equals the
    // original integer. It tests all nonnegative numbers in ascending order
    // and then all negative numbers in ascending order. The transition from
    // positive to negative numbers happens implicitly due to integer overflow.
    //
    // The test packs at offset 4, so the buffer must hold at least 8 bytes.
    // On the first mismatch the process exits with a non-zero status.
    public void packTest()
    {
        int i = 0;
        long k = 0;
        do
        {
            this.pack(i,4);
            int j = this.unpack(4);
            if (j != i)
            {
                System.out.printf("pack/unpack test failed: i = %d, j = %d\n",i,j);
                // Non-zero status so scripts can detect the failure.
                System.exit(1);
            }
            i++; k++;
        }
        while (i != 0);
        System.out.printf("pack/unpack test successful, %d iterations\n",k);
    }

    // main routine to test the PackableMemory class by running the
    // packTest() method.
    public static void main(String[] args)
    {
        PackableMemory pm = new PackableMemory(100);
        pm.packTest();
        System.exit(0);
    }
}
@@ -1,65 +0,0 @@ | |||
/* Code for multithreaded server taken from Jakob Jenkov */ | |||
package com.bitshift.parsing.utils; | |||
import java.net.ServerSocket; | |||
import java.net.Socket; | |||
import java.io.IOException; | |||
import com.bitshift.parsing.parsers.JavaParser; | |||
public class ParseServer implements Runnable{ | |||
protected int serverPort = 8080; | |||
protected ServerSocket serverSocket = null; | |||
protected boolean isStopped = false; | |||
protected Thread runningThread= null; | |||
public ParseServer(int port){ | |||
this.serverPort = port; | |||
} | |||
public void run(){ | |||
synchronized(this){ | |||
this.runningThread = Thread.currentThread(); | |||
} | |||
openServerSocket(); | |||
while(! isStopped()){ | |||
Socket clientSocket = null; | |||
try { | |||
clientSocket = this.serverSocket.accept(); | |||
} catch (IOException e) { | |||
if(isStopped()) { | |||
System.out.println("Server Stopped.") ; | |||
return; | |||
} | |||
throw new RuntimeException( | |||
"Error accepting client connection", e); | |||
} | |||
new Thread(new JavaParser(clientSocket)).start(); | |||
} | |||
System.out.println("Server Stopped.") ; | |||
} | |||
private synchronized boolean isStopped() { | |||
return this.isStopped; | |||
} | |||
public synchronized void stop(){ | |||
this.isStopped = true; | |||
try { | |||
this.serverSocket.close(); | |||
} catch (IOException e) { | |||
throw new RuntimeException("Error closing server", e); | |||
} | |||
} | |||
private void openServerSocket() { | |||
try { | |||
this.serverSocket = new ServerSocket(this.serverPort); | |||
} catch (IOException e) { | |||
throw new RuntimeException("Cannot open port 8080", e); | |||
} | |||
} | |||
} |
@@ -1,23 +0,0 @@ | |||
package com.bitshift.parsing.utils; | |||
import java.util.List; | |||
import java.util.Arrays; | |||
public class Tuple<T> {
    // Elements in the order they were passed to the constructor.
    private List<T> _objects;

    // Build a tuple holding `args` in order; may be called with no arguments.
    public Tuple(T... args) {
        _objects = Arrays.asList(args);
    }

    // Render the tuple as "(a,b,c)": elements joined by commas with no
    // spaces, wrapped in parentheses. An empty tuple renders as "()".
    public String toString() {
        StringBuilder builder = new StringBuilder("(");

        // Emit the separator before every element except the first, so no
        // trailing comma has to be trimmed (trimming failed on empty input).
        String separator = "";
        for(T o: this._objects) {
            builder.append(separator).append(o);
            separator = ",";
        }

        return builder.append(")").toString();
    }
}
@@ -1,26 +0,0 @@ | |||
require 'socket' | |||
require File.expand_path('../parser.rb', __FILE__) | |||
# Listen on +port_number+ and serve parse requests forever.
#
# Every accepted connection is handled on its own thread: the first line is
# read as the payload size, that many bytes are read and handed to
# Bitshift::Parser, and the parse result followed by the end-of-stream
# marker is written back before the connection is closed.
def start_server(port_number)
  listener = TCPServer.new(port_number)
  puts "Ruby Server listening on port #{port_number}\n"

  loop do
    Thread.start(listener.accept) do |conn|
      begin
        # Leading line carries the number of bytes that follow.
        payload_size = conn.readline.to_i
        terminator = ">}e^"

        result = Bitshift::Parser.new(conn.read(payload_size)).parse

        conn.puts result
        conn.puts terminator
      ensure
        # Always release the socket, even if reading or parsing raised.
        conn.close
      end
    end
  end
end