diff --git a/bitshift/crawler/crawl.py b/bitshift/crawler/crawl.py index da6e102..fafd017 100644 --- a/bitshift/crawler/crawl.py +++ b/bitshift/crawler/crawl.py @@ -14,7 +14,6 @@ from threading import Event from .crawler import GitHubCrawler, BitbucketCrawler from .indexer import GitIndexer, GitRepository -from ..parser import start_parse_servers __all__ = ["crawl"] @@ -31,7 +30,6 @@ def crawl(): """ _configure_logging() - parse_servers = start_parse_servers() time.sleep(5) repo_clone_queue = Queue.Queue(maxsize=MAX_URL_QUEUE_SIZE) @@ -62,8 +60,6 @@ def crawl(): repo_clone_queue.queue.clear() for thread in threads: thread.join() - for server in parse_servers: - server.terminate() def _configure_logging(): # This isn't ideal, since it means the bitshift python package must be kept diff --git a/parsers/java/pom.xml b/parsers/java/pom.xml index c2191b0..164422a 100644 --- a/parsers/java/pom.xml +++ b/parsers/java/pom.xml @@ -1,39 +1,71 @@ - - 4.0.0 + + 4.0.0 - com.bitshift.parsing - parsing - jar - 1.0-SNAPSHOT - parsing - http://maven.apache.org + com.bitshift.parsing + parsing + jar + 1.0-SNAPSHOT + parsing + http://maven.apache.org - - - junit - junit - 4.11 - - - org.eclipse.jdt - org.eclipse.jdt.core - 3.7.1 - - + + + junit + junit + 4.11 + + + org.eclipse.jdt + org.eclipse.jdt.core + 3.7.1 + + + com.google.guava + guava + 17.0 + + - - - - org.codehaus.mojo - exec-maven-plugin - 1.2.1 - - com.bitshift.parsing.Parse - - - - + + + + org.codehaus.mojo + exec-maven-plugin + 1.2.1 + + com.bitshift.parsing.Parse + + + + + + maven-assembly-plugin + 2.4 + + + make-assembly + package + + single + + + + + true + com.bitshift.parsing.Parse + + + + jar-with-dependencies + + ${project.basedir} + ${project.artifactId} + false + + + + diff --git a/parsers/java/src/main/java/com/bitshift/parsing/Parse.java b/parsers/java/src/main/java/com/bitshift/parsing/Parse.java index 1964b59..64d537e 100644 --- a/parsers/java/src/main/java/com/bitshift/parsing/Parse.java +++ b/parsers/java/src/main/java/com/bitshift/parsing/Parse.java @@ -1,13 +1,35 @@ package com.bitshift.parsing; -import com.bitshift.parsing.utils.ParseServer; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; + +import com.bitshift.parsing.parsers.JavaParser; public class Parse { public static void main(String[] args) { - ParseServer server = new ParseServer(Integer.parseInt(args[0])); - System.out.println("Java Server listening on port " + args[0]); - new Thread(server).start(); + try { + BufferedReader br = new BufferedReader( + new InputStreamReader(System.in)); + + String str = ""; + StringBuilder source = new StringBuilder(); + while ((str = br.readLine()) != null) { + source.append(str + "\n"); + } + + String symbols = (new JavaParser(source.toString())).parse(); + BufferedWriter bw = new BufferedWriter( + new OutputStreamWriter(System.out)); + + bw.write(symbols); + bw.flush(); + } catch (IOException e) { + + } } } diff --git a/parsers/java/src/main/java/com/bitshift/parsing/parsers/CParser.java b/parsers/java/src/main/java/com/bitshift/parsing/parsers/CParser.java deleted file mode 100644 index dbe93fb..0000000 --- a/parsers/java/src/main/java/com/bitshift/parsing/parsers/CParser.java +++ /dev/null @@ -1,3 +0,0 @@ -package com.bitshift.parsing.parsers; - -import com.bitshift.parsing.parsers.Parser; diff --git a/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java b/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java index e84895e..b3863a6 100644 --- a/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java +++ b/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java @@ -5,8 +5,9 @@ import java.util.List; import java.util.ArrayList; import java.util.Map; import java.util.Stack; +import java.util.Arrays; -import java.net.Socket; +import com.google.common.base.Joiner; import org.eclipse.jdt.core.JavaCore; import org.eclipse.jdt.core.dom.AST; @@ -26,23 +27,20 @@ import org.eclipse.jdt.core.dom.Statement; import org.eclipse.jdt.core.dom.TypeDeclaration; import org.eclipse.jdt.core.dom.VariableDeclarationFragment; -import com.bitshift.parsing.parsers.Parser; import com.bitshift.parsing.symbols.Symbols; import com.bitshift.parsing.symbols.JavaSymbols; /*TODO: Work on parsing partial java code.*/ -public class JavaParser extends Parser { +public class JavaParser { + private String source; - public JavaParser(Socket clientSocket) { - super(clientSocket); + public JavaParser(String source) { + this.source = source; } - @Override - protected Symbols genSymbols() { - char[] source = this.readFromClient().toCharArray(); - + private Symbols genSymbols() { ASTParser parser = ASTParser.newParser(AST.JLS3); - parser.setSource(source); + parser.setSource(this.source.toCharArray()); Map options = JavaCore.getOptions(); parser.setCompilerOptions(options); @@ -55,10 +53,9 @@ public class JavaParser extends Parser { return visitor.symbols; } - @Override - public void run() { + public String parse() { JavaSymbols symbols = (JavaSymbols) this.genSymbols(); - writeToClient(symbols.toString()); + return symbols.toString(); } class NodeVisitor extends ASTVisitor { @@ -76,8 +73,10 @@ public class JavaParser extends Parser { public ArrayList blockPosition(ASTNode node) { int sl = this.root.getLineNumber(node.getStartPosition()); int sc = this.root.getColumnNumber(node.getStartPosition()) + 1; - int el = this.root.getLineNumber(node.getStartPosition() + node.getLength()); - int ec = this.root.getColumnNumber(node.getStartPosition() + node.getLength()) + 1; + int el = this.root.getLineNumber(node.getStartPosition() + + node.getLength() - 1); + int ec = this.root.getColumnNumber(node.getStartPosition() + + node.getLength() - 1) + 1; return Symbols.createCoord(sl, sc, el, ec); } @@ -204,7 +203,12 @@ public class JavaParser extends Parser { public void endVisit(ImportDeclaration node) { HashMap data = this._cache.pop(); String name = (String)data.remove("name"); - this.symbols.insertImportStatement("\"" + name + "\"", data); + String[] parts = name.split("\\."); + + for(int i = parts.length; i > 1; i--) { + String pkg = Joiner.on(".").join(Arrays.copyOfRange(parts, 0, i)); + this.symbols.insertImportStatement("\"" + pkg + "\"", data); + } } } } diff --git a/parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java b/parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java index b8e4e60..79a5498 100644 --- a/parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java +++ b/parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java @@ -8,14 +8,16 @@ import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.IOException; +import java.nio.ByteBuffer; + import java.net.Socket; import com.bitshift.parsing.symbols.Symbols; -import com.bitshift.parsing.utils.PackableMemory; public abstract class Parser implements Runnable { protected Socket clientSocket; + private String eos; public Parser(Socket clientSocket) { this.clientSocket = clientSocket; @@ -29,6 +31,7 @@ public abstract class Parser implements Runnable { new InputStreamReader(this.clientSocket.getInputStream())); int bytes = Integer.parseInt(clientReader.readLine()); + this.eos = clientReader.readLine(); StringBuilder builder = new StringBuilder(); int i = 0; @@ -47,29 +50,13 @@ public abstract class Parser implements Runnable { return fromClient; } - public String escapeUnicode(String input) { - StringBuilder b = new StringBuilder(input.length()); - Formatter f = new Formatter(b); - for (char c : input.toCharArray()) { - if (c < 128) { - b.append(c); - } else { - f.format("\\u%04x", (int) c); - } - } - return b.toString(); - } - protected void writeToClient(String toClient) { try { BufferedWriter clientWriter = new BufferedWriter( new OutputStreamWriter(this.clientSocket.getOutputStream())); - PackableMemory mem = new PackableMemory(4); - mem.pack(toClient.length(), 0); - String dataSize = new String(mem.mem); - clientWriter.write(toClient); + clientWriter.write(eos); clientWriter.flush(); this.clientSocket.close(); } catch (IOException ex) { diff --git a/parsers/java/src/main/java/com/bitshift/parsing/symbols/CSymbols.java b/parsers/java/src/main/java/com/bitshift/parsing/symbols/CSymbols.java deleted file mode 100644 index 9abd60d..0000000 --- a/parsers/java/src/main/java/com/bitshift/parsing/symbols/CSymbols.java +++ /dev/null @@ -1 +0,0 @@ -package com.bitshift.parsing.symbols; diff --git a/parsers/java/src/main/java/com/bitshift/parsing/utils/PackableMemory.java b/parsers/java/src/main/java/com/bitshift/parsing/utils/PackableMemory.java deleted file mode 100644 index 83babfb..0000000 --- a/parsers/java/src/main/java/com/bitshift/parsing/utils/PackableMemory.java +++ /dev/null @@ -1,89 +0,0 @@ -package com.bitshift.parsing.utils; - -//This class contains implementations of methods to -// -- pack an integer into 4 consecutive bytes of a byte array -// -- unpack an integer from 4 consecutive bytes of a byte array -// -- exhaustively test the pack and unpack methods. -// -// This file should be saved as PackableMemory.java. Once it has been -// compiled, the tester can be invoked by typing "java PackableMemory" - -public class PackableMemory { - int size; - public byte mem[] = null; - - public PackableMemory(int size) - { - this.size = size; - this.mem = new byte[size]; - } - - // Pack the 4-byte integer val into the four bytes mem[loc]...mem[loc+3]. - // The most significant porion of the integer is stored in mem[loc]. - // Bytes are masked out of the integer and stored in the array, working - // from right(least significant) to left (most significant). - public void pack(int val, int loc) - { - final int MASK = 0xff; - for (int i = 3; i >= 0; i--) - { - mem[loc+i] = (byte)(val & MASK); - val = val >> 8; - } - } - - // Unpack the four bytes mem[loc]...mem[loc+3] into a 4-byte integer, - // and return the resulting integer value. - // The most significant porion of the integer is stored in mem[loc]. - // Bytes are 'OR'ed into the integer, working from left (most significant) - // to right (least significant) - public int unpack(int loc) - { - final int MASK = 0xff; - int v = (int)mem[loc] & MASK; - for (int i = 1; i < 4; i++) - { - v = v << 8; - v = v | ((int)mem[loc+i] & MASK); - } - return v; - } - - - - // Test the above pack and unpack methods by iterating the following - // over all possible 4-byte integers: pack the integer, - // then unpack it, and then verify that the unpacked integer equals the - // original integer. It tests all nonnegative numbers in ascending order - // and then all negative numbers in ascending order. The transition from - // positive to negative numbers happens implicitly due to integer overflow. - public void packTest() - { - - int i = 0; - long k = 0; - do - { - this.pack(i,4); - int j = this.unpack(4); - if (j != i) - { - System.out.printf("pack/unpack test failed: i = %d, j = %d\n",i,j); - System.exit(0); - } - i++; k++; - } - while (i != 0); - System.out.printf("pack/unpack test successful, %d iterations\n",k); - } - - // main routine to test the PackableMemory class by running the - // packTest() method. - public static void main(String[] args) - { - PackableMemory pm = new PackableMemory(100); - pm.packTest(); - System.exit(0); - } -} - diff --git a/parsers/java/src/main/java/com/bitshift/parsing/utils/ParseServer.java b/parsers/java/src/main/java/com/bitshift/parsing/utils/ParseServer.java deleted file mode 100644 index 291be34..0000000 --- a/parsers/java/src/main/java/com/bitshift/parsing/utils/ParseServer.java +++ /dev/null @@ -1,65 +0,0 @@ -/* Code for multithreaded server taken from Jakob Jenkov */ -package com.bitshift.parsing.utils; - -import java.net.ServerSocket; -import java.net.Socket; -import java.io.IOException; - -import com.bitshift.parsing.parsers.JavaParser; - -public class ParseServer implements Runnable{ - - protected int serverPort = 8080; - protected ServerSocket serverSocket = null; - protected boolean isStopped = false; - protected Thread runningThread= null; - - public ParseServer(int port){ - this.serverPort = port; - } - - public void run(){ - synchronized(this){ - this.runningThread = Thread.currentThread(); - } - openServerSocket(); - while(! isStopped()){ - Socket clientSocket = null; - try { - clientSocket = this.serverSocket.accept(); - } catch (IOException e) { - if(isStopped()) { - System.out.println("Server Stopped.") ; - return; - } - throw new RuntimeException( - "Error accepting client connection", e); - } - new Thread(new JavaParser(clientSocket)).start(); - } - System.out.println("Server Stopped.") ; - } - - - private synchronized boolean isStopped() { - return this.isStopped; - } - - public synchronized void stop(){ - this.isStopped = true; - try { - this.serverSocket.close(); - } catch (IOException e) { - throw new RuntimeException("Error closing server", e); - } - } - - private void openServerSocket() { - try { - this.serverSocket = new ServerSocket(this.serverPort); - } catch (IOException e) { - throw new RuntimeException("Cannot open port 8080", e); - } - } - -} diff --git a/parsers/java/src/main/java/com/bitshift/parsing/utils/Tuple.java b/parsers/java/src/main/java/com/bitshift/parsing/utils/Tuple.java deleted file mode 100644 index 115a3c6..0000000 --- a/parsers/java/src/main/java/com/bitshift/parsing/utils/Tuple.java +++ /dev/null @@ -1,23 +0,0 @@ -package com.bitshift.parsing.utils; - -import java.util.List; -import java.util.Arrays; - -public class Tuple { - private List _objects; - - public Tuple(T... args) { - _objects = Arrays.asList(args); - } - - public String toString() { - StringBuilder builder = new StringBuilder(); - - for(T o: this._objects) { - builder.append(o + ","); - } - - String s = builder.toString(); - return "(" + s.substring(0, s.length() - 1) + ")"; - } -} diff --git a/parsers/ruby/lib/parse_server.rb b/parsers/ruby/lib/parse_server.rb deleted file mode 100644 index b24d263..0000000 --- a/parsers/ruby/lib/parse_server.rb +++ /dev/null @@ -1,26 +0,0 @@ -require 'socket' -require File.expand_path('../parser.rb', __FILE__) - -def start_server(port_number) - server = TCPServer.new port_number - puts "Ruby Server listening on port #{port_number}\n" - - loop do - # Start a new thread for each client accepted - Thread.start(server.accept) do |client| - begin - # Get the amount of data to be read - size = (client.readline).to_i - eos = ">}e^" - p = Bitshift::Parser.new client.read(size) - # Get the parsed result - symbols = p.parse - client.puts symbols - client.puts eos - ensure - # Close the socket - client.close - end - end - end -end