diff --git a/.gitignore b/.gitignore index 47be6b3..d00ac68 100644 --- a/.gitignore +++ b/.gitignore @@ -51,4 +51,4 @@ target # Ctags */tags -log +logs diff --git a/app.py b/app.py index 303ff87..274daac 100644 --- a/app.py +++ b/app.py @@ -2,13 +2,14 @@ Module to contain all the project's Flask server plumbing. """ -from flask import Flask -from flask import render_template, session +from json import dumps + +from flask import Flask, make_response, render_template, request from bitshift import assets -from bitshift import languages -# from bitshift.database import Database -# from bitshift.query import parse_query +from bitshift.database import Database +from bitshift.languages import LANGS +from bitshift.query import parse_query, QueryParseException app = Flask(__name__) app.config.from_object("bitshift.config") @@ -17,17 +18,33 @@ app_env = app.jinja_env app_env.line_statement_prefix = "=" app_env.globals.update(assets=assets) -# database = Database() +database = Database() @app.route("/") def index(): - return render_template("index.html", autocomplete_languages=languages.LANGS) - -@app.route("/search/") -def search(query): - # tree = parse_query(query) - # database.search(tree) - pass + return render_template("index.html", autocomplete_languages=LANGS) + +@app.route("/search.json") +def search(): + def reply(json): + resp = make_response(dumps(json)) + resp.mimetype = "application/json" + return resp + + query, page = request.args.get("q"), request.args.get("p", 1) + if not query: + return reply({"error": "No query given"}) + try: + tree = parse_query(query) + except QueryParseException as exc: + return reply({"error": exc.args[0]}) + try: + page = int(page) + except ValueError: + return reply({"error": u"Invalid page number: %s" % page}) + count, codelets = database.search(tree, page) + results = [clt.serialize() for clt in codelets] + return reply({"count": count, "results": results}) @app.route("/about") def about(): diff --git a/bitshift/codelet.py 
b/bitshift/codelet.py index 92debf4..3021ffe 100644 --- a/bitshift/codelet.py +++ b/bitshift/codelet.py @@ -1,3 +1,5 @@ +from .languages import LANGS + __all__ = ["Codelet"] class Codelet(object): @@ -11,7 +13,7 @@ class Codelet(object): :ivar authors: (array of tuples (str, str or None)) An array of tuples containing an author's name and profile URL (on the service the code was pulled from). - :ivar code_url: (str) The url of the (page containing the) source code. + :ivar url: (str) The url of the (page containing the) source code. :ivar date_created: (:class:`datetime.datetime`, or None) The date the code was published. :ivar date_modified: (:class:`datetime.datetime`, or None) The date the @@ -24,8 +26,8 @@ class Codelet(object): added by the database. """ - def __init__(self, name, code, filename, language, authors, code_url, - date_created, date_modified, rank, symbols=None, origin=None): + def __init__(self, name, code, filename, language, authors, url, + date_created, date_modified, rank, symbols=None, origin=None): """ Create a Codelet instance. 
@@ -34,7 +36,7 @@ class Codelet(object): :param filename: see :attr:`self.filename` :param language: see :attr:`self.language` :param authors: see :attr:`self.authors` - :param code_url: see :attr:`self.code_url` + :param url: see :attr:`self.url` :param date_created: see :attr:`self.date_created` :param date_modified: see :attr:`self.date_modified` :param rank: see :attr:`self.rank` @@ -46,7 +48,7 @@ class Codelet(object): :type filename: see :attr:`self.filename` :type language: see :attr:`self.language` :type authors: see :attr:`self.authors` - :type code_url: see :attr:`self.code_url` + :type url: see :attr:`self.url` :type date_created: see :attr:`self.date_created` :type date_modified: see :attr:`self.date_modified` :type rank: see :attr:`self.rank` @@ -59,9 +61,24 @@ class Codelet(object): self.filename = filename self.language = language self.authors = authors - self.code_url = code_url + self.url = url self.date_created = date_created self.date_modified = date_modified self.rank = rank self.symbols = symbols or {} self.origin = origin or (None, None, None) + + def serialize(self): + """ + Convert the codelet into a dictionary that can be sent as JSON. + + :return: The codelet as a dictionary. + :rtype: dict + """ + return { + "name": self.name, "code": self.code, "lang": LANGS[self.language], + "authors": self.authors, "url": self.url, + "created": self.date_created.isoformat(), + "modified": self.date_modified.isoformat(), + "symbols": self.symbols, "origin": self.origin + } diff --git a/bitshift/crawler/__init__.py b/bitshift/crawler/__init__.py index 73b1c22..e69de29 100644 --- a/bitshift/crawler/__init__.py +++ b/bitshift/crawler/__init__.py @@ -1,55 +0,0 @@ -""" -:synopsis: Parent crawler module, which supervises all crawlers. - -Contains functions for initializing all subsidiary, threaded crawlers. 
-""" - -import logging, logging.handlers, os, Queue - -from bitshift.crawler import crawler, indexer - -__all__ = ["crawl"] - -def crawl(): - """ - Initialize all crawlers (and indexers). - - Start the: - 1. GitHub crawler, :class:`crawler.GitHubCrawler`. - 2. Bitbucket crawler, :class:`crawler.BitbucketCrawler`. - 3. Git indexer, :class:`bitshift.crawler.indexer.GitIndexer`. - """ - - _configure_logging() - - MAX_URL_QUEUE_SIZE = 5e3 - - repo_clone_queue = Queue.Queue(maxsize=MAX_URL_QUEUE_SIZE) - threads = [crawler.GitHubCrawler(repo_clone_queue), - crawler.BitbucketCrawler(repo_clone_queue), - indexer.GitIndexer(repo_clone_queue)] - - for thread in threads: - thread.start() - -def _configure_logging(): - LOG_FILE_DIR = "log" - - if not os.path.exists(LOG_FILE_DIR): - os.mkdir(LOG_FILE_DIR) - - logging.getLogger("requests").setLevel(logging.WARNING) - logging.getLogger("urllib3").setLevel(logging.WARNING) - - formatter = logging.Formatter( - fmt=("%(asctime)s %(levelname)s %(name)s %(funcName)s" - " %(message)s"), datefmt="%y-%m-%d %H:%M:%S") - - handler = logging.handlers.TimedRotatingFileHandler( - "%s/%s" % (LOG_FILE_DIR, "app.log"), when="H", interval=1, - backupCount=20) - handler.setFormatter(formatter) - - root_logger = logging.getLogger() - root_logger.addHandler(handler) - root_logger.setLevel(logging.NOTSET) diff --git a/bitshift/crawler/crawl.py b/bitshift/crawler/crawl.py new file mode 100644 index 0000000..b91fc95 --- /dev/null +++ b/bitshift/crawler/crawl.py @@ -0,0 +1,65 @@ +""" +:synopsis: Parent crawler module, which supervises all crawlers. + +Contains functions for initializing all subsidiary, threaded crawlers. +""" + +import logging, logging.handlers, os, Queue + +from bitshift.crawler import crawler, indexer +from bitshift.parser import parse, start_parse_servers + +__all__ = ["crawl"] + +def crawl(): + """ + Initialize all crawlers (and indexers). + + Start the: + 1. GitHub crawler, :class:`crawler.GitHubCrawler`. + 2. 
Bitbucket crawler, :class:`crawler.BitbucketCrawler`. + 3. Git indexer, :class:`bitshift.crawler.indexer.GitIndexer`. + """ + + _configure_logging() + + MAX_URL_QUEUE_SIZE = 5e3 + + repo_clone_queue = Queue.Queue(maxsize=MAX_URL_QUEUE_SIZE) + threads = [crawler.GitHubCrawler(repo_clone_queue), + crawler.BitbucketCrawler(repo_clone_queue), + indexer.GitIndexer(repo_clone_queue)] + + for thread in threads: + thread.start() + + parse_servers = start_parse_servers() + +def _configure_logging(): + # This isn't ideal, since it means the bitshift python package must be kept + # inside the app, but it works for now: + root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) + log_dir = os.path.join(root, "logs") + + if not os.path.exists(log_dir): + os.mkdir(log_dir) + + logging.getLogger("requests").setLevel(logging.WARNING) + logging.getLogger("urllib3").setLevel(logging.WARNING) + + formatter = logging.Formatter( + fmt=("%(asctime)s %(levelname)s %(name)s:%(funcName)s" + " %(message)s"), datefmt="%y-%m-%d %H:%M:%S") + + handler = logging.handlers.TimedRotatingFileHandler( + "%s/%s" % (log_dir, "app.log"), when="H", interval=1, + backupCount=20) + handler.setFormatter(formatter) + + root_logger = logging.getLogger() + root_logger.addHandler(handler) + root_logger.setLevel(logging.NOTSET) + +if __name__ == "__main__": + _configure_logging() + crawl() diff --git a/bitshift/crawler/indexer.py b/bitshift/crawler/indexer.py index c1c77ad..5b5e83d 100644 --- a/bitshift/crawler/indexer.py +++ b/bitshift/crawler/indexer.py @@ -7,6 +7,7 @@ import bs4, datetime, logging, os, Queue, re, shutil, string, subprocess, time,\ threading from ..database import Database +from ..parser import parse, UnsupportedFileError from ..codelet import Codelet GIT_CLONE_DIR = "/tmp/bitshift" @@ -73,6 +74,7 @@ class GitIndexer(threading.Thread): self.index_queue = Queue.Queue(maxsize=MAX_INDEX_QUEUE_SIZE) self.git_cloner = _GitCloner(clone_queue, self.index_queue) 
self.git_cloner.start() + self.database = Database() self._logger = logging.getLogger("%s.%s" % (__name__, self.__class__.__name__)) self._logger.info("Starting.") @@ -98,10 +100,7 @@ class GitIndexer(threading.Thread): repo = self.index_queue.get() self.index_queue.task_done() - try: - self._index_repository(repo) - except Exception as excep: - self._logger.warning("%s: %s.", excep.__class__.__name__, excep) + self._index_repository(repo) def _index_repository(self, repo): """ @@ -119,10 +118,15 @@ class GitIndexer(threading.Thread): try: self._insert_repository_codelets(repo) except Exception as excep: - self._logger.warning("%s: %s.", excep.__class__.__name__, excep) - - if os.path.isdir("%s/%s" % (GIT_CLONE_DIR, repo.name)): - shutil.rmtree("%s/%s" % (GIT_CLONE_DIR, repo.name)) + self._logger.exception("Exception raised while indexing:") + finally: + if os.path.isdir("%s/%s" % (GIT_CLONE_DIR, repo.name)): + if len([obj for obj in os.listdir('.') if + os.path.isdir(obj)]) <= 1: + shutil.rmtree("%s/%s" % ( + GIT_CLONE_DIR, repo.name.split("/")[0])) + else: + shutil.rmtree("%s/%s" % (GIT_CLONE_DIR, repo.name)) def _insert_repository_codelets(self, repo): """ @@ -147,17 +151,22 @@ class GitIndexer(threading.Thread): source = self._decode(source_file.read()) if source is None: continue - except IOError as exception: + except IOError: continue - authors = [(self._decode(author), None) for author in \ - commits_meta[filename]["authors"]] + authors = [(self._decode(author), None) for author in + commits_meta[filename]["authors"]] codelet = Codelet("%s:%s" % (repo.name, filename), source, filename, None, authors, self._generate_file_url(filename, repo.url, repo.framework_name), commits_meta[filename]["time_created"], commits_meta[filename]["time_last_modified"], repo.rank) + try: + parse(codelet) + except UnsupportedFileError: + continue + self.database.insert(codelet) def _generate_file_url(self, filename, repo_url, framework_name): """ diff --git 
a/bitshift/database/__init__.py b/bitshift/database/__init__.py index e4fa430..311eb21 100644 --- a/bitshift/database/__init__.py +++ b/bitshift/database/__init__.py @@ -63,7 +63,7 @@ class Database(object): query, args = tree.build_query(page) cursor.execute(query, args) ids = [id for id, _ in cursor.fetchall()] - num_results = 0 # TODO: NotImplemented + num_results = len(ids) # TODO: NotImplemented return ids, num_results def _get_authors_for_codelet(self, cursor, codelet_id): @@ -103,13 +103,13 @@ class Database(object): WHERE codelet_id = ?""" with self._conn.cursor(oursql.DictCursor) as dict_cursor: - dict_cursor.executemany(query, [(id,) for id in ids]) - for row in dict_cursor.fetchone(): - codelet_id = row["codelet_id"] + for codelet_id in ids: + dict_cursor.execute(query, (codelet_id,)) + row = dict_cursor.fetchall()[0] if row["origin_url_base"]: - url = row["codelet_url"] - else: url = row["origin_url_base"] + row["codelet_url"] + else: + url = row["codelet_url"] origin = (row["origin_name"], row["origin_url"], row["origin_image"]) authors = self._get_authors_for_codelet(cursor, codelet_id) @@ -160,28 +160,31 @@ class Database(object): :return: The total number of results, and the *n*\ th page of results. :rtype: 2-tuple of (long, list of :py:class:`.Codelet`\ s) """ - query1 = """SELECT cdata_codelet, cache_count_mnt, cache_count_exp + query1 = "SELECT 1 FROM cache WHERE cache_id = ?" 
+ query2 = """SELECT cdata_codelet, cache_count_mnt, cache_count_exp FROM cache INNER JOIN cache_data ON cache_id = cdata_cache WHERE cache_id = ?""" - query2 = "INSERT INTO cache VALUES (?, ?, ?, DEFAULT)" - query3 = "INSERT INTO cache_data VALUES (?, ?)" + query3 = "INSERT INTO cache VALUES (?, ?, ?, DEFAULT)" + query4 = "INSERT INTO cache_data VALUES (?, ?)" cache_id = mmh3.hash64(str(page) + ":" + query.serialize())[0] with self._conn.cursor() as cursor: cursor.execute(query1, (cache_id,)) - results = cursor.fetchall() - if results: # Cache hit - num_results = results[0][1] * (10 ** results[0][2]) - ids = [res[0] for res in results] - else: # Cache miss + cache_hit = cursor.fetchall() + if cache_hit: + cursor.execute(query2, (cache_id,)) + rows = cursor.fetchall() + num_results = rows[0][1] * (10 ** rows[0][2]) if rows else 0 + ids = [row[0] for row in rows] + else: ids, num_results = self._search_with_query(cursor, query, page) num_exp = max(len(str(num_results)) - 3, 0) num_results = int(round(num_results, -num_exp)) num_mnt = num_results / (10 ** num_exp) - cursor.execute(query2, (cache_id, num_mnt, num_exp)) - cursor.executemany(query3, [(cache_id, c_id) for c_id in ids]) + cursor.execute(query3, (cache_id, num_mnt, num_exp)) + cursor.executemany(query4, [(cache_id, c_id) for c_id in ids]) codelet_gen = self._get_codelets_from_ids(cursor, ids) return (num_results, list(codelet_gen)) diff --git a/bitshift/languages.json b/bitshift/languages.json new file mode 100644 index 0000000..d855164 --- /dev/null +++ b/bitshift/languages.json @@ -0,0 +1,283 @@ +{ + "_comment" : "A list of programming languages supported by `bitshift`.", + "languages" : [ + "Debian Sourcelist", + "Delphi", + "JavaScript+Mako", + "Brainfuck", + "Ceylon", + "JavaScript+Django/Jinja", + "HTML+Evoque", + "NumPy", + "Modula-2", + "LiveScript", + "Nimrod", + "Bash", + "HTML+Django/Jinja", + "CSS+PHP", + "XML+Lasso", + "VimL", + "CSS+Genshi Text", + "Fancy", + "Coldfusion HTML", + 
"cfstatement", + "Scalate Server Page", + "Smarty", + "XML+Evoque", + "haXe", + "PowerShell", + "Tea", + "HTML+Cheetah", + "Mason", + "Django/Jinja", + "JAGS", + "ApacheConf", + "DTD", + "Lighttpd configuration file", + "Java", + "JavaScript+Genshi Text", + "Scheme", + "Nemerle", + "RHTML", + "Ragel in Java Host", + "Darcs Patch", + "Puppet", + "Octave", + "CoffeeScript", + "Ragel in D Host", + "Scilab", + "Monkey", + "HTML+Myghty", + "CSS", + "JavaScript+Smarty", + "Io", + "COBOLFree", + "Asymptote", + "vhdl", + "CSS+Ruby", + "Fortran", + "d-objdump", + "MySQL", + "REBOL", + "C++", + "ERB", + "CBM BASIC V2", + "Befunge", + "Julia", + "MoonScript", + "Ruby", + "XML+Smarty", + "Dylan", + "Groovy", + "MoinMoin/Trac Wiki markup", + "autohotkey", + "C", + "HTML", + "Felix", + "CMake", + "NSIS", + "SourcePawn", + "Mako", + "VGL", + "Velocity", + "Koka", + "CUDA", + "Gnuplot", + "IRC logs", + "Prolog", + "Python", + "CSS+Django/Jinja", + "verilog", + "Smalltalk", + "JavaScript+Myghty", + "YAML", + "Julia console", + "ANTLR With ActionScript Target", + "XML+Mako", + "XSLT", + "UrbiScript", + "Scaml", + "S", + "DylanLID", + "MAQL", + "sqlite3con", + "Boo", + "OCaml", + "eC", + "ActionScript", + "VB.net", + "SquidConf", + "XQuery", + "D", + "Fantom", + "Gettext Catalog", + "Logos", + "Lasso", + "SCSS", + "BBCode", + "Haml", + "FoxPro", + "MuPAD", + "XML+Ruby", + "Dart", + "IDL", + "dg", + "Evoque", + "Jade", + "c-objdump", + "Kconfig", + "Java Server Page", + "reg", + "ABAP", + "XML+Velocity", + "JavaScript+Cheetah", + "HTML+Mako", + "Ragel in Ruby Host", + "RobotFramework", + "Protocol Buffer", + "CFEngine3", + "Ragel", + "GLSL", + "COBOL", + "TypeScript", + "Ada", + "PostgreSQL SQL dialect", + "Xtend", + "Logtalk", + "objdump", + "CSS+Mako", + "ca65", + "Objective-C++", + "Gherkin", + "HTML+PHP", + "Makefile", + "PostScript", + "Hxml", + "Kotlin", + "PL/pgSQL", + "Vala", + "Haskell", + "Bro", + "Lua", + "POVRay", + "Sass", + "ANTLR With Java Target", + "Tcl", + "ANTLR 
With ObjectiveC Target", + "JavaScript+Ruby", + "Racket", + "AspectJ", + "Base Makefile", + "ANTLR With Python Target", + "cpp-objdump", + "Genshi Text", + "Ioke", + "PyPy Log", + "Croc", + "Objective-J", + "GAS", + "Batchfile", + "Snobol", + "XML", + "ANTLR", + "Opa", + "XML+Cheetah", + "Go", + "Diff", + "MiniD", + "Cython", + "Ragel in C Host", + "Erlang", + "Debian Control file", + "aspx-vb", + "BUGS", + "Ragel in CPP Host", + "aspx-cs", + "Properties", + "Groff", + "Clojure", + "Modelica", + "QML", + "JavaScript+Lasso", + "ANTLR With Perl Target", + "Genshi", + "BlitzMax", + "Treetop", + "Matlab", + "Myghty", + "HTML+Genshi", + "Duel", + "Perl", + "FSharp", + "reStructuredText", + "NewLisp", + "Scala", + "CSS+Lasso", + "XML+PHP", + "Stan", + "INI", + "MOOCode", + "Shell Session", + "RPMSpec", + "Newspeak", + "Bash Session", + "Coq", + "Raw token data", + "Tcsh", + "HTML+Lasso", + "C#", + "Gosu Template", + "RConsole", + "MXML", + "TeX", + "CSS+Smarty", + "Text only", + "ANTLR With C# Target", + "OpenEdge ABL", + "Cheetah", + "Smali", + "CSS+Myghty", + "Rd", + "LLVM", + "Standard ML", + "Elixir", + "Nginx configuration file", + "GoodData-CL", + "AppleScript", + "HTML+Smarty", + "Objective-C", + "JavaScript", + "Rust", + "Common Lisp", + "Embedded Ragel", + "ActionScript 3", + "systemverilog", + "Literate Haskell", + "PHP", + "ANTLR With CPP Target", + "Gosu", + "Hybris", + "JavaScript+PHP", + "Factor", + "HTML+Velocity", + "Mscgen", + "Ooc", + "SQL", + "HTTP", + "ECL", + "Redcode", + "Ragel in Objective C Host", + "XML+Django/Jinja", + "Awk", + "JSON", + "NASM", + "ANTLR With Ruby Target", + "XML+Myghty", + "AutoIt", + "Mako", + "CSS+Mako", + "HTML+Mako", + "XML+Mako", + "JavaScript+Mako" + ] +} diff --git a/bitshift/languages.py b/bitshift/languages.py index 78c0830..36d7f63 100644 --- a/bitshift/languages.py +++ b/bitshift/languages.py @@ -1,5 +1,5 @@ import json +from os import path -with open("bitshift/resources/languages.json") as lang_json: - LANGS = 
[lang.encode("ascii","ignore") for lang in - json.load(lang_json)["languages"]] +with open(path.join(path.dirname(__file__), "languages.json")) as lang_json: + LANGS = [lang for lang in json.load(lang_json)["languages"]] diff --git a/bitshift/parser/__init__.py b/bitshift/parser/__init__.py index bc22514..5cd9446 100644 --- a/bitshift/parser/__init__.py +++ b/bitshift/parser/__init__.py @@ -1,8 +1,25 @@ -import json, pygments.lexers as pgl, sys, socket, struct +import json +import sys +import socket +import struct +import subprocess + +from os import path +from pygments import lexers as pgl, util + from ..languages import LANGS from .python import parse_py -_all__ = ["parse"] +_all__ = ["parse", "UnsupportedFileError", "start_parse_servers"] + +PARSER_COMMANDS = [ + ('Java', ['mvn', '-f', + path.join(path.dirname(__file__), "../../parsers/java/pom.xml"), + 'exec:java', '-Dexec.args="%d"']), + ('Ruby', ['rake', '-f', + path.join(path.dirname(__file__), "../../parsers/ruby/Rakefile"), + "'start_server[%d]'"]) +] class UnsupportedFileError(Exception): pass @@ -19,13 +36,15 @@ def _lang(codelet): Modify function to incorporate tags from stackoverflow. 
""" - if codelet.filename is not None: - try: - return pgl.guess_lexer_for_filename(codelet.filename, codelet.code).name - except: - raise UnsupportedFileError('Could not find a lexer for the codelet\'s filename') + try: + if codelet.filename: + lex = pgl.guess_lexer_for_filename(codelet.filename, codelet.code) + else: + lex = pgl.guess_lexer(codelet.code) + except util.ClassNotFound: + raise UnsupportedFileError(codelet.filename) - return LANGS.index(pgl.guess_lexer(codelet.code)) + return LANGS.index(lex.name) def _recv_data(server_socket): """ @@ -39,8 +58,9 @@ def _recv_data(server_socket): """ recv_size = 8192 - total_data = []; size_data = cur_data = '' - total_size = 0; size = sys.maxint + total_data = [] + size_data = cur_data = '' + total_size, size = 0, sys.maxint while total_size < size: cur_data = server_socket.recv(recv_size) @@ -61,8 +81,23 @@ def _recv_data(server_socket): total_size = sum([len(s) for s in total_data]) server_socket.close() - return ''.join(total_data); + return ''.join(total_data) + +def start_parse_servers(): + """ + Starts all the parse servers for languages besides python. 
+ :rtype: list + """ + + procs = [] + + for (lang, cmd) in PARSER_COMMANDS: + procs.append( + subprocess.Popen(' '.join(cmd) % (5001 + LANGS.index(lang)), + shell=True)) + + return procs def parse(codelet): """ @@ -76,9 +111,10 @@ def parse(codelet): :type code: Codelet """ - lang = _lang(codelet); source = codelet.code + lang = _lang(codelet) + source = codelet.code codelet.language = lang - server_socket_number = 5000 + lang + server_socket_number = 5001 + lang if lang == LANGS.index('Python'): parse_py(codelet) @@ -86,8 +122,13 @@ def parse(codelet): else: server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) server_socket.connect(("localhost", server_socket_number)) - server_socket.send("%d\n%s" % (len(source), source)); + server_socket.send("%d\n%s" % (len(source), source)) symbols = json.loads(_recv_data(server_socket)) - codelet.symbols = symbols + symbols = {key: [(name, [tuple(loc) + for loc in syms[name]['assignments']], + [tuple(loc) for loc in syms[name]['uses']]) + for name in syms.keys()] + for key, syms in symbols.iteritems()} + codelet.symbols = symbols diff --git a/bitshift/parser/python.py b/bitshift/parser/python.py index d0cd7d3..3cb141d 100644 --- a/bitshift/parser/python.py +++ b/bitshift/parser/python.py @@ -1,4 +1,7 @@ import ast +import re + +encoding_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", re.UNICODE) class _CachedWalker(ast.NodeVisitor): """ @@ -154,7 +157,26 @@ def parse_py(codelet): :type code: Codelet """ - tree = ast.parse(codelet.code) + def strip_encoding(lines): + """Strips the encoding line from a file, which breaks the parser.""" + it = iter(lines) + try: + first = next(it) + if not encoding_re.match(first): + yield first + second = next(it) + if not encoding_re.match(second): + yield second + except StopIteration: + return + for line in it: + yield line + + try: + tree = ast.parse("\n".join(strip_encoding(codelet.code.splitlines()))) + except SyntaxError: + ## TODO: add some logging here? 
+ return cutter = _CachedWalker() cutter.visit(tree) codelet.symbols = cutter.accum diff --git a/bitshift/query/__init__.py b/bitshift/query/__init__.py index 41d01cf..73d6f65 100644 --- a/bitshift/query/__init__.py +++ b/bitshift/query/__init__.py @@ -36,10 +36,75 @@ class _QueryParser(object): self._parse_variable: ["v", "var", "variable"] } + def _scan_query(self, query, markers): + """Scan a query (sub)string for the first occurrence of some markers. + + Returns a 2-tuple of (first_marker_found, marker_index). + """ + def is_escaped(query, index): + """Return whether a query marker is backslash-escaped.""" + return (index > 0 and query[index - 1] == "\\" and + (index < 2 or query[index - 2] != "\\")) + + best_marker, best_index = None, maxsize + for marker in markers: + index = query.find(marker) + if is_escaped(query, index): + _, new_index = self._scan_query(query[index + 1:], marker) + index += new_index + 1 + if index >= 0 and index < best_index: + best_marker, best_index = marker, index + return best_marker, best_index + + def _split_query(self, query, markers, parens=False): + """Split a query string into a nested list of query terms. + + Returns a list of terms and/or nested sublists of terms. Each term and + sublist is guaranteed to be non-empty. 
+ """ + query = query.lstrip() + if not query: + return [] + marker, index = self._scan_query(query, markers) + if not marker: + return [query] + nest = [query[:index]] if index > 0 else [] + after = query[index + 1:] + + if marker == " ": + nest += self._split_query(after, markers, parens) + elif marker in ('"', "'"): + close_marker, close_index = self._scan_query(after, marker) + if close_marker: + if close_index > 0: + nest.append(after[:close_index]) + after = after[close_index + 1:] + nest += self._split_query(after, markers, parens) + elif after: + nest.append(after) + elif marker == "(": + inner, after = self._split_query(after, markers, True), [] + if inner and isinstance(inner[-1], tuple): + after = self._split_query(inner.pop()[0], markers, parens) + if inner: + nest.append(inner) + if after: + nest += after + elif marker == ")": + if parens: + nest.append((after,)) + else: + nest += self._split_query(after, markers) + return nest + def _parse_literal(self, literal): """Parse part of a search query into a string or regular expression.""" if literal.startswith(("r:", "re:", "regex:", "regexp:")): - return Regex(literal.split(":", 1)[1]) + arg = literal.split(":", 1)[1] + if not arg: + err = 'Incomplete query term: "%s"' % literal + raise QueryParseException(err) + return Regex(arg) return String(literal) def _parse_language(self, term): @@ -98,21 +163,29 @@ class _QueryParser(object): """Parse part of a query into a date created node and return it.""" return self._parse_date(term, Date.CREATE) - def _parse_symbol(self, term): + def _parse_symbol(self, term, stype=Symbol.ALL): """Parse part of a query into a symbol node and return it.""" - return Symbol(Symbol.ALL, self._parse_literal(term)) + literal = self._parse_literal(term) + if isinstance(literal, String): + make_symbol = lambda lit: Symbol(stype, String(lit)) + symbols = self._split_query(literal.string, " \"'") + node = make_symbol(symbols.pop()) + while symbols: + node = 
BinaryOp(make_symbol(symbols.pop()), BinaryOp.OR, node) + return node + return Symbol(stype, literal) def _parse_function(self, term): """Parse part of a query into a function node and return it.""" - return Symbol(Symbol.FUNCTION, self._parse_literal(term)) + return self._parse_symbol(term, Symbol.FUNCTION) def _parse_class(self, term): """Parse part of a query into a class node and return it.""" - return Symbol(Symbol.CLASS, self._parse_literal(term)) + return self._parse_symbol(term, Symbol.CLASS) def _parse_variable(self, term): """Parse part of a query into a variable node and return it.""" - return Symbol(Symbol.VARIABLE, self._parse_literal(term)) + return self._parse_symbol(term, Symbol.VARIABLE) def _parse_term(self, term): """Parse a query term into a tree node and return it.""" @@ -134,67 +207,6 @@ class _QueryParser(object): return meth(arg) return Text(self._parse_literal(term)) - def _scan_query(self, query, markers): - """Scan a query (sub)string for the first occurance of some markers. - - Returns a 2-tuple of (first_marker_found, marker_index). - """ - def is_escaped(query, index): - """Return whether a query marker is backslash-escaped.""" - return (index > 0 and query[index - 1] == "\\" and - (index < 2 or query[index - 2] != "\\")) - - best_marker, best_index = None, maxsize - for marker in markers: - index = query.find(marker) - if is_escaped(query, index): - _, new_index = self._scan_query(query[index + 1:], marker) - index += new_index + 1 - if index >= 0 and index < best_index: - best_marker, best_index = marker, index - return best_marker, best_index - - def _split_query(self, query, parens=False): - """Split a query string into a nested list of query terms. - - Returns a list of terms and/or nested sublists of terms. Each term and - sublist is guarenteed to be non-empty. 
- """ - query = query.lstrip() - if not query: - return [] - marker, index = self._scan_query(query, " \"'()") - if not marker: - return [query] - nest = [query[:index]] if index > 0 else [] - after = query[index + 1:] - - if marker == " ": - nest += self._split_query(after, parens) - elif marker in ('"', "'"): - close_marker, close_index = self._scan_query(after, marker) - if close_marker: - if close_index > 0: - nest.append(after[:close_index]) - after = after[close_index + 1:] - nest += self._split_query(after, parens) - elif after: - nest.append(after) - elif marker == "(": - inner, after = self._split_query(after, True), [] - if inner and isinstance(inner[-1], tuple): - after = self._split_query(inner.pop()[0], parens) - if inner: - nest.append(inner) - if after: - nest += after - elif marker == ")": - if parens: - nest.append((after,)) - else: - nest += self._split_query(after) - return nest - def _parse_boolean_operators(self, nest): """Parse boolean operators in a nested query list.""" op_lookup = { @@ -271,7 +283,7 @@ class _QueryParser(object): :raises: :py:class:`.QueryParseException` """ - nest = self._split_query(query.rstrip()) + nest = self._split_query(query.rstrip(), " \"'()") if not nest: raise QueryParseException('Empty query: "%s"' % query) self._parse_boolean_operators(nest) diff --git a/bitshift/query/nodes.py b/bitshift/query/nodes.py index 5d157b5..d375ffb 100644 --- a/bitshift/query/nodes.py +++ b/bitshift/query/nodes.py @@ -195,7 +195,7 @@ class Symbol(_Node): CLASS = 1 VARIABLE = 2 TYPES = {FUNCTION: "FUNCTION", CLASS: "CLASS", VARIABLE: "VARIABLE"} - TYPES_INV = ["functions", "classes", "variables"] + TYPES_INV = ["functions", "classes", "vars"] def __init__(self, type_, name): """ diff --git a/bitshift/resources/languages.json b/bitshift/resources/languages.json deleted file mode 100644 index 02ca0ad..0000000 --- a/bitshift/resources/languages.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "_comment" : "A list of programming languages 
supported by `bitshift`.", - "languages" : ["Debian Sourcelist", "Delphi", "JavaScript+Mako", "Brainfuck", "Ceylon", "JavaScript+Django/Jinja", "HTML+Evoque", "NumPy", "Modula-2", "LiveScript", "Nimrod", "Bash", "HTML+Django/Jinja", "CSS+PHP", "XML+Lasso", "VimL", "CSS+Genshi Text", "Fancy", "Coldfusion HTML", "cfstatement", "Scalate Server Page", "Smarty", "XML+Evoque", "haXe", "PowerShell", "Tea", "HTML+Cheetah", "Mason", "Django/Jinja", "JAGS", "ApacheConf", "DTD", "Lighttpd configuration file", "Java", "JavaScript+Genshi Text", "Scheme", "Nemerle", "RHTML", "Ragel in Java Host", "Darcs Patch", "Puppet", "Octave", "CoffeeScript", "Ragel in D Host", "Scilab", "Monkey", "HTML+Myghty", "CSS", "JavaScript+Smarty", "Io", "COBOLFree", "Asymptote", "vhdl", "Python 3", "CSS+Ruby", "Fortran", "d-objdump", "MySQL", "REBOL", "C++", "ERB", "CBM BASIC V2", "Befunge", "Julia", "MoonScript", "Ruby", "XML+Smarty", "Dylan", "Groovy", "MoinMoin/Trac Wiki markup", "autohotkey", "C", "HTML", "Felix", "CMake", "NSIS", "SourcePawn", "Mako", "VGL", "Velocity", "Koka", "CUDA", "Gnuplot", "IRC logs", "Prolog", "Python", "CSS+Django/Jinja", "verilog", "Smalltalk", "JavaScript+Myghty", "YAML", "Julia console", "ANTLR With ActionScript Target", "XML+Mako", "XSLT", "UrbiScript", "Scaml", "S", "DylanLID", "MAQL", "sqlite3con", "Boo", "OCaml", "eC", "ActionScript", "VB.net", "SquidConf", "XQuery", "D", "Fantom", "Gettext Catalog", "Logos", "Lasso", "SCSS", "BBCode", "Haml", "FoxPro", "Python 3.0 Traceback", "MuPAD", "XML+Ruby", "Dart", "IDL", "dg", "Evoque", "Jade", "c-objdump", "Kconfig", "Java Server Page", "reg", "ABAP", "XML+Velocity", "JavaScript+Cheetah", "HTML+Mako", "Ragel in Ruby Host", "RobotFramework", "Protocol Buffer", "CFEngine3", "Ragel", "GLSL", "COBOL", "TypeScript", "Ada", "PostgreSQL SQL dialect", "Xtend", "Logtalk", "objdump", "CSS+Mako", "ca65", "Objective-C++", "Gherkin", "HTML+PHP", "Makefile", "PostScript", "Hxml", "Kotlin", "PL/pgSQL", "Vala", "Haskell", "Bro", "Lua", 
"POVRay", "Sass", "ANTLR With Java Target", "Tcl", "ANTLR With ObjectiveC Target", "JavaScript+Ruby", "Racket", "AspectJ", "Base Makefile", "ANTLR With Python Target", "cpp-objdump", "Genshi Text", "Ioke", "PyPy Log", "Croc", "Objective-J", "GAS", "Batchfile", "Snobol", "XML", "ANTLR", "Opa", "XML+Cheetah", "Go", "Diff", "MiniD", "Cython", "Ragel in C Host", "Erlang", "Debian Control file", "aspx-vb", "BUGS", "Ragel in CPP Host", "aspx-cs", "Properties", "Groff", "Clojure", "Modelica", "QML", "JavaScript+Lasso", "ANTLR With Perl Target", "Genshi", "BlitzMax", "Treetop", "Matlab", "Myghty", "HTML+Genshi", "Duel", "Perl", "FSharp", "reStructuredText", "NewLisp", "Scala", "CSS+Lasso", "XML+PHP", "Stan", "INI", "MOOCode", "Shell Session", "RPMSpec", "Newspeak", "Bash Session", "Coq", "Raw token data", "Tcsh", "HTML+Lasso", "C#", "Gosu Template", "RConsole", "MXML", "TeX", "CSS+Smarty", "Text only", "ANTLR With C# Target", "OpenEdge ABL", "Cheetah", "Smali", "CSS+Myghty", "Rd", "LLVM", "Standard ML", "Elixir", "Nginx configuration file", "GoodData-CL", "AppleScript", "HTML+Smarty", "Objective-C", "JavaScript", "Rust", "Common Lisp", "Embedded Ragel", "ActionScript 3", "systemverilog", "Literate Haskell", "Python Traceback", "PHP", "ANTLR With CPP Target", "Gosu", "Hybris", "JavaScript+PHP", "Factor", "HTML+Velocity", "Mscgen", "Ooc", "SQL", "HTTP", "ECL", "Redcode", "Ragel in Objective C Host", "XML+Django/Jinja", "Awk", "JSON", "NASM", "ANTLR With Ruby Target", "XML+Myghty", "AutoIt", "Mako", "CSS+Mako", "HTML+Mako", "XML+Mako", "JavaScript+Mako"] -} diff --git a/parsers/java/src/main/java/com/bitshift/parsing/Parse.java b/parsers/java/src/main/java/com/bitshift/parsing/Parse.java index fc1d36f..1964b59 100644 --- a/parsers/java/src/main/java/com/bitshift/parsing/Parse.java +++ b/parsers/java/src/main/java/com/bitshift/parsing/Parse.java @@ -1,33 +1,13 @@ package com.bitshift.parsing; -import java.io.BufferedReader; -import java.io.InputStreamReader; -import 
java.io.PrintWriter; -import java.io.IOException; - -import java.net.ServerSocket; -import java.net.Socket; - -import com.bitshift.parsing.parsers.JavaParser; +import com.bitshift.parsing.utils.ParseServer; public class Parse { public static void main(String[] args) { - String fromClient; - String toClient; - - try { - ServerSocket server = new ServerSocket(5002); - - while(true) { - Socket clientSocket = server.accept(); - - JavaParser parser = new JavaParser(clientSocket); - Thread parserTask = new Thread(parser); - parserTask.start(); - } - } catch (IOException ex) { - } + ParseServer server = new ParseServer(Integer.parseInt(args[0])); + System.out.println("Java Server listening on port " + args[0]); + new Thread(server).start(); } } diff --git a/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java b/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java index 4ba3623..989c0dd 100644 --- a/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java +++ b/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java @@ -13,7 +13,6 @@ import org.eclipse.jdt.core.dom.ASTParser; import org.eclipse.jdt.core.dom.ASTVisitor; import org.eclipse.jdt.core.dom.CompilationUnit; import org.eclipse.jdt.core.dom.ClassInstanceCreation; -import org.eclipse.jdt.core.dom.FieldDeclaration; import org.eclipse.jdt.core.dom.MethodDeclaration; import org.eclipse.jdt.core.dom.MethodInvocation; import org.eclipse.jdt.core.dom.Name; @@ -71,22 +70,6 @@ public class JavaParser extends Parser { this._cache = new Stack>(); } - public boolean visit(FieldDeclaration node) { - HashMap data = new HashMap(); - int sl = this.root.getLineNumber(node.getStartPosition()); - int sc = this.root.getColumnNumber(node.getStartPosition()); - - data.put("coord", Symbols.createCoord(sl, sc, -1, -1)); - this._cache.push(data); - return true; - } - - public void endVisit(FieldDeclaration node) { - HashMap data = this._cache.pop(); - String name = 
(String)data.remove("name"); - this.symbols.insertFieldDeclaration(name, data); - } - public boolean visit(MethodDeclaration node) { HashMap data = new HashMap(); Name nameObj = node.getName(); @@ -115,7 +98,7 @@ public class JavaParser extends Parser { public void endVisit(MethodDeclaration node) { HashMap data = this._cache.pop(); String name = (String)data.remove("name"); - this.symbols.insertMethodDeclaration(name, data); + this.symbols.insertMethodDeclaration("\"" + name + "\"", data); } public boolean visit(MethodInvocation node) { @@ -136,7 +119,7 @@ public class JavaParser extends Parser { public void endVisit(MethodInvocation node) { HashMap data = this._cache.pop(); String name = (String)data.remove("name"); - this.symbols.insertMethodInvocation(name, data); + this.symbols.insertMethodInvocation("\"" + name + "\"", data); } public boolean visit(PackageDeclaration node) { @@ -167,9 +150,9 @@ public class JavaParser extends Parser { String name = (String)data.remove("name"); if (node.isInterface()) { - this.symbols.insertInterfaceDeclaration(name, data); + this.symbols.insertInterfaceDeclaration("\"" + name + "\"", data); } else { - this.symbols.insertClassDeclaration(name, data); + this.symbols.insertClassDeclaration("\"" + name + "\"", data); } } @@ -186,7 +169,7 @@ public class JavaParser extends Parser { public void endVisit(VariableDeclarationFragment node) { HashMap data = this._cache.pop(); String name = (String)data.remove("name"); - this.symbols.insertVariableDeclaration(name, data); + this.symbols.insertVariableDeclaration("\"" + name + "\"", data); } public boolean visit(QualifiedName node) { diff --git a/parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java b/parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java index 9d00954..83100f5 100644 --- a/parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java +++ b/parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java @@ -1,8 +1,9 @@ package 
com.bitshift.parsing.parsers; import java.io.BufferedReader; +import java.io.BufferedWriter; import java.io.InputStreamReader; -import java.io.PrintWriter; +import java.io.OutputStreamWriter; import java.io.IOException; import java.net.Socket; @@ -46,12 +47,16 @@ public abstract class Parser implements Runnable { protected void writeToClient(String toClient) { try { - PrintWriter clientWriter = new PrintWriter( - this.clientSocket.getOutputStream(), true); + BufferedWriter clientWriter = new BufferedWriter( + new OutputStreamWriter(this.clientSocket.getOutputStream())); - PackableMemory mem = new PackableMemory(toClient.length()); + PackableMemory mem = new PackableMemory(4); + mem.pack(toClient.length(), 0); String dataSize = new String(mem.mem); - clientWriter.println(dataSize + toClient); + + clientWriter.write(dataSize + toClient); + clientWriter.flush(); + this.clientSocket.close(); } catch (IOException ex) { } } diff --git a/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java b/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java index 5419d5a..6f0caf1 100644 --- a/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java +++ b/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java @@ -11,15 +11,16 @@ public class JavaSymbols extends Symbols { private HashMap> _classes; private HashMap> _interfaces; private HashMap> _methods; - private HashMap> _fields; private HashMap> _vars; + private final String assignKey = "\"assignments\""; + private final String useKey = "\"uses\""; + public JavaSymbols() { _packageName = null; _classes = new HashMap>(); _interfaces = new HashMap>(); _methods = new HashMap>(); - _fields = new HashMap>(); _vars = new HashMap>(); } @@ -34,15 +35,23 @@ public class JavaSymbols extends Symbols { HashMap klass = new HashMap(); assignments.add(data.get("coord")); - klass.put("assignments", assignments); - klass.put("uses", uses); + klass.put(assignKey, assignments); + 
klass.put(useKey, uses); this._classes.put(name, klass); return true; } public boolean insertInterfaceDeclaration(String name, HashMap data) { - this._interfaces.put(name, data); + ArrayList assignments = new ArrayList(10); + ArrayList uses = new ArrayList(10); + HashMap klass = new HashMap(); + + assignments.add(data.get("coord")); + klass.put(assignKey, assignments); + klass.put(useKey, uses); + + this._interfaces.put(name, klass); return true; } @@ -54,13 +63,13 @@ public class JavaSymbols extends Symbols { ArrayList uses = new ArrayList(10); assignments.add(data.get("coord")); - method.put("assignments", assignments); - method.put("uses", uses); + method.put(assignKey, assignments); + method.put(useKey, uses); } else { - ArrayList assignments = (ArrayList)method.get("assignments"); + ArrayList assignments = (ArrayList)method.get(assignKey); assignments.add(data.get("coord")); - method.put("assignments", assignments); + method.put(assignKey, assignments); } this._methods.put(name, method); @@ -74,24 +83,19 @@ public class JavaSymbols extends Symbols { ArrayList uses = new ArrayList(10); uses.add(data.get("coord")); - method.put("assignments", assignments); - method.put("uses", uses); + method.put(assignKey, assignments); + method.put(useKey, uses); } else { - ArrayList uses = (ArrayList)method.get("uses"); + ArrayList uses = (ArrayList)method.get(useKey); uses.add(data.get("coord")); - method.put("uses", uses); + method.put(useKey, uses); } this._methods.put(name, method); return true; } - public boolean insertFieldDeclaration(String name, HashMap data) { - this._fields.put(name, data); - return true; - } - public boolean insertVariableDeclaration(String name, HashMap data) { HashMap var = this._vars.get(name); if (var == null) { @@ -100,13 +104,13 @@ public class JavaSymbols extends Symbols { ArrayList uses = new ArrayList(10); assignments.add(data.get("coord")); - var.put("assignments", assignments); - var.put("uses", uses); + var.put(assignKey, assignments); 
+ var.put(useKey, uses); } else { - ArrayList assignments = (ArrayList)var.get("assignments"); + ArrayList assignments = (ArrayList)var.get(assignKey); assignments.add(data.get("coord")); - var.put("assignments", assignments); + var.put(assignKey, assignments); } this._vars.put(name, var); @@ -120,13 +124,13 @@ public class JavaSymbols extends Symbols { ArrayList uses = new ArrayList(10); uses.add(data.get("coord")); - var.put("assignments", assignments); - var.put("uses", uses); + var.put(assignKey, assignments); + var.put(useKey, uses); } else { - ArrayList uses = (ArrayList)var.get("uses"); + ArrayList uses = (ArrayList)var.get(useKey); uses.add(data.get("coord")); - var.put("uses", uses); + var.put(useKey, uses); } this._vars.put(name, var); @@ -135,13 +139,14 @@ public class JavaSymbols extends Symbols { public String toString() { StringBuilder builder = new StringBuilder(); - builder.append("classes:" + this._classes + ","); - builder.append("interfaces:" + this._interfaces + ","); - builder.append("methods:" + this._methods + ","); - builder.append("fields:" + this._fields + ","); - builder.append("vars:" + this._vars + ","); - - return "{" + builder.toString() + "}"; + builder.append("\"classes\":" + this._classes + ","); + builder.append("\"interfaces\":" + this._interfaces + ","); + builder.append("\"methods\":" + this._methods + ","); + builder.append("\"vars\":" + this._vars + ","); + + String s = builder.toString().replaceAll("=", ":"); + s = s.substring(0, s.length() - 1); + return "{" + s + "}"; } } diff --git a/parsers/java/src/main/java/com/bitshift/parsing/utils/PackableMemory.java b/parsers/java/src/main/java/com/bitshift/parsing/utils/PackableMemory.java index 24d883c..1f54d99 100644 --- a/parsers/java/src/main/java/com/bitshift/parsing/utils/PackableMemory.java +++ b/parsers/java/src/main/java/com/bitshift/parsing/utils/PackableMemory.java @@ -22,7 +22,7 @@ public class PackableMemory { // The most significant porion of the integer is stored in 
mem[loc]. // Bytes are masked out of the integer and stored in the array, working // from right(least significant) to left (most significant). - void pack(int val, int loc) + public void pack(int val, int loc) { final int MASK = 0xff; for (int i = 3; i >= 0; i--) diff --git a/parsers/java/src/main/java/com/bitshift/parsing/utils/ParseServer.java b/parsers/java/src/main/java/com/bitshift/parsing/utils/ParseServer.java new file mode 100644 index 0000000..291be34 --- /dev/null +++ b/parsers/java/src/main/java/com/bitshift/parsing/utils/ParseServer.java @@ -0,0 +1,65 @@ +/* Code for multithreaded server taken from Jakob Jenkov */ +package com.bitshift.parsing.utils; + +import java.net.ServerSocket; +import java.net.Socket; +import java.io.IOException; + +import com.bitshift.parsing.parsers.JavaParser; + +public class ParseServer implements Runnable{ + + protected int serverPort = 8080; + protected ServerSocket serverSocket = null; + protected boolean isStopped = false; + protected Thread runningThread= null; + + public ParseServer(int port){ + this.serverPort = port; + } + + public void run(){ + synchronized(this){ + this.runningThread = Thread.currentThread(); + } + openServerSocket(); + while(! 
isStopped()){ + Socket clientSocket = null; + try { + clientSocket = this.serverSocket.accept(); + } catch (IOException e) { + if(isStopped()) { + System.out.println("Server Stopped.") ; + return; + } + throw new RuntimeException( + "Error accepting client connection", e); + } + new Thread(new JavaParser(clientSocket)).start(); + } + System.out.println("Server Stopped.") ; + } + + + private synchronized boolean isStopped() { + return this.isStopped; + } + + public synchronized void stop(){ + this.isStopped = true; + try { + this.serverSocket.close(); + } catch (IOException e) { + throw new RuntimeException("Error closing server", e); + } + } + + private void openServerSocket() { + try { + this.serverSocket = new ServerSocket(this.serverPort); + } catch (IOException e) { + throw new RuntimeException("Cannot open port 8080", e); + } + } + +} diff --git a/parsers/java/src/main/java/com/bitshift/parsing/utils/Tuple.java b/parsers/java/src/main/java/com/bitshift/parsing/utils/Tuple.java new file mode 100644 index 0000000..115a3c6 --- /dev/null +++ b/parsers/java/src/main/java/com/bitshift/parsing/utils/Tuple.java @@ -0,0 +1,23 @@ +package com.bitshift.parsing.utils; + +import java.util.List; +import java.util.Arrays; + +public class Tuple { + private List _objects; + + public Tuple(T... 
args) { + _objects = Arrays.asList(args); + } + + public String toString() { + StringBuilder builder = new StringBuilder(); + + for(T o: this._objects) { + builder.append(o + ","); + } + + String s = builder.toString(); + return "(" + s.substring(0, s.length() - 1) + ")"; + } +} diff --git a/parsers/ruby/Rakefile b/parsers/ruby/Rakefile index e66f695..f8cdf64 100644 --- a/parsers/ruby/Rakefile +++ b/parsers/ruby/Rakefile @@ -1,5 +1,5 @@ require File.expand_path('../lib/parse_server.rb', __FILE__) -task :start_server do |t| - start_server +task :start_server, [:port_number] do |t, args| + start_server Integer(args[:port_number]) end diff --git a/parsers/ruby/lib/parse_server.rb b/parsers/ruby/lib/parse_server.rb index 916f434..2c87e49 100644 --- a/parsers/ruby/lib/parse_server.rb +++ b/parsers/ruby/lib/parse_server.rb @@ -13,8 +13,9 @@ def pack_int(i) end -def start_server - server = TCPServer.new 5003 +def start_server(port_number) + server = TCPServer.new port_number + puts "Ruby Server listening on port #{port_number}\n" loop do # Start a new thread for each client accepted diff --git a/parsers/ruby/lib/parser.rb b/parsers/ruby/lib/parser.rb index c757fa0..eec293b 100644 --- a/parsers/ruby/lib/parser.rb +++ b/parsers/ruby/lib/parser.rb @@ -25,7 +25,8 @@ module Bitshift def initialize(offset, tree) super() - module_hash = Hash.new {|hash, key| hash[key] = { assignments: [], uses: [] }} + module_hash = Hash.new {|hash, key| + hash[key] = { assignments: [], uses: [] }} class_hash = module_hash.clone function_hash = module_hash.clone var_hash = module_hash.clone @@ -118,8 +119,18 @@ module Bitshift end def to_s - str = symbols.to_s - str = str.gsub(/:(\w*)=>/, '"\1":') + new_symbols = Hash.new {|hash, key| hash[key] = Hash.new} + + symbols.each do |type, sym_list| + sym_list.each do |name, sym| + new_symbols[type.to_s][name.to_s] = { + "assignments" => sym[:assignments], + "uses" => sym[:uses]} + end + end + + str = new_symbols.to_s + str = str.gsub(/=>/, ":") return 
str end end diff --git a/static/js/index.js b/static/js/index.js index b4fb30a..a0771d0 100644 --- a/static/js/index.js +++ b/static/js/index.js @@ -50,7 +50,7 @@ var codeExample = '
' + title.innerHTML = 'File ' + codelet.filename + ''; site.innerHTML = 'on ' + codelet.origin[0] +''; language.innerHTML = codelet.language; diff --git a/test/parser_test.py b/test/parser_test.py index a1cfad3..ffee75c 100644 --- a/test/parser_test.py +++ b/test/parser_test.py @@ -21,7 +21,7 @@ if __name__ == '__main__': elif sys.argv[1] == 'ruby': file_name = "resources/parser.rb" - server_socket_number = 5003 + server_socket_number = 5065 server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) server_socket.connect(("localhost", server_socket_number))