Merge required to continue search-results styling with access to the `/search` route. Conflicts: app.py (tags/v1.0^2)
@@ -51,4 +51,4 @@ target | |||
# Ctags | |||
*/tags | |||
log | |||
logs |
@@ -2,13 +2,14 @@ | |||
Module to contain all the project's Flask server plumbing. | |||
""" | |||
from flask import Flask | |||
from flask import render_template, session | |||
from json import dumps | |||
from flask import Flask, make_response, render_template, request | |||
from bitshift import assets | |||
from bitshift import languages | |||
# from bitshift.database import Database | |||
# from bitshift.query import parse_query | |||
from bitshift.database import Database | |||
from bitshift.languages import LANGS | |||
from bitshift.query import parse_query, QueryParseException | |||
app = Flask(__name__) | |||
app.config.from_object("bitshift.config") | |||
@@ -17,17 +18,33 @@ app_env = app.jinja_env | |||
app_env.line_statement_prefix = "=" | |||
app_env.globals.update(assets=assets) | |||
# database = Database() | |||
database = Database() | |||
@app.route("/") | |||
def index(): | |||
return render_template("index.html", autocomplete_languages=languages.LANGS) | |||
@app.route("/search/<query>") | |||
def search(query): | |||
# tree = parse_query(query) | |||
# database.search(tree) | |||
pass | |||
return render_template("index.html", autocomplete_languages=LANGS) | |||
@app.route("/search.json")
def search():
    """
    Handle a JSON search request.

    The query string is read from the ``q`` request argument and the page
    number from ``p`` (defaulting to 1). The response body is a JSON object
    holding either ``count`` and ``results`` on success, or a single ``error``
    message when the query is missing, cannot be parsed, or the page number is
    not an integer.
    """
    def reply(payload):
        """Serialize *payload* and wrap it in a JSON-typed response."""
        response = make_response(dumps(payload))
        response.mimetype = "application/json"
        return response

    args = request.args
    query = args.get("q")
    page = args.get("p", 1)
    if not query:
        return reply({"error": "No query given"})

    try:
        tree = parse_query(query)
    except QueryParseException as exc:
        return reply({"error": exc.args[0]})

    try:
        page = int(page)
    except ValueError:
        return reply({"error": u"Invalid page number: %s" % page})

    count, codelets = database.search(tree, page)
    return reply({
        "count": count,
        "results": [codelet.serialize() for codelet in codelets],
    })
@app.route("/about") | |||
def about(): | |||
@@ -1,3 +1,5 @@ | |||
from .languages import LANGS | |||
__all__ = ["Codelet"] | |||
class Codelet(object): | |||
@@ -11,7 +13,7 @@ class Codelet(object): | |||
:ivar authors: (array of tuples (str, str or None)) An array of tuples | |||
containing an author's name and profile URL (on the service the code | |||
was pulled from). | |||
:ivar code_url: (str) The url of the (page containing the) source code. | |||
:ivar url: (str) The url of the (page containing the) source code. | |||
:ivar date_created: (:class:`datetime.datetime`, or None) The date the code | |||
was published. | |||
:ivar date_modified: (:class:`datetime.datetime`, or None) The date the | |||
@@ -24,8 +26,8 @@ class Codelet(object): | |||
added by the database. | |||
""" | |||
def __init__(self, name, code, filename, language, authors, code_url, | |||
date_created, date_modified, rank, symbols=None, origin=None): | |||
def __init__(self, name, code, filename, language, authors, url, | |||
date_created, date_modified, rank, symbols=None, origin=None): | |||
""" | |||
Create a Codelet instance. | |||
@@ -34,7 +36,7 @@ class Codelet(object): | |||
:param filename: see :attr:`self.filename` | |||
:param language: see :attr:`self.language` | |||
:param authors: see :attr:`self.authors` | |||
:param code_url: see :attr:`self.code_url` | |||
:param url: see :attr:`self.url` | |||
:param date_created: see :attr:`self.date_created` | |||
:param date_modified: see :attr:`self.date_modified` | |||
:param rank: see :attr:`self.rank` | |||
@@ -46,7 +48,7 @@ class Codelet(object): | |||
:type filename: see :attr:`self.filename` | |||
:type language: see :attr:`self.language` | |||
:type authors: see :attr:`self.authors` | |||
:type code_url: see :attr:`self.code_url` | |||
:type url: see :attr:`self.url` | |||
:type date_created: see :attr:`self.date_created` | |||
:type date_modified: see :attr:`self.date_modified` | |||
:type rank: see :attr:`self.rank` | |||
@@ -59,9 +61,24 @@ class Codelet(object): | |||
self.filename = filename | |||
self.language = language | |||
self.authors = authors | |||
self.code_url = code_url | |||
self.url = url | |||
self.date_created = date_created | |||
self.date_modified = date_modified | |||
self.rank = rank | |||
self.symbols = symbols or {} | |||
self.origin = origin or (None, None, None) | |||
def serialize(self):
    """
    Convert the codelet into a dictionary that can be sent as JSON.

    The language is reported by name, looked up in ``LANGS`` using
    :attr:`self.language` as an index. Dates are reported as ISO 8601
    strings, or as ``None`` when the corresponding date is not known.

    :return: The codelet as a dictionary.
    :rtype: dict
    """
    def isoformat(date):
        # date_created and date_modified are documented as possibly None;
        # calling .isoformat() on None would raise AttributeError.
        return date.isoformat() if date is not None else None

    return {
        "name": self.name, "code": self.code, "lang": LANGS[self.language],
        "authors": self.authors, "url": self.url,
        "created": isoformat(self.date_created),
        "modified": isoformat(self.date_modified),
        "symbols": self.symbols, "origin": self.origin
    }
@@ -1,55 +0,0 @@ | |||
""" | |||
:synopsis: Parent crawler module, which supervises all crawlers. | |||
Contains functions for initializing all subsidiary, threaded crawlers. | |||
""" | |||
import logging, logging.handlers, os, Queue | |||
from bitshift.crawler import crawler, indexer | |||
__all__ = ["crawl"] | |||
def crawl(): | |||
""" | |||
Initialize all crawlers (and indexers). | |||
Start the: | |||
1. GitHub crawler, :class:`crawler.GitHubCrawler`. | |||
2. Bitbucket crawler, :class:`crawler.BitbucketCrawler`. | |||
3. Git indexer, :class:`bitshift.crawler.indexer.GitIndexer`. | |||
""" | |||
_configure_logging() | |||
MAX_URL_QUEUE_SIZE = 5e3 | |||
repo_clone_queue = Queue.Queue(maxsize=MAX_URL_QUEUE_SIZE) | |||
threads = [crawler.GitHubCrawler(repo_clone_queue), | |||
crawler.BitbucketCrawler(repo_clone_queue), | |||
indexer.GitIndexer(repo_clone_queue)] | |||
for thread in threads: | |||
thread.start() | |||
def _configure_logging(): | |||
LOG_FILE_DIR = "log" | |||
if not os.path.exists(LOG_FILE_DIR): | |||
os.mkdir(LOG_FILE_DIR) | |||
logging.getLogger("requests").setLevel(logging.WARNING) | |||
logging.getLogger("urllib3").setLevel(logging.WARNING) | |||
formatter = logging.Formatter( | |||
fmt=("%(asctime)s %(levelname)s %(name)s %(funcName)s" | |||
" %(message)s"), datefmt="%y-%m-%d %H:%M:%S") | |||
handler = logging.handlers.TimedRotatingFileHandler( | |||
"%s/%s" % (LOG_FILE_DIR, "app.log"), when="H", interval=1, | |||
backupCount=20) | |||
handler.setFormatter(formatter) | |||
root_logger = logging.getLogger() | |||
root_logger.addHandler(handler) | |||
root_logger.setLevel(logging.NOTSET) |
@@ -0,0 +1,65 @@ | |||
""" | |||
:synopsis: Parent crawler module, which supervises all crawlers. | |||
Contains functions for initializing all subsidiary, threaded crawlers. | |||
""" | |||
import logging, logging.handlers, os, Queue | |||
from bitshift.crawler import crawler, indexer | |||
from bitshift.parser import parse, start_parse_servers | |||
__all__ = ["crawl"] | |||
def crawl():
    """
    Initialize all crawlers (and indexers).

    Start the:
    1. Parse servers, via :func:`bitshift.parser.start_parse_servers`.
    2. GitHub crawler, :class:`crawler.GitHubCrawler`.
    3. Bitbucket crawler, :class:`crawler.BitbucketCrawler`.
    4. Git indexer, :class:`bitshift.crawler.indexer.GitIndexer`.
    """

    _configure_logging()

    # Launch the parse servers before any crawler/indexer thread runs: the
    # indexer hands codelets to the parser, which connects to these servers,
    # so they should be given as much of a head start as possible.
    start_parse_servers()

    # Queue sizes are integers; keep the bound as an int rather than a float.
    MAX_URL_QUEUE_SIZE = 5000

    repo_clone_queue = Queue.Queue(maxsize=MAX_URL_QUEUE_SIZE)
    threads = [crawler.GitHubCrawler(repo_clone_queue),
               crawler.BitbucketCrawler(repo_clone_queue),
               indexer.GitIndexer(repo_clone_queue)]

    for thread in threads:
        thread.start()
def _configure_logging():
    """
    Route all log records to an hourly-rotated file in the ``logs`` directory.

    The directory is resolved two levels above this module and created if it
    does not exist. The ``requests`` and ``urllib3`` loggers are limited to
    warnings and above. A handler writing to ``app.log`` (rotated every hour,
    keeping 20 backups) is attached to the root logger, whose level is set to
    ``NOTSET``.
    """
    # This isn't ideal, since it means the bitshift python package must be kept
    # inside the app, but it works for now:
    package_dir = os.path.dirname(__file__)
    root = os.path.abspath(os.path.join(package_dir, "..", ".."))
    log_dir = os.path.join(root, "logs")
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    # Silence the chatty HTTP libraries below WARNING.
    for noisy_logger in ("requests", "urllib3"):
        logging.getLogger(noisy_logger).setLevel(logging.WARNING)

    log_format = logging.Formatter(
        fmt="%(asctime)s %(levelname)s %(name)s:%(funcName)s %(message)s",
        datefmt="%y-%m-%d %H:%M:%S")
    file_handler = logging.handlers.TimedRotatingFileHandler(
        "%s/%s" % (log_dir, "app.log"), when="H", interval=1, backupCount=20)
    file_handler.setFormatter(log_format)

    root_logger = logging.getLogger()
    root_logger.addHandler(file_handler)
    root_logger.setLevel(logging.NOTSET)
if __name__ == "__main__":
    # crawl() configures logging itself. Calling _configure_logging() here as
    # well would attach a second file handler to the root logger and write
    # every log record twice.
    crawl()
@@ -7,6 +7,7 @@ import bs4, datetime, logging, os, Queue, re, shutil, string, subprocess, time,\ | |||
threading | |||
from ..database import Database | |||
from ..parser import parse, UnsupportedFileError | |||
from ..codelet import Codelet | |||
GIT_CLONE_DIR = "/tmp/bitshift" | |||
@@ -73,6 +74,7 @@ class GitIndexer(threading.Thread): | |||
self.index_queue = Queue.Queue(maxsize=MAX_INDEX_QUEUE_SIZE) | |||
self.git_cloner = _GitCloner(clone_queue, self.index_queue) | |||
self.git_cloner.start() | |||
self.database = Database() | |||
self._logger = logging.getLogger("%s.%s" % | |||
(__name__, self.__class__.__name__)) | |||
self._logger.info("Starting.") | |||
@@ -98,10 +100,7 @@ class GitIndexer(threading.Thread): | |||
repo = self.index_queue.get() | |||
self.index_queue.task_done() | |||
try: | |||
self._index_repository(repo) | |||
except Exception as excep: | |||
self._logger.warning("%s: %s.", excep.__class__.__name__, excep) | |||
self._index_repository(repo) | |||
def _index_repository(self, repo): | |||
""" | |||
@@ -119,10 +118,15 @@ class GitIndexer(threading.Thread): | |||
try: | |||
self._insert_repository_codelets(repo) | |||
except Exception as excep: | |||
self._logger.warning("%s: %s.", excep.__class__.__name__, excep) | |||
if os.path.isdir("%s/%s" % (GIT_CLONE_DIR, repo.name)): | |||
shutil.rmtree("%s/%s" % (GIT_CLONE_DIR, repo.name)) | |||
self._logger.exception("Exception raised while indexing:") | |||
finally: | |||
if os.path.isdir("%s/%s" % (GIT_CLONE_DIR, repo.name)): | |||
if len([obj for obj in os.listdir('.') if | |||
os.path.isdir(obj)]) <= 1: | |||
shutil.rmtree("%s/%s" % ( | |||
GIT_CLONE_DIR, repo.name.split("/")[0])) | |||
else: | |||
shutil.rmtree("%s/%s" % (GIT_CLONE_DIR, repo.name)) | |||
def _insert_repository_codelets(self, repo): | |||
""" | |||
@@ -147,17 +151,22 @@ class GitIndexer(threading.Thread): | |||
source = self._decode(source_file.read()) | |||
if source is None: | |||
continue | |||
except IOError as exception: | |||
except IOError: | |||
continue | |||
authors = [(self._decode(author), None) for author in \ | |||
commits_meta[filename]["authors"]] | |||
authors = [(self._decode(author), None) for author in | |||
commits_meta[filename]["authors"]] | |||
codelet = Codelet("%s:%s" % (repo.name, filename), source, filename, | |||
None, authors, self._generate_file_url(filename, | |||
repo.url, repo.framework_name), | |||
commits_meta[filename]["time_created"], | |||
commits_meta[filename]["time_last_modified"], | |||
repo.rank) | |||
try: | |||
parse(codelet) | |||
except UnsupportedFileError: | |||
continue | |||
self.database.insert(codelet) | |||
def _generate_file_url(self, filename, repo_url, framework_name): | |||
""" | |||
@@ -63,7 +63,7 @@ class Database(object): | |||
query, args = tree.build_query(page) | |||
cursor.execute(query, args) | |||
ids = [id for id, _ in cursor.fetchall()] | |||
num_results = 0 # TODO: NotImplemented | |||
num_results = len(ids) # TODO: NotImplemented | |||
return ids, num_results | |||
def _get_authors_for_codelet(self, cursor, codelet_id): | |||
@@ -103,13 +103,13 @@ class Database(object): | |||
WHERE codelet_id = ?""" | |||
with self._conn.cursor(oursql.DictCursor) as dict_cursor: | |||
dict_cursor.executemany(query, [(id,) for id in ids]) | |||
for row in dict_cursor.fetchone(): | |||
codelet_id = row["codelet_id"] | |||
for codelet_id in ids: | |||
dict_cursor.execute(query, (codelet_id,)) | |||
row = dict_cursor.fetchall()[0] | |||
if row["origin_url_base"]: | |||
url = row["codelet_url"] | |||
else: | |||
url = row["origin_url_base"] + row["codelet_url"] | |||
else: | |||
url = row["codelet_url"] | |||
origin = (row["origin_name"], row["origin_url"], | |||
row["origin_image"]) | |||
authors = self._get_authors_for_codelet(cursor, codelet_id) | |||
@@ -160,28 +160,31 @@ class Database(object): | |||
:return: The total number of results, and the *n*\ th page of results. | |||
:rtype: 2-tuple of (long, list of :py:class:`.Codelet`\ s) | |||
""" | |||
query1 = """SELECT cdata_codelet, cache_count_mnt, cache_count_exp | |||
query1 = "SELECT 1 FROM cache WHERE cache_id = ?" | |||
query2 = """SELECT cdata_codelet, cache_count_mnt, cache_count_exp | |||
FROM cache | |||
INNER JOIN cache_data ON cache_id = cdata_cache | |||
WHERE cache_id = ?""" | |||
query2 = "INSERT INTO cache VALUES (?, ?, ?, DEFAULT)" | |||
query3 = "INSERT INTO cache_data VALUES (?, ?)" | |||
query3 = "INSERT INTO cache VALUES (?, ?, ?, DEFAULT)" | |||
query4 = "INSERT INTO cache_data VALUES (?, ?)" | |||
cache_id = mmh3.hash64(str(page) + ":" + query.serialize())[0] | |||
with self._conn.cursor() as cursor: | |||
cursor.execute(query1, (cache_id,)) | |||
results = cursor.fetchall() | |||
if results: # Cache hit | |||
num_results = results[0][1] * (10 ** results[0][2]) | |||
ids = [res[0] for res in results] | |||
else: # Cache miss | |||
cache_hit = cursor.fetchall() | |||
if cache_hit: | |||
cursor.execute(query2, (cache_id,)) | |||
rows = cursor.fetchall() | |||
num_results = rows[0][1] * (10 ** rows[0][2]) if rows else 0 | |||
ids = [row[0] for row in rows] | |||
else: | |||
ids, num_results = self._search_with_query(cursor, query, page) | |||
num_exp = max(len(str(num_results)) - 3, 0) | |||
num_results = int(round(num_results, -num_exp)) | |||
num_mnt = num_results / (10 ** num_exp) | |||
cursor.execute(query2, (cache_id, num_mnt, num_exp)) | |||
cursor.executemany(query3, [(cache_id, c_id) for c_id in ids]) | |||
cursor.execute(query3, (cache_id, num_mnt, num_exp)) | |||
cursor.executemany(query4, [(cache_id, c_id) for c_id in ids]) | |||
codelet_gen = self._get_codelets_from_ids(cursor, ids) | |||
return (num_results, list(codelet_gen)) | |||
@@ -0,0 +1,283 @@ | |||
{ | |||
"_comment" : "A list of programming languages supported by `bitshift`.", | |||
"languages" : [ | |||
"Debian Sourcelist", | |||
"Delphi", | |||
"JavaScript+Mako", | |||
"Brainfuck", | |||
"Ceylon", | |||
"JavaScript+Django/Jinja", | |||
"HTML+Evoque", | |||
"NumPy", | |||
"Modula-2", | |||
"LiveScript", | |||
"Nimrod", | |||
"Bash", | |||
"HTML+Django/Jinja", | |||
"CSS+PHP", | |||
"XML+Lasso", | |||
"VimL", | |||
"CSS+Genshi Text", | |||
"Fancy", | |||
"Coldfusion HTML", | |||
"cfstatement", | |||
"Scalate Server Page", | |||
"Smarty", | |||
"XML+Evoque", | |||
"haXe", | |||
"PowerShell", | |||
"Tea", | |||
"HTML+Cheetah", | |||
"Mason", | |||
"Django/Jinja", | |||
"JAGS", | |||
"ApacheConf", | |||
"DTD", | |||
"Lighttpd configuration file", | |||
"Java", | |||
"JavaScript+Genshi Text", | |||
"Scheme", | |||
"Nemerle", | |||
"RHTML", | |||
"Ragel in Java Host", | |||
"Darcs Patch", | |||
"Puppet", | |||
"Octave", | |||
"CoffeeScript", | |||
"Ragel in D Host", | |||
"Scilab", | |||
"Monkey", | |||
"HTML+Myghty", | |||
"CSS", | |||
"JavaScript+Smarty", | |||
"Io", | |||
"COBOLFree", | |||
"Asymptote", | |||
"vhdl", | |||
"CSS+Ruby", | |||
"Fortran", | |||
"d-objdump", | |||
"MySQL", | |||
"REBOL", | |||
"C++", | |||
"ERB", | |||
"CBM BASIC V2", | |||
"Befunge", | |||
"Julia", | |||
"MoonScript", | |||
"Ruby", | |||
"XML+Smarty", | |||
"Dylan", | |||
"Groovy", | |||
"MoinMoin/Trac Wiki markup", | |||
"autohotkey", | |||
"C", | |||
"HTML", | |||
"Felix", | |||
"CMake", | |||
"NSIS", | |||
"SourcePawn", | |||
"Mako", | |||
"VGL", | |||
"Velocity", | |||
"Koka", | |||
"CUDA", | |||
"Gnuplot", | |||
"IRC logs", | |||
"Prolog", | |||
"Python", | |||
"CSS+Django/Jinja", | |||
"verilog", | |||
"Smalltalk", | |||
"JavaScript+Myghty", | |||
"YAML", | |||
"Julia console", | |||
"ANTLR With ActionScript Target", | |||
"XML+Mako", | |||
"XSLT", | |||
"UrbiScript", | |||
"Scaml", | |||
"S", | |||
"DylanLID", | |||
"MAQL", | |||
"sqlite3con", | |||
"Boo", | |||
"OCaml", | |||
"eC", | |||
"ActionScript", | |||
"VB.net", | |||
"SquidConf", | |||
"XQuery", | |||
"D", | |||
"Fantom", | |||
"Gettext Catalog", | |||
"Logos", | |||
"Lasso", | |||
"SCSS", | |||
"BBCode", | |||
"Haml", | |||
"FoxPro", | |||
"MuPAD", | |||
"XML+Ruby", | |||
"Dart", | |||
"IDL", | |||
"dg", | |||
"Evoque", | |||
"Jade", | |||
"c-objdump", | |||
"Kconfig", | |||
"Java Server Page", | |||
"reg", | |||
"ABAP", | |||
"XML+Velocity", | |||
"JavaScript+Cheetah", | |||
"HTML+Mako", | |||
"Ragel in Ruby Host", | |||
"RobotFramework", | |||
"Protocol Buffer", | |||
"CFEngine3", | |||
"Ragel", | |||
"GLSL", | |||
"COBOL", | |||
"TypeScript", | |||
"Ada", | |||
"PostgreSQL SQL dialect", | |||
"Xtend", | |||
"Logtalk", | |||
"objdump", | |||
"CSS+Mako", | |||
"ca65", | |||
"Objective-C++", | |||
"Gherkin", | |||
"HTML+PHP", | |||
"Makefile", | |||
"PostScript", | |||
"Hxml", | |||
"Kotlin", | |||
"PL/pgSQL", | |||
"Vala", | |||
"Haskell", | |||
"Bro", | |||
"Lua", | |||
"POVRay", | |||
"Sass", | |||
"ANTLR With Java Target", | |||
"Tcl", | |||
"ANTLR With ObjectiveC Target", | |||
"JavaScript+Ruby", | |||
"Racket", | |||
"AspectJ", | |||
"Base Makefile", | |||
"ANTLR With Python Target", | |||
"cpp-objdump", | |||
"Genshi Text", | |||
"Ioke", | |||
"PyPy Log", | |||
"Croc", | |||
"Objective-J", | |||
"GAS", | |||
"Batchfile", | |||
"Snobol", | |||
"XML", | |||
"ANTLR", | |||
"Opa", | |||
"XML+Cheetah", | |||
"Go", | |||
"Diff", | |||
"MiniD", | |||
"Cython", | |||
"Ragel in C Host", | |||
"Erlang", | |||
"Debian Control file", | |||
"aspx-vb", | |||
"BUGS", | |||
"Ragel in CPP Host", | |||
"aspx-cs", | |||
"Properties", | |||
"Groff", | |||
"Clojure", | |||
"Modelica", | |||
"QML", | |||
"JavaScript+Lasso", | |||
"ANTLR With Perl Target", | |||
"Genshi", | |||
"BlitzMax", | |||
"Treetop", | |||
"Matlab", | |||
"Myghty", | |||
"HTML+Genshi", | |||
"Duel", | |||
"Perl", | |||
"FSharp", | |||
"reStructuredText", | |||
"NewLisp", | |||
"Scala", | |||
"CSS+Lasso", | |||
"XML+PHP", | |||
"Stan", | |||
"INI", | |||
"MOOCode", | |||
"Shell Session", | |||
"RPMSpec", | |||
"Newspeak", | |||
"Bash Session", | |||
"Coq", | |||
"Raw token data", | |||
"Tcsh", | |||
"HTML+Lasso", | |||
"C#", | |||
"Gosu Template", | |||
"RConsole", | |||
"MXML", | |||
"TeX", | |||
"CSS+Smarty", | |||
"Text only", | |||
"ANTLR With C# Target", | |||
"OpenEdge ABL", | |||
"Cheetah", | |||
"Smali", | |||
"CSS+Myghty", | |||
"Rd", | |||
"LLVM", | |||
"Standard ML", | |||
"Elixir", | |||
"Nginx configuration file", | |||
"GoodData-CL", | |||
"AppleScript", | |||
"HTML+Smarty", | |||
"Objective-C", | |||
"JavaScript", | |||
"Rust", | |||
"Common Lisp", | |||
"Embedded Ragel", | |||
"ActionScript 3", | |||
"systemverilog", | |||
"Literate Haskell", | |||
"PHP", | |||
"ANTLR With CPP Target", | |||
"Gosu", | |||
"Hybris", | |||
"JavaScript+PHP", | |||
"Factor", | |||
"HTML+Velocity", | |||
"Mscgen", | |||
"Ooc", | |||
"SQL", | |||
"HTTP", | |||
"ECL", | |||
"Redcode", | |||
"Ragel in Objective C Host", | |||
"XML+Django/Jinja", | |||
"Awk", | |||
"JSON", | |||
"NASM", | |||
"ANTLR With Ruby Target", | |||
"XML+Myghty", | |||
"AutoIt", | |||
"Mako", | |||
"CSS+Mako", | |||
"HTML+Mako", | |||
"XML+Mako", | |||
"JavaScript+Mako" | |||
] | |||
} |
@@ -1,5 +1,5 @@ | |||
import json | |||
from os import path | |||
with open("bitshift/resources/languages.json") as lang_json: | |||
LANGS = [lang.encode("ascii","ignore") for lang in | |||
json.load(lang_json)["languages"]] | |||
# Load the supported-language names from the JSON file that sits next to this
# module, so the lookup does not depend on the current working directory.
with open(path.join(path.dirname(__file__), "languages.json")) as lang_json:
    LANGS = list(json.load(lang_json)["languages"])
@@ -1,8 +1,25 @@ | |||
import json, pygments.lexers as pgl, sys, socket, struct | |||
import json | |||
import sys | |||
import socket | |||
import struct | |||
import subprocess | |||
from os import path | |||
from pygments import lexers as pgl, util | |||
from ..languages import LANGS | |||
from .python import parse_py | |||
_all__ = ["parse"] | |||
# Public API of the parser package. (This was previously spelled ``_all__``,
# which Python treats as an ordinary variable, so it had no effect.)
__all__ = ["parse", "UnsupportedFileError", "start_parse_servers"]

# (language name, command template) pairs for the external parse servers.
# The "%d" placeholder in each template is filled in with the port number
# when the command is launched.
PARSER_COMMANDS = [
    ('Java', ['mvn', '-f',
        path.join(path.dirname(__file__), "../../parsers/java/pom.xml"),
        'exec:java', '-Dexec.args="%d"']),
    ('Ruby', ['rake', '-f',
        path.join(path.dirname(__file__), "../../parsers/ruby/Rakefile"),
        "'start_server[%d]'"])
]
class UnsupportedFileError(Exception):
    """Raised when no lexer can be found for a codelet."""
@@ -19,13 +36,15 @@ def _lang(codelet): | |||
Modify function to incorporate tags from stackoverflow. | |||
""" | |||
if codelet.filename is not None: | |||
try: | |||
return pgl.guess_lexer_for_filename(codelet.filename, codelet.code).name | |||
except: | |||
raise UnsupportedFileError('Could not find a lexer for the codelet\'s filename') | |||
try: | |||
if codelet.filename: | |||
lex = pgl.guess_lexer_for_filename(codelet.filename, codelet.code) | |||
else: | |||
lex = pgl.guess_lexer(codelet.code) | |||
except util.ClassNotFound: | |||
raise UnsupportedFileError(codelet.filename) | |||
return LANGS.index(pgl.guess_lexer(codelet.code)) | |||
return LANGS.index(lex.name) | |||
def _recv_data(server_socket): | |||
""" | |||
@@ -39,8 +58,9 @@ def _recv_data(server_socket): | |||
""" | |||
recv_size = 8192 | |||
total_data = []; size_data = cur_data = '' | |||
total_size = 0; size = sys.maxint | |||
total_data = [] | |||
size_data = cur_data = '' | |||
total_size, size = 0, sys.maxint | |||
while total_size < size: | |||
cur_data = server_socket.recv(recv_size) | |||
@@ -61,8 +81,23 @@ def _recv_data(server_socket): | |||
total_size = sum([len(s) for s in total_data]) | |||
server_socket.close() | |||
return ''.join(total_data); | |||
return ''.join(total_data) | |||
def start_parse_servers():
    """
    Starts all the parse servers for languages besides python.

    Every entry of ``PARSER_COMMANDS`` is launched as a child process and is
    given the port number ``5001 + LANGS.index(lang)`` through the ``%d``
    placeholder in its command template.

    :return: The launched server processes, in ``PARSER_COMMANDS`` order.
    :rtype: list of :class:`subprocess.Popen`
    """
    import shlex

    procs = []
    for lang, cmd in PARSER_COMMANDS:
        port = 5001 + LANGS.index(lang)
        # Split the filled-in template with shell quoting rules so the quoted
        # arguments reach the program exactly as a shell would pass them, but
        # run the program directly: no intermediate shell process is spawned
        # and each Popen handle refers to the server itself.
        argv = shlex.split(" ".join(cmd) % port)
        procs.append(subprocess.Popen(argv))

    return procs
def parse(codelet): | |||
""" | |||
@@ -76,9 +111,10 @@ def parse(codelet): | |||
:type code: Codelet | |||
""" | |||
lang = _lang(codelet); source = codelet.code | |||
lang = _lang(codelet) | |||
source = codelet.code | |||
codelet.language = lang | |||
server_socket_number = 5000 + lang | |||
server_socket_number = 5001 + lang | |||
if lang == LANGS.index('Python'): | |||
parse_py(codelet) | |||
@@ -86,8 +122,13 @@ def parse(codelet): | |||
else: | |||
server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) | |||
server_socket.connect(("localhost", server_socket_number)) | |||
server_socket.send("%d\n%s" % (len(source), source)); | |||
server_socket.send("%d\n%s" % (len(source), source)) | |||
symbols = json.loads(_recv_data(server_socket)) | |||
codelet.symbols = symbols | |||
symbols = {key: [(name, [tuple(loc) | |||
for loc in syms[name]['assignments']], | |||
[tuple(loc) for loc in syms[name]['uses']]) | |||
for name in syms.keys()] | |||
for key, syms in symbols.iteritems()} | |||
codelet.symbols = symbols |
@@ -1,4 +1,7 @@ | |||
import ast | |||
import re | |||
encoding_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", re.UNICODE) | |||
class _CachedWalker(ast.NodeVisitor): | |||
""" | |||
@@ -154,7 +157,26 @@ def parse_py(codelet): | |||
:type code: Codelet | |||
""" | |||
tree = ast.parse(codelet.code) | |||
def strip_encoding(lines):
    """Yield *lines*, omitting an encoding declaration on line one or two.

    Only the first two lines are checked against ``encoding_re``; every later
    line is passed through untouched.
    """
    for position, line in enumerate(lines):
        if position < 2 and encoding_re.match(line):
            continue
        yield line
try: | |||
tree = ast.parse("\n".join(strip_encoding(codelet.code.splitlines()))) | |||
except SyntaxError: | |||
## TODO: add some logging here? | |||
return | |||
cutter = _CachedWalker() | |||
cutter.visit(tree) | |||
codelet.symbols = cutter.accum |
@@ -36,10 +36,75 @@ class _QueryParser(object): | |||
self._parse_variable: ["v", "var", "variable"] | |||
} | |||
def _scan_query(self, query, markers): | |||
"""Scan a query (sub)string for the first occurance of some markers. | |||
Returns a 2-tuple of (first_marker_found, marker_index). | |||
""" | |||
def is_escaped(query, index): | |||
"""Return whether a query marker is backslash-escaped.""" | |||
return (index > 0 and query[index - 1] == "\\" and | |||
(index < 2 or query[index - 2] != "\\")) | |||
best_marker, best_index = None, maxsize | |||
for marker in markers: | |||
index = query.find(marker) | |||
if is_escaped(query, index): | |||
_, new_index = self._scan_query(query[index + 1:], marker) | |||
index += new_index + 1 | |||
if index >= 0 and index < best_index: | |||
best_marker, best_index = marker, index | |||
return best_marker, best_index | |||
def _split_query(self, query, markers, parens=False): | |||
"""Split a query string into a nested list of query terms. | |||
Returns a list of terms and/or nested sublists of terms. Each term and | |||
sublist is guarenteed to be non-empty. | |||
""" | |||
query = query.lstrip() | |||
if not query: | |||
return [] | |||
marker, index = self._scan_query(query, markers) | |||
if not marker: | |||
return [query] | |||
nest = [query[:index]] if index > 0 else [] | |||
after = query[index + 1:] | |||
if marker == " ": | |||
nest += self._split_query(after, markers, parens) | |||
elif marker in ('"', "'"): | |||
close_marker, close_index = self._scan_query(after, marker) | |||
if close_marker: | |||
if close_index > 0: | |||
nest.append(after[:close_index]) | |||
after = after[close_index + 1:] | |||
nest += self._split_query(after, markers, parens) | |||
elif after: | |||
nest.append(after) | |||
elif marker == "(": | |||
inner, after = self._split_query(after, markers, True), [] | |||
if inner and isinstance(inner[-1], tuple): | |||
after = self._split_query(inner.pop()[0], markers, parens) | |||
if inner: | |||
nest.append(inner) | |||
if after: | |||
nest += after | |||
elif marker == ")": | |||
if parens: | |||
nest.append((after,)) | |||
else: | |||
nest += self._split_query(after, markers) | |||
return nest | |||
def _parse_literal(self, literal): | |||
"""Parse part of a search query into a string or regular expression.""" | |||
if literal.startswith(("r:", "re:", "regex:", "regexp:")): | |||
return Regex(literal.split(":", 1)[1]) | |||
arg = literal.split(":", 1)[1] | |||
if not arg: | |||
err = 'Incomplete query term: "%s"' % literal | |||
raise QueryParseException(err) | |||
return Regex(arg) | |||
return String(literal) | |||
def _parse_language(self, term): | |||
@@ -98,21 +163,29 @@ class _QueryParser(object): | |||
"""Parse part of a query into a date created node and return it.""" | |||
return self._parse_date(term, Date.CREATE) | |||
def _parse_symbol(self, term): | |||
def _parse_symbol(self, term, stype=Symbol.ALL): | |||
"""Parse part of a query into a symbol node and return it.""" | |||
return Symbol(Symbol.ALL, self._parse_literal(term)) | |||
literal = self._parse_literal(term) | |||
if isinstance(literal, String): | |||
make_symbol = lambda lit: Symbol(stype, String(lit)) | |||
symbols = self._split_query(literal.string, " \"'") | |||
node = make_symbol(symbols.pop()) | |||
while symbols: | |||
node = BinaryOp(make_symbol(symbols.pop()), BinaryOp.OR, node) | |||
return node | |||
return Symbol(stype, literal) | |||
def _parse_function(self, term): | |||
"""Parse part of a query into a function node and return it.""" | |||
return Symbol(Symbol.FUNCTION, self._parse_literal(term)) | |||
return self._parse_symbol(term, Symbol.FUNCTION) | |||
def _parse_class(self, term): | |||
"""Parse part of a query into a class node and return it.""" | |||
return Symbol(Symbol.CLASS, self._parse_literal(term)) | |||
return self._parse_symbol(term, Symbol.CLASS) | |||
def _parse_variable(self, term): | |||
"""Parse part of a query into a variable node and return it.""" | |||
return Symbol(Symbol.VARIABLE, self._parse_literal(term)) | |||
return self._parse_symbol(term, Symbol.VARIABLE) | |||
def _parse_term(self, term): | |||
"""Parse a query term into a tree node and return it.""" | |||
@@ -134,67 +207,6 @@ class _QueryParser(object): | |||
return meth(arg) | |||
return Text(self._parse_literal(term)) | |||
def _scan_query(self, query, markers): | |||
"""Scan a query (sub)string for the first occurance of some markers. | |||
Returns a 2-tuple of (first_marker_found, marker_index). | |||
""" | |||
def is_escaped(query, index): | |||
"""Return whether a query marker is backslash-escaped.""" | |||
return (index > 0 and query[index - 1] == "\\" and | |||
(index < 2 or query[index - 2] != "\\")) | |||
best_marker, best_index = None, maxsize | |||
for marker in markers: | |||
index = query.find(marker) | |||
if is_escaped(query, index): | |||
_, new_index = self._scan_query(query[index + 1:], marker) | |||
index += new_index + 1 | |||
if index >= 0 and index < best_index: | |||
best_marker, best_index = marker, index | |||
return best_marker, best_index | |||
def _split_query(self, query, parens=False): | |||
"""Split a query string into a nested list of query terms. | |||
Returns a list of terms and/or nested sublists of terms. Each term and | |||
sublist is guarenteed to be non-empty. | |||
""" | |||
query = query.lstrip() | |||
if not query: | |||
return [] | |||
marker, index = self._scan_query(query, " \"'()") | |||
if not marker: | |||
return [query] | |||
nest = [query[:index]] if index > 0 else [] | |||
after = query[index + 1:] | |||
if marker == " ": | |||
nest += self._split_query(after, parens) | |||
elif marker in ('"', "'"): | |||
close_marker, close_index = self._scan_query(after, marker) | |||
if close_marker: | |||
if close_index > 0: | |||
nest.append(after[:close_index]) | |||
after = after[close_index + 1:] | |||
nest += self._split_query(after, parens) | |||
elif after: | |||
nest.append(after) | |||
elif marker == "(": | |||
inner, after = self._split_query(after, True), [] | |||
if inner and isinstance(inner[-1], tuple): | |||
after = self._split_query(inner.pop()[0], parens) | |||
if inner: | |||
nest.append(inner) | |||
if after: | |||
nest += after | |||
elif marker == ")": | |||
if parens: | |||
nest.append((after,)) | |||
else: | |||
nest += self._split_query(after) | |||
return nest | |||
def _parse_boolean_operators(self, nest): | |||
"""Parse boolean operators in a nested query list.""" | |||
op_lookup = { | |||
@@ -271,7 +283,7 @@ class _QueryParser(object): | |||
:raises: :py:class:`.QueryParseException` | |||
""" | |||
nest = self._split_query(query.rstrip()) | |||
nest = self._split_query(query.rstrip(), " \"'()") | |||
if not nest: | |||
raise QueryParseException('Empty query: "%s"' % query) | |||
self._parse_boolean_operators(nest) | |||
@@ -195,7 +195,7 @@ class Symbol(_Node): | |||
CLASS = 1 | |||
VARIABLE = 2 | |||
TYPES = {FUNCTION: "FUNCTION", CLASS: "CLASS", VARIABLE: "VARIABLE"} | |||
TYPES_INV = ["functions", "classes", "variables"] | |||
TYPES_INV = ["functions", "classes", "vars"] | |||
def __init__(self, type_, name): | |||
""" | |||
@@ -1,4 +0,0 @@ | |||
{ | |||
"_comment" : "A list of programming languages supported by `bitshift`.", | |||
"languages" : ["Debian Sourcelist", "Delphi", "JavaScript+Mako", "Brainfuck", "Ceylon", "JavaScript+Django/Jinja", "HTML+Evoque", "NumPy", "Modula-2", "LiveScript", "Nimrod", "Bash", "HTML+Django/Jinja", "CSS+PHP", "XML+Lasso", "VimL", "CSS+Genshi Text", "Fancy", "Coldfusion HTML", "cfstatement", "Scalate Server Page", "Smarty", "XML+Evoque", "haXe", "PowerShell", "Tea", "HTML+Cheetah", "Mason", "Django/Jinja", "JAGS", "ApacheConf", "DTD", "Lighttpd configuration file", "Java", "JavaScript+Genshi Text", "Scheme", "Nemerle", "RHTML", "Ragel in Java Host", "Darcs Patch", "Puppet", "Octave", "CoffeeScript", "Ragel in D Host", "Scilab", "Monkey", "HTML+Myghty", "CSS", "JavaScript+Smarty", "Io", "COBOLFree", "Asymptote", "vhdl", "Python 3", "CSS+Ruby", "Fortran", "d-objdump", "MySQL", "REBOL", "C++", "ERB", "CBM BASIC V2", "Befunge", "Julia", "MoonScript", "Ruby", "XML+Smarty", "Dylan", "Groovy", "MoinMoin/Trac Wiki markup", "autohotkey", "C", "HTML", "Felix", "CMake", "NSIS", "SourcePawn", "Mako", "VGL", "Velocity", "Koka", "CUDA", "Gnuplot", "IRC logs", "Prolog", "Python", "CSS+Django/Jinja", "verilog", "Smalltalk", "JavaScript+Myghty", "YAML", "Julia console", "ANTLR With ActionScript Target", "XML+Mako", "XSLT", "UrbiScript", "Scaml", "S", "DylanLID", "MAQL", "sqlite3con", "Boo", "OCaml", "eC", "ActionScript", "VB.net", "SquidConf", "XQuery", "D", "Fantom", "Gettext Catalog", "Logos", "Lasso", "SCSS", "BBCode", "Haml", "FoxPro", "Python 3.0 Traceback", "MuPAD", "XML+Ruby", "Dart", "IDL", "dg", "Evoque", "Jade", "c-objdump", "Kconfig", "Java Server Page", "reg", "ABAP", "XML+Velocity", "JavaScript+Cheetah", "HTML+Mako", "Ragel in Ruby Host", "RobotFramework", "Protocol Buffer", "CFEngine3", "Ragel", "GLSL", "COBOL", "TypeScript", "Ada", "PostgreSQL SQL dialect", "Xtend", "Logtalk", "objdump", "CSS+Mako", "ca65", "Objective-C++", "Gherkin", "HTML+PHP", "Makefile", "PostScript", "Hxml", "Kotlin", "PL/pgSQL", "Vala", "Haskell", "Bro", "Lua", "POVRay", "Sass", "ANTLR 
With Java Target", "Tcl", "ANTLR With ObjectiveC Target", "JavaScript+Ruby", "Racket", "AspectJ", "Base Makefile", "ANTLR With Python Target", "cpp-objdump", "Genshi Text", "Ioke", "PyPy Log", "Croc", "Objective-J", "GAS", "Batchfile", "Snobol", "XML", "ANTLR", "Opa", "XML+Cheetah", "Go", "Diff", "MiniD", "Cython", "Ragel in C Host", "Erlang", "Debian Control file", "aspx-vb", "BUGS", "Ragel in CPP Host", "aspx-cs", "Properties", "Groff", "Clojure", "Modelica", "QML", "JavaScript+Lasso", "ANTLR With Perl Target", "Genshi", "BlitzMax", "Treetop", "Matlab", "Myghty", "HTML+Genshi", "Duel", "Perl", "FSharp", "reStructuredText", "NewLisp", "Scala", "CSS+Lasso", "XML+PHP", "Stan", "INI", "MOOCode", "Shell Session", "RPMSpec", "Newspeak", "Bash Session", "Coq", "Raw token data", "Tcsh", "HTML+Lasso", "C#", "Gosu Template", "RConsole", "MXML", "TeX", "CSS+Smarty", "Text only", "ANTLR With C# Target", "OpenEdge ABL", "Cheetah", "Smali", "CSS+Myghty", "Rd", "LLVM", "Standard ML", "Elixir", "Nginx configuration file", "GoodData-CL", "AppleScript", "HTML+Smarty", "Objective-C", "JavaScript", "Rust", "Common Lisp", "Embedded Ragel", "ActionScript 3", "systemverilog", "Literate Haskell", "Python Traceback", "PHP", "ANTLR With CPP Target", "Gosu", "Hybris", "JavaScript+PHP", "Factor", "HTML+Velocity", "Mscgen", "Ooc", "SQL", "HTTP", "ECL", "Redcode", "Ragel in Objective C Host", "XML+Django/Jinja", "Awk", "JSON", "NASM", "ANTLR With Ruby Target", "XML+Myghty", "AutoIt", "Mako", "CSS+Mako", "HTML+Mako", "XML+Mako", "JavaScript+Mako"] | |||
} |
@@ -1,33 +1,13 @@ | |||
package com.bitshift.parsing; | |||
import java.io.BufferedReader; | |||
import java.io.InputStreamReader; | |||
import java.io.PrintWriter; | |||
import java.io.IOException; | |||
import java.net.ServerSocket; | |||
import java.net.Socket; | |||
import com.bitshift.parsing.parsers.JavaParser; | |||
import com.bitshift.parsing.utils.ParseServer; | |||
public class Parse { | |||
public static void main(String[] args) { | |||
String fromClient; | |||
String toClient; | |||
try { | |||
ServerSocket server = new ServerSocket(5002); | |||
while(true) { | |||
Socket clientSocket = server.accept(); | |||
JavaParser parser = new JavaParser(clientSocket); | |||
Thread parserTask = new Thread(parser); | |||
parserTask.start(); | |||
} | |||
} catch (IOException ex) { | |||
} | |||
ParseServer server = new ParseServer(Integer.parseInt(args[0])); | |||
System.out.println("Java Server listening on port " + args[0]); | |||
new Thread(server).start(); | |||
} | |||
} |
@@ -13,7 +13,6 @@ import org.eclipse.jdt.core.dom.ASTParser; | |||
import org.eclipse.jdt.core.dom.ASTVisitor; | |||
import org.eclipse.jdt.core.dom.CompilationUnit; | |||
import org.eclipse.jdt.core.dom.ClassInstanceCreation; | |||
import org.eclipse.jdt.core.dom.FieldDeclaration; | |||
import org.eclipse.jdt.core.dom.MethodDeclaration; | |||
import org.eclipse.jdt.core.dom.MethodInvocation; | |||
import org.eclipse.jdt.core.dom.Name; | |||
@@ -71,22 +70,6 @@ public class JavaParser extends Parser { | |||
this._cache = new Stack<HashMap<String, Object>>(); | |||
} | |||
public boolean visit(FieldDeclaration node) { | |||
HashMap<String, Object> data = new HashMap<String, Object>(); | |||
int sl = this.root.getLineNumber(node.getStartPosition()); | |||
int sc = this.root.getColumnNumber(node.getStartPosition()); | |||
data.put("coord", Symbols.createCoord(sl, sc, -1, -1)); | |||
this._cache.push(data); | |||
return true; | |||
} | |||
public void endVisit(FieldDeclaration node) { | |||
HashMap<String, Object> data = this._cache.pop(); | |||
String name = (String)data.remove("name"); | |||
this.symbols.insertFieldDeclaration(name, data); | |||
} | |||
public boolean visit(MethodDeclaration node) { | |||
HashMap<String, Object> data = new HashMap<String, Object>(); | |||
Name nameObj = node.getName(); | |||
@@ -115,7 +98,7 @@ public class JavaParser extends Parser { | |||
public void endVisit(MethodDeclaration node) { | |||
HashMap<String, Object> data = this._cache.pop(); | |||
String name = (String)data.remove("name"); | |||
this.symbols.insertMethodDeclaration(name, data); | |||
this.symbols.insertMethodDeclaration("\"" + name + "\"", data); | |||
} | |||
public boolean visit(MethodInvocation node) { | |||
@@ -136,7 +119,7 @@ public class JavaParser extends Parser { | |||
public void endVisit(MethodInvocation node) { | |||
HashMap<String, Object> data = this._cache.pop(); | |||
String name = (String)data.remove("name"); | |||
this.symbols.insertMethodInvocation(name, data); | |||
this.symbols.insertMethodInvocation("\"" + name + "\"", data); | |||
} | |||
public boolean visit(PackageDeclaration node) { | |||
@@ -167,9 +150,9 @@ public class JavaParser extends Parser { | |||
String name = (String)data.remove("name"); | |||
if (node.isInterface()) { | |||
this.symbols.insertInterfaceDeclaration(name, data); | |||
this.symbols.insertInterfaceDeclaration("\"" + name + "\"", data); | |||
} else { | |||
this.symbols.insertClassDeclaration(name, data); | |||
this.symbols.insertClassDeclaration("\"" + name + "\"", data); | |||
} | |||
} | |||
@@ -186,7 +169,7 @@ public class JavaParser extends Parser { | |||
public void endVisit(VariableDeclarationFragment node) { | |||
HashMap<String, Object> data = this._cache.pop(); | |||
String name = (String)data.remove("name"); | |||
this.symbols.insertVariableDeclaration(name, data); | |||
this.symbols.insertVariableDeclaration("\"" + name + "\"", data); | |||
} | |||
public boolean visit(QualifiedName node) { | |||
@@ -1,8 +1,9 @@ | |||
package com.bitshift.parsing.parsers; | |||
import java.io.BufferedReader; | |||
import java.io.BufferedWriter; | |||
import java.io.InputStreamReader; | |||
import java.io.PrintWriter; | |||
import java.io.OutputStreamWriter; | |||
import java.io.IOException; | |||
import java.net.Socket; | |||
@@ -46,12 +47,16 @@ public abstract class Parser implements Runnable { | |||
protected void writeToClient(String toClient) { | |||
try { | |||
PrintWriter clientWriter = new PrintWriter( | |||
this.clientSocket.getOutputStream(), true); | |||
BufferedWriter clientWriter = new BufferedWriter( | |||
new OutputStreamWriter(this.clientSocket.getOutputStream())); | |||
PackableMemory mem = new PackableMemory(toClient.length()); | |||
PackableMemory mem = new PackableMemory(4); | |||
mem.pack(toClient.length(), 0); | |||
String dataSize = new String(mem.mem); | |||
clientWriter.println(dataSize + toClient); | |||
clientWriter.write(dataSize + toClient); | |||
clientWriter.flush(); | |||
this.clientSocket.close(); | |||
} catch (IOException ex) { | |||
} | |||
} | |||
@@ -11,15 +11,16 @@ public class JavaSymbols extends Symbols { | |||
private HashMap<String, HashMap<String, Object>> _classes; | |||
private HashMap<String, HashMap<String, Object>> _interfaces; | |||
private HashMap<String, HashMap<String, Object>> _methods; | |||
private HashMap<String, HashMap<String, Object>> _fields; | |||
private HashMap<String, HashMap<String, Object>> _vars; | |||
private final String assignKey = "\"assignments\""; | |||
private final String useKey = "\"uses\""; | |||
public JavaSymbols() { | |||
_packageName = null; | |||
_classes = new HashMap<String, HashMap<String, Object>>(); | |||
_interfaces = new HashMap<String, HashMap<String, Object>>(); | |||
_methods = new HashMap<String, HashMap<String, Object>>(); | |||
_fields = new HashMap<String, HashMap<String, Object>>(); | |||
_vars = new HashMap<String, HashMap<String, Object>>(); | |||
} | |||
@@ -34,15 +35,23 @@ public class JavaSymbols extends Symbols { | |||
HashMap<String, Object> klass = new HashMap<String, Object>(); | |||
assignments.add(data.get("coord")); | |||
klass.put("assignments", assignments); | |||
klass.put("uses", uses); | |||
klass.put(assignKey, assignments); | |||
klass.put(useKey, uses); | |||
this._classes.put(name, klass); | |||
return true; | |||
} | |||
public boolean insertInterfaceDeclaration(String name, HashMap<String, Object> data) { | |||
this._interfaces.put(name, data); | |||
ArrayList<Object> assignments = new ArrayList<Object>(10); | |||
ArrayList<Object> uses = new ArrayList<Object>(10); | |||
HashMap<String, Object> klass = new HashMap<String, Object>(); | |||
assignments.add(data.get("coord")); | |||
klass.put(assignKey, assignments); | |||
klass.put(useKey, uses); | |||
this._interfaces.put(name, klass); | |||
return true; | |||
} | |||
@@ -54,13 +63,13 @@ public class JavaSymbols extends Symbols { | |||
ArrayList<Object> uses = new ArrayList<Object>(10); | |||
assignments.add(data.get("coord")); | |||
method.put("assignments", assignments); | |||
method.put("uses", uses); | |||
method.put(assignKey, assignments); | |||
method.put(useKey, uses); | |||
} else { | |||
ArrayList<Object> assignments = (ArrayList<Object>)method.get("assignments"); | |||
ArrayList<Object> assignments = (ArrayList<Object>)method.get(assignKey); | |||
assignments.add(data.get("coord")); | |||
method.put("assignments", assignments); | |||
method.put(assignKey, assignments); | |||
} | |||
this._methods.put(name, method); | |||
@@ -74,24 +83,19 @@ public class JavaSymbols extends Symbols { | |||
ArrayList<Object> uses = new ArrayList<Object>(10); | |||
uses.add(data.get("coord")); | |||
method.put("assignments", assignments); | |||
method.put("uses", uses); | |||
method.put(assignKey, assignments); | |||
method.put(useKey, uses); | |||
} else { | |||
ArrayList<Object> uses = (ArrayList<Object>)method.get("uses"); | |||
ArrayList<Object> uses = (ArrayList<Object>)method.get(useKey); | |||
uses.add(data.get("coord")); | |||
method.put("uses", uses); | |||
method.put(useKey, uses); | |||
} | |||
this._methods.put(name, method); | |||
return true; | |||
} | |||
public boolean insertFieldDeclaration(String name, HashMap<String, Object> data) { | |||
this._fields.put(name, data); | |||
return true; | |||
} | |||
public boolean insertVariableDeclaration(String name, HashMap<String, Object> data) { | |||
HashMap<String, Object> var = this._vars.get(name); | |||
if (var == null) { | |||
@@ -100,13 +104,13 @@ public class JavaSymbols extends Symbols { | |||
ArrayList<Object> uses = new ArrayList<Object>(10); | |||
assignments.add(data.get("coord")); | |||
var.put("assignments", assignments); | |||
var.put("uses", uses); | |||
var.put(assignKey, assignments); | |||
var.put(useKey, uses); | |||
} else { | |||
ArrayList<Object> assignments = (ArrayList<Object>)var.get("assignments"); | |||
ArrayList<Object> assignments = (ArrayList<Object>)var.get(assignKey); | |||
assignments.add(data.get("coord")); | |||
var.put("assignments", assignments); | |||
var.put(assignKey, assignments); | |||
} | |||
this._vars.put(name, var); | |||
@@ -120,13 +124,13 @@ public class JavaSymbols extends Symbols { | |||
ArrayList<Object> uses = new ArrayList<Object>(10); | |||
uses.add(data.get("coord")); | |||
var.put("assignments", assignments); | |||
var.put("uses", uses); | |||
var.put(assignKey, assignments); | |||
var.put(useKey, uses); | |||
} else { | |||
ArrayList<Object> uses = (ArrayList<Object>)var.get("uses"); | |||
ArrayList<Object> uses = (ArrayList<Object>)var.get(useKey); | |||
uses.add(data.get("coord")); | |||
var.put("uses", uses); | |||
var.put(useKey, uses); | |||
} | |||
this._vars.put(name, var); | |||
@@ -135,13 +139,14 @@ public class JavaSymbols extends Symbols { | |||
public String toString() { | |||
StringBuilder builder = new StringBuilder(); | |||
builder.append("classes:" + this._classes + ","); | |||
builder.append("interfaces:" + this._interfaces + ","); | |||
builder.append("methods:" + this._methods + ","); | |||
builder.append("fields:" + this._fields + ","); | |||
builder.append("vars:" + this._vars + ","); | |||
return "{" + builder.toString() + "}"; | |||
builder.append("\"classes\":" + this._classes + ","); | |||
builder.append("\"interfaces\":" + this._interfaces + ","); | |||
builder.append("\"methods\":" + this._methods + ","); | |||
builder.append("\"vars\":" + this._vars + ","); | |||
String s = builder.toString().replaceAll("=", ":"); | |||
s = s.substring(0, s.length() - 1); | |||
return "{" + s + "}"; | |||
} | |||
} | |||
@@ -22,7 +22,7 @@ public class PackableMemory { | |||
// The most significant portion of the integer is stored in mem[loc]. | |||
// Bytes are masked out of the integer and stored in the array, working | |||
// from right (least significant) to left (most significant). | |||
void pack(int val, int loc) | |||
public void pack(int val, int loc) | |||
{ | |||
final int MASK = 0xff; | |||
for (int i = 3; i >= 0; i--) | |||
@@ -0,0 +1,65 @@ | |||
/* Code for multithreaded server taken from Jakob Jenkov */ | |||
package com.bitshift.parsing.utils; | |||
import java.net.ServerSocket; | |||
import java.net.Socket; | |||
import java.io.IOException; | |||
import com.bitshift.parsing.parsers.JavaParser; | |||
public class ParseServer implements Runnable{ | |||
protected int serverPort = 8080; | |||
protected ServerSocket serverSocket = null; | |||
protected boolean isStopped = false; | |||
protected Thread runningThread= null; | |||
public ParseServer(int port){ | |||
this.serverPort = port; | |||
} | |||
public void run(){ | |||
synchronized(this){ | |||
this.runningThread = Thread.currentThread(); | |||
} | |||
openServerSocket(); | |||
while(! isStopped()){ | |||
Socket clientSocket = null; | |||
try { | |||
clientSocket = this.serverSocket.accept(); | |||
} catch (IOException e) { | |||
if(isStopped()) { | |||
System.out.println("Server Stopped.") ; | |||
return; | |||
} | |||
throw new RuntimeException( | |||
"Error accepting client connection", e); | |||
} | |||
new Thread(new JavaParser(clientSocket)).start(); | |||
} | |||
System.out.println("Server Stopped.") ; | |||
} | |||
private synchronized boolean isStopped() { | |||
return this.isStopped; | |||
} | |||
public synchronized void stop(){ | |||
this.isStopped = true; | |||
try { | |||
this.serverSocket.close(); | |||
} catch (IOException e) { | |||
throw new RuntimeException("Error closing server", e); | |||
} | |||
} | |||
private void openServerSocket() { | |||
try { | |||
this.serverSocket = new ServerSocket(this.serverPort); | |||
} catch (IOException e) { | |||
throw new RuntimeException("Cannot open port 8080", e); | |||
} | |||
} | |||
} |
@@ -0,0 +1,23 @@ | |||
package com.bitshift.parsing.utils; | |||
import java.util.List; | |||
import java.util.Arrays; | |||
/**
 * Fixed sequence of same-typed values whose string form is the values'
 * string representations joined by commas and wrapped in parentheses,
 * e.g. {@code (1,2,3)}.
 */
public class Tuple<T> {
    // Elements in the order they were passed to the constructor.
    private List<T> _objects;

    /**
     * @param args the elements of the tuple, in order; may be empty
     */
    public Tuple(T... args) {
        _objects = Arrays.asList(args);
    }

    /**
     * @return the elements separated by "," and enclosed in parentheses;
     *         an empty tuple yields "()"
     */
    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder("(");

        // Emit the separator before every element except the first, so no
        // trailing comma has to be trimmed (trimming failed on empty input).
        String separator = "";
        for (T o : this._objects) {
            builder.append(separator).append(o);
            separator = ",";
        }

        return builder.append(")").toString();
    }
}
@@ -1,5 +1,5 @@ | |||
require File.expand_path('../lib/parse_server.rb', __FILE__) | |||
task :start_server do |t| | |||
start_server | |||
task :start_server, [:port_number] do |t, args| | |||
start_server Integer(args[:port_number]) | |||
end |
@@ -13,8 +13,9 @@ def pack_int(i) | |||
end | |||
def start_server | |||
server = TCPServer.new 5003 | |||
def start_server(port_number) | |||
server = TCPServer.new port_number | |||
puts "Ruby Server listening on port #{port_number}\n" | |||
loop do | |||
# Start a new thread for each client accepted | |||
@@ -25,7 +25,8 @@ module Bitshift | |||
def initialize(offset, tree) | |||
super() | |||
module_hash = Hash.new {|hash, key| hash[key] = { assignments: [], uses: [] }} | |||
module_hash = Hash.new {|hash, key| | |||
hash[key] = { assignments: [], uses: [] }} | |||
class_hash = module_hash.clone | |||
function_hash = module_hash.clone | |||
var_hash = module_hash.clone | |||
@@ -118,8 +119,18 @@ module Bitshift | |||
end | |||
def to_s | |||
str = symbols.to_s | |||
str = str.gsub(/:(\w*)=>/, '"\1":') | |||
new_symbols = Hash.new {|hash, key| hash[key] = Hash.new} | |||
symbols.each do |type, sym_list| | |||
sym_list.each do |name, sym| | |||
new_symbols[type.to_s][name.to_s] = { | |||
"assignments" => sym[:assignments], | |||
"uses" => sym[:uses]} | |||
end | |||
end | |||
str = new_symbols.to_s | |||
str = str.gsub(/=>/, ":") | |||
return str | |||
end | |||
end | |||
@@ -50,7 +50,7 @@ var codeExample = '<table class="highlighttable"><tr><td class="linenos"><div cl | |||
searchBar.onkeyup = typingTimer; | |||
var testCodelet = { | |||
'code_url': 'https://github.com/earwig/bitshift/blob/develop/app.py', | |||
'url': 'https://github.com/earwig/bitshift/blob/develop/app.py', | |||
'filename': 'app.py', | |||
'language': 'python', | |||
'date_created': 'May 10, 2014', | |||
@@ -179,7 +179,7 @@ function createResult(codelet) { | |||
authors.id = 'authors'; | |||
//Add the bulk of the html | |||
title.innerHTML = 'File <a href="' + codelet.code_url + '">' | |||
title.innerHTML = 'File <a href="' + codelet.url + '">' | |||
+ codelet.filename + '</a>'; | |||
site.innerHTML = 'on <a href="' + codelet.origin[1] + '">' + codelet.origin[0] +'</a>'; | |||
language.innerHTML = codelet.language; | |||
@@ -21,7 +21,7 @@ if __name__ == '__main__': | |||
elif sys.argv[1] == 'ruby': | |||
file_name = "resources/parser.rb" | |||
server_socket_number = 5003 | |||
server_socket_number = 5065 | |||
server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) | |||
server_socket.connect(("localhost", server_socket_number)) | |||