Merge required to continue search-results styling with access to the `/search` route. Conflicts: app.py (merging tags/v1.0^2)
@@ -51,4 +51,4 @@ target | |||||
# Ctags | # Ctags | ||||
*/tags | */tags | ||||
log | |||||
logs |
@@ -2,13 +2,14 @@ | |||||
Module to contain all the project's Flask server plumbing. | Module to contain all the project's Flask server plumbing. | ||||
""" | """ | ||||
from flask import Flask | |||||
from flask import render_template, session | |||||
from json import dumps | |||||
from flask import Flask, make_response, render_template, request | |||||
from bitshift import assets | from bitshift import assets | ||||
from bitshift import languages | |||||
# from bitshift.database import Database | |||||
# from bitshift.query import parse_query | |||||
from bitshift.database import Database | |||||
from bitshift.languages import LANGS | |||||
from bitshift.query import parse_query, QueryParseException | |||||
app = Flask(__name__) | app = Flask(__name__) | ||||
app.config.from_object("bitshift.config") | app.config.from_object("bitshift.config") | ||||
@@ -17,17 +18,33 @@ app_env = app.jinja_env | |||||
app_env.line_statement_prefix = "=" | app_env.line_statement_prefix = "=" | ||||
app_env.globals.update(assets=assets) | app_env.globals.update(assets=assets) | ||||
# database = Database() | |||||
database = Database() | |||||
@app.route("/")
def index():
    """
    Serve the search homepage.

    Passes the full supported-language list to the template, which uses
    it for the search bar's autocomplete suggestions.
    """
    return render_template("index.html", autocomplete_languages=LANGS)
@app.route("/search.json")
def search():
    """
    Handle search API requests, replying with JSON.

    Reads the query string parameter ``q`` and an optional page number
    ``p`` (defaulting to 1). Any user error produces a JSON object with
    a single "error" key; success produces a result count plus a list
    of serialized codelets.
    """
    def reply(payload):
        """Wrap a JSON-serializable object in a JSON HTTP response."""
        resp = make_response(dumps(payload))
        resp.mimetype = "application/json"
        return resp

    query = request.args.get("q")
    page = request.args.get("p", 1)
    if not query:
        return reply({"error": "No query given"})
    try:
        tree = parse_query(query)
    except QueryParseException as exc:
        return reply({"error": exc.args[0]})
    try:
        page = int(page)
    except ValueError:
        return reply({"error": u"Invalid page number: %s" % page})
    count, codelets = database.search(tree, page)
    results = [codelet.serialize() for codelet in codelets]
    return reply({"count": count, "results": results})
@app.route("/about") | @app.route("/about") | ||||
def about(): | def about(): | ||||
@@ -1,3 +1,5 @@ | |||||
from .languages import LANGS | |||||
__all__ = ["Codelet"] | __all__ = ["Codelet"] | ||||
class Codelet(object): | class Codelet(object): | ||||
@@ -11,7 +13,7 @@ class Codelet(object): | |||||
:ivar authors: (array of tuples (str, str or None)) An array of tuples | :ivar authors: (array of tuples (str, str or None)) An array of tuples | ||||
containing an author's name and profile URL (on the service the code | containing an author's name and profile URL (on the service the code | ||||
was pulled from). | was pulled from). | ||||
:ivar code_url: (str) The url of the (page containing the) source code. | |||||
:ivar url: (str) The url of the (page containing the) source code. | |||||
:ivar date_created: (:class:`datetime.datetime`, or None) The date the code | :ivar date_created: (:class:`datetime.datetime`, or None) The date the code | ||||
was published. | was published. | ||||
:ivar date_modified: (:class:`datetime.datetime`, or None) The date the | :ivar date_modified: (:class:`datetime.datetime`, or None) The date the | ||||
@@ -24,8 +26,8 @@ class Codelet(object): | |||||
added by the database. | added by the database. | ||||
""" | """ | ||||
def __init__(self, name, code, filename, language, authors, code_url, | |||||
date_created, date_modified, rank, symbols=None, origin=None): | |||||
def __init__(self, name, code, filename, language, authors, url, | |||||
date_created, date_modified, rank, symbols=None, origin=None): | |||||
""" | """ | ||||
Create a Codelet instance. | Create a Codelet instance. | ||||
@@ -34,7 +36,7 @@ class Codelet(object): | |||||
:param filename: see :attr:`self.filename` | :param filename: see :attr:`self.filename` | ||||
:param language: see :attr:`self.language` | :param language: see :attr:`self.language` | ||||
:param authors: see :attr:`self.authors` | :param authors: see :attr:`self.authors` | ||||
:param code_url: see :attr:`self.code_url` | |||||
:param url: see :attr:`self.url` | |||||
:param date_created: see :attr:`self.date_created` | :param date_created: see :attr:`self.date_created` | ||||
:param date_modified: see :attr:`self.date_modified` | :param date_modified: see :attr:`self.date_modified` | ||||
:param rank: see :attr:`self.rank` | :param rank: see :attr:`self.rank` | ||||
@@ -46,7 +48,7 @@ class Codelet(object): | |||||
:type filename: see :attr:`self.filename` | :type filename: see :attr:`self.filename` | ||||
:type language: see :attr:`self.language` | :type language: see :attr:`self.language` | ||||
:type authors: see :attr:`self.authors` | :type authors: see :attr:`self.authors` | ||||
:type code_url: see :attr:`self.code_url` | |||||
:type url: see :attr:`self.url` | |||||
:type date_created: see :attr:`self.date_created` | :type date_created: see :attr:`self.date_created` | ||||
:type date_modified: see :attr:`self.date_modified` | :type date_modified: see :attr:`self.date_modified` | ||||
:type rank: see :attr:`self.rank` | :type rank: see :attr:`self.rank` | ||||
@@ -59,9 +61,24 @@ class Codelet(object): | |||||
self.filename = filename | self.filename = filename | ||||
self.language = language | self.language = language | ||||
self.authors = authors | self.authors = authors | ||||
self.code_url = code_url | |||||
self.url = url | |||||
self.date_created = date_created | self.date_created = date_created | ||||
self.date_modified = date_modified | self.date_modified = date_modified | ||||
self.rank = rank | self.rank = rank | ||||
self.symbols = symbols or {} | self.symbols = symbols or {} | ||||
self.origin = origin or (None, None, None) | self.origin = origin or (None, None, None) | ||||
def serialize(self):
    """
    Convert the codelet into a dictionary that can be sent as JSON.

    :return: The codelet as a dictionary.
    :rtype: dict
    """
    # date_created and date_modified are documented as possibly None;
    # guard them so serialization doesn't raise AttributeError.
    created = self.date_created.isoformat() if self.date_created else None
    modified = self.date_modified.isoformat() if self.date_modified else None
    return {
        "name": self.name, "code": self.code, "lang": LANGS[self.language],
        "authors": self.authors, "url": self.url,
        "created": created,
        "modified": modified,
        "symbols": self.symbols, "origin": self.origin
    }
@@ -1,55 +0,0 @@ | |||||
""" | |||||
:synopsis: Parent crawler module, which supervises all crawlers. | |||||
Contains functions for initializing all subsidiary, threaded crawlers. | |||||
""" | |||||
import logging, logging.handlers, os, Queue | |||||
from bitshift.crawler import crawler, indexer | |||||
__all__ = ["crawl"] | |||||
def crawl(): | |||||
""" | |||||
Initialize all crawlers (and indexers). | |||||
Start the: | |||||
1. GitHub crawler, :class:`crawler.GitHubCrawler`. | |||||
2. Bitbucket crawler, :class:`crawler.BitbucketCrawler`. | |||||
3. Git indexer, :class:`bitshift.crawler.indexer.GitIndexer`. | |||||
""" | |||||
_configure_logging() | |||||
MAX_URL_QUEUE_SIZE = 5e3 | |||||
repo_clone_queue = Queue.Queue(maxsize=MAX_URL_QUEUE_SIZE) | |||||
threads = [crawler.GitHubCrawler(repo_clone_queue), | |||||
crawler.BitbucketCrawler(repo_clone_queue), | |||||
indexer.GitIndexer(repo_clone_queue)] | |||||
for thread in threads: | |||||
thread.start() | |||||
def _configure_logging(): | |||||
LOG_FILE_DIR = "log" | |||||
if not os.path.exists(LOG_FILE_DIR): | |||||
os.mkdir(LOG_FILE_DIR) | |||||
logging.getLogger("requests").setLevel(logging.WARNING) | |||||
logging.getLogger("urllib3").setLevel(logging.WARNING) | |||||
formatter = logging.Formatter( | |||||
fmt=("%(asctime)s %(levelname)s %(name)s %(funcName)s" | |||||
" %(message)s"), datefmt="%y-%m-%d %H:%M:%S") | |||||
handler = logging.handlers.TimedRotatingFileHandler( | |||||
"%s/%s" % (LOG_FILE_DIR, "app.log"), when="H", interval=1, | |||||
backupCount=20) | |||||
handler.setFormatter(formatter) | |||||
root_logger = logging.getLogger() | |||||
root_logger.addHandler(handler) | |||||
root_logger.setLevel(logging.NOTSET) |
@@ -0,0 +1,65 @@ | |||||
""" | |||||
:synopsis: Parent crawler module, which supervises all crawlers. | |||||
Contains functions for initializing all subsidiary, threaded crawlers. | |||||
""" | |||||
import logging, logging.handlers, os, Queue | |||||
from bitshift.crawler import crawler, indexer | |||||
from bitshift.parser import parse, start_parse_servers | |||||
__all__ = ["crawl"] | |||||
def crawl():
    """
    Initialize all crawlers (and indexers), then the parse servers.

    Start the:

    1. GitHub crawler, :class:`crawler.GitHubCrawler`.
    2. Bitbucket crawler, :class:`crawler.BitbucketCrawler`.
    3. Git indexer, :class:`bitshift.crawler.indexer.GitIndexer`.
    """
    _configure_logging()

    # Bound the clone queue so the crawlers can't outrun the indexer.
    MAX_URL_QUEUE_SIZE = 5e3

    repo_clone_queue = Queue.Queue(maxsize=MAX_URL_QUEUE_SIZE)
    threads = [crawler.GitHubCrawler(repo_clone_queue),
               crawler.BitbucketCrawler(repo_clone_queue),
               indexer.GitIndexer(repo_clone_queue)]

    for thread in threads:
        thread.start()

    # The returned Popen handles were previously bound to an unused
    # local; the subprocesses keep running regardless.
    start_parse_servers()
def _configure_logging():
    """Install a rotating file handler logging to <project root>/logs."""
    # This isn't ideal, since it means the bitshift python package must be kept
    # inside the app, but it works for now:
    project_root = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "..", ".."))
    log_dir = os.path.join(project_root, "logs")
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    # Quiet the chatty third-party HTTP loggers.
    for noisy in ("requests", "urllib3"):
        logging.getLogger(noisy).setLevel(logging.WARNING)

    log_format = ("%(asctime)s %(levelname)s %(name)s:%(funcName)s"
                  " %(message)s")
    formatter = logging.Formatter(fmt=log_format,
                                  datefmt="%y-%m-%d %H:%M:%S")

    handler = logging.handlers.TimedRotatingFileHandler(
        "%s/%s" % (log_dir, "app.log"), when="H", interval=1,
        backupCount=20)
    handler.setFormatter(formatter)

    root_logger = logging.getLogger()
    root_logger.addHandler(handler)
    root_logger.setLevel(logging.NOTSET)
if __name__ == "__main__":
    # crawl() already calls _configure_logging(); configuring here as
    # well installed a second file handler, duplicating every log line.
    crawl()
@@ -7,6 +7,7 @@ import bs4, datetime, logging, os, Queue, re, shutil, string, subprocess, time,\ | |||||
threading | threading | ||||
from ..database import Database | from ..database import Database | ||||
from ..parser import parse, UnsupportedFileError | |||||
from ..codelet import Codelet | from ..codelet import Codelet | ||||
GIT_CLONE_DIR = "/tmp/bitshift" | GIT_CLONE_DIR = "/tmp/bitshift" | ||||
@@ -73,6 +74,7 @@ class GitIndexer(threading.Thread): | |||||
self.index_queue = Queue.Queue(maxsize=MAX_INDEX_QUEUE_SIZE) | self.index_queue = Queue.Queue(maxsize=MAX_INDEX_QUEUE_SIZE) | ||||
self.git_cloner = _GitCloner(clone_queue, self.index_queue) | self.git_cloner = _GitCloner(clone_queue, self.index_queue) | ||||
self.git_cloner.start() | self.git_cloner.start() | ||||
self.database = Database() | |||||
self._logger = logging.getLogger("%s.%s" % | self._logger = logging.getLogger("%s.%s" % | ||||
(__name__, self.__class__.__name__)) | (__name__, self.__class__.__name__)) | ||||
self._logger.info("Starting.") | self._logger.info("Starting.") | ||||
@@ -98,10 +100,7 @@ class GitIndexer(threading.Thread): | |||||
repo = self.index_queue.get() | repo = self.index_queue.get() | ||||
self.index_queue.task_done() | self.index_queue.task_done() | ||||
try: | |||||
self._index_repository(repo) | |||||
except Exception as excep: | |||||
self._logger.warning("%s: %s.", excep.__class__.__name__, excep) | |||||
self._index_repository(repo) | |||||
def _index_repository(self, repo): | def _index_repository(self, repo): | ||||
""" | """ | ||||
@@ -119,10 +118,15 @@ class GitIndexer(threading.Thread): | |||||
try: | try: | ||||
self._insert_repository_codelets(repo) | self._insert_repository_codelets(repo) | ||||
except Exception as excep: | except Exception as excep: | ||||
self._logger.warning("%s: %s.", excep.__class__.__name__, excep) | |||||
if os.path.isdir("%s/%s" % (GIT_CLONE_DIR, repo.name)): | |||||
shutil.rmtree("%s/%s" % (GIT_CLONE_DIR, repo.name)) | |||||
self._logger.exception("Exception raised while indexing:") | |||||
finally: | |||||
if os.path.isdir("%s/%s" % (GIT_CLONE_DIR, repo.name)): | |||||
if len([obj for obj in os.listdir('.') if | |||||
os.path.isdir(obj)]) <= 1: | |||||
shutil.rmtree("%s/%s" % ( | |||||
GIT_CLONE_DIR, repo.name.split("/")[0])) | |||||
else: | |||||
shutil.rmtree("%s/%s" % (GIT_CLONE_DIR, repo.name)) | |||||
def _insert_repository_codelets(self, repo): | def _insert_repository_codelets(self, repo): | ||||
""" | """ | ||||
@@ -147,17 +151,22 @@ class GitIndexer(threading.Thread): | |||||
source = self._decode(source_file.read()) | source = self._decode(source_file.read()) | ||||
if source is None: | if source is None: | ||||
continue | continue | ||||
except IOError as exception: | |||||
except IOError: | |||||
continue | continue | ||||
authors = [(self._decode(author), None) for author in \ | |||||
commits_meta[filename]["authors"]] | |||||
authors = [(self._decode(author), None) for author in | |||||
commits_meta[filename]["authors"]] | |||||
codelet = Codelet("%s:%s" % (repo.name, filename), source, filename, | codelet = Codelet("%s:%s" % (repo.name, filename), source, filename, | ||||
None, authors, self._generate_file_url(filename, | None, authors, self._generate_file_url(filename, | ||||
repo.url, repo.framework_name), | repo.url, repo.framework_name), | ||||
commits_meta[filename]["time_created"], | commits_meta[filename]["time_created"], | ||||
commits_meta[filename]["time_last_modified"], | commits_meta[filename]["time_last_modified"], | ||||
repo.rank) | repo.rank) | ||||
try: | |||||
parse(codelet) | |||||
except UnsupportedFileError: | |||||
continue | |||||
self.database.insert(codelet) | |||||
def _generate_file_url(self, filename, repo_url, framework_name): | def _generate_file_url(self, filename, repo_url, framework_name): | ||||
""" | """ | ||||
@@ -63,7 +63,7 @@ class Database(object): | |||||
query, args = tree.build_query(page) | query, args = tree.build_query(page) | ||||
cursor.execute(query, args) | cursor.execute(query, args) | ||||
ids = [id for id, _ in cursor.fetchall()] | ids = [id for id, _ in cursor.fetchall()] | ||||
num_results = 0 # TODO: NotImplemented | |||||
num_results = len(ids) # TODO: NotImplemented | |||||
return ids, num_results | return ids, num_results | ||||
def _get_authors_for_codelet(self, cursor, codelet_id): | def _get_authors_for_codelet(self, cursor, codelet_id): | ||||
@@ -103,13 +103,13 @@ class Database(object): | |||||
WHERE codelet_id = ?""" | WHERE codelet_id = ?""" | ||||
with self._conn.cursor(oursql.DictCursor) as dict_cursor: | with self._conn.cursor(oursql.DictCursor) as dict_cursor: | ||||
dict_cursor.executemany(query, [(id,) for id in ids]) | |||||
for row in dict_cursor.fetchone(): | |||||
codelet_id = row["codelet_id"] | |||||
for codelet_id in ids: | |||||
dict_cursor.execute(query, (codelet_id,)) | |||||
row = dict_cursor.fetchall()[0] | |||||
if row["origin_url_base"]: | if row["origin_url_base"]: | ||||
url = row["codelet_url"] | |||||
else: | |||||
url = row["origin_url_base"] + row["codelet_url"] | url = row["origin_url_base"] + row["codelet_url"] | ||||
else: | |||||
url = row["codelet_url"] | |||||
origin = (row["origin_name"], row["origin_url"], | origin = (row["origin_name"], row["origin_url"], | ||||
row["origin_image"]) | row["origin_image"]) | ||||
authors = self._get_authors_for_codelet(cursor, codelet_id) | authors = self._get_authors_for_codelet(cursor, codelet_id) | ||||
@@ -160,28 +160,31 @@ class Database(object): | |||||
:return: The total number of results, and the *n*\ th page of results. | :return: The total number of results, and the *n*\ th page of results. | ||||
:rtype: 2-tuple of (long, list of :py:class:`.Codelet`\ s) | :rtype: 2-tuple of (long, list of :py:class:`.Codelet`\ s) | ||||
""" | """ | ||||
query1 = """SELECT cdata_codelet, cache_count_mnt, cache_count_exp | |||||
query1 = "SELECT 1 FROM cache WHERE cache_id = ?" | |||||
query2 = """SELECT cdata_codelet, cache_count_mnt, cache_count_exp | |||||
FROM cache | FROM cache | ||||
INNER JOIN cache_data ON cache_id = cdata_cache | INNER JOIN cache_data ON cache_id = cdata_cache | ||||
WHERE cache_id = ?""" | WHERE cache_id = ?""" | ||||
query2 = "INSERT INTO cache VALUES (?, ?, ?, DEFAULT)" | |||||
query3 = "INSERT INTO cache_data VALUES (?, ?)" | |||||
query3 = "INSERT INTO cache VALUES (?, ?, ?, DEFAULT)" | |||||
query4 = "INSERT INTO cache_data VALUES (?, ?)" | |||||
cache_id = mmh3.hash64(str(page) + ":" + query.serialize())[0] | cache_id = mmh3.hash64(str(page) + ":" + query.serialize())[0] | ||||
with self._conn.cursor() as cursor: | with self._conn.cursor() as cursor: | ||||
cursor.execute(query1, (cache_id,)) | cursor.execute(query1, (cache_id,)) | ||||
results = cursor.fetchall() | |||||
if results: # Cache hit | |||||
num_results = results[0][1] * (10 ** results[0][2]) | |||||
ids = [res[0] for res in results] | |||||
else: # Cache miss | |||||
cache_hit = cursor.fetchall() | |||||
if cache_hit: | |||||
cursor.execute(query2, (cache_id,)) | |||||
rows = cursor.fetchall() | |||||
num_results = rows[0][1] * (10 ** rows[0][2]) if rows else 0 | |||||
ids = [row[0] for row in rows] | |||||
else: | |||||
ids, num_results = self._search_with_query(cursor, query, page) | ids, num_results = self._search_with_query(cursor, query, page) | ||||
num_exp = max(len(str(num_results)) - 3, 0) | num_exp = max(len(str(num_results)) - 3, 0) | ||||
num_results = int(round(num_results, -num_exp)) | num_results = int(round(num_results, -num_exp)) | ||||
num_mnt = num_results / (10 ** num_exp) | num_mnt = num_results / (10 ** num_exp) | ||||
cursor.execute(query2, (cache_id, num_mnt, num_exp)) | |||||
cursor.executemany(query3, [(cache_id, c_id) for c_id in ids]) | |||||
cursor.execute(query3, (cache_id, num_mnt, num_exp)) | |||||
cursor.executemany(query4, [(cache_id, c_id) for c_id in ids]) | |||||
codelet_gen = self._get_codelets_from_ids(cursor, ids) | codelet_gen = self._get_codelets_from_ids(cursor, ids) | ||||
return (num_results, list(codelet_gen)) | return (num_results, list(codelet_gen)) | ||||
@@ -0,0 +1,283 @@ | |||||
{ | |||||
"_comment" : "A list of programming languages supported by `bitshift`.", | |||||
"languages" : [ | |||||
"Debian Sourcelist", | |||||
"Delphi", | |||||
"JavaScript+Mako", | |||||
"Brainfuck", | |||||
"Ceylon", | |||||
"JavaScript+Django/Jinja", | |||||
"HTML+Evoque", | |||||
"NumPy", | |||||
"Modula-2", | |||||
"LiveScript", | |||||
"Nimrod", | |||||
"Bash", | |||||
"HTML+Django/Jinja", | |||||
"CSS+PHP", | |||||
"XML+Lasso", | |||||
"VimL", | |||||
"CSS+Genshi Text", | |||||
"Fancy", | |||||
"Coldfusion HTML", | |||||
"cfstatement", | |||||
"Scalate Server Page", | |||||
"Smarty", | |||||
"XML+Evoque", | |||||
"haXe", | |||||
"PowerShell", | |||||
"Tea", | |||||
"HTML+Cheetah", | |||||
"Mason", | |||||
"Django/Jinja", | |||||
"JAGS", | |||||
"ApacheConf", | |||||
"DTD", | |||||
"Lighttpd configuration file", | |||||
"Java", | |||||
"JavaScript+Genshi Text", | |||||
"Scheme", | |||||
"Nemerle", | |||||
"RHTML", | |||||
"Ragel in Java Host", | |||||
"Darcs Patch", | |||||
"Puppet", | |||||
"Octave", | |||||
"CoffeeScript", | |||||
"Ragel in D Host", | |||||
"Scilab", | |||||
"Monkey", | |||||
"HTML+Myghty", | |||||
"CSS", | |||||
"JavaScript+Smarty", | |||||
"Io", | |||||
"COBOLFree", | |||||
"Asymptote", | |||||
"vhdl", | |||||
"CSS+Ruby", | |||||
"Fortran", | |||||
"d-objdump", | |||||
"MySQL", | |||||
"REBOL", | |||||
"C++", | |||||
"ERB", | |||||
"CBM BASIC V2", | |||||
"Befunge", | |||||
"Julia", | |||||
"MoonScript", | |||||
"Ruby", | |||||
"XML+Smarty", | |||||
"Dylan", | |||||
"Groovy", | |||||
"MoinMoin/Trac Wiki markup", | |||||
"autohotkey", | |||||
"C", | |||||
"HTML", | |||||
"Felix", | |||||
"CMake", | |||||
"NSIS", | |||||
"SourcePawn", | |||||
"Mako", | |||||
"VGL", | |||||
"Velocity", | |||||
"Koka", | |||||
"CUDA", | |||||
"Gnuplot", | |||||
"IRC logs", | |||||
"Prolog", | |||||
"Python", | |||||
"CSS+Django/Jinja", | |||||
"verilog", | |||||
"Smalltalk", | |||||
"JavaScript+Myghty", | |||||
"YAML", | |||||
"Julia console", | |||||
"ANTLR With ActionScript Target", | |||||
"XML+Mako", | |||||
"XSLT", | |||||
"UrbiScript", | |||||
"Scaml", | |||||
"S", | |||||
"DylanLID", | |||||
"MAQL", | |||||
"sqlite3con", | |||||
"Boo", | |||||
"OCaml", | |||||
"eC", | |||||
"ActionScript", | |||||
"VB.net", | |||||
"SquidConf", | |||||
"XQuery", | |||||
"D", | |||||
"Fantom", | |||||
"Gettext Catalog", | |||||
"Logos", | |||||
"Lasso", | |||||
"SCSS", | |||||
"BBCode", | |||||
"Haml", | |||||
"FoxPro", | |||||
"MuPAD", | |||||
"XML+Ruby", | |||||
"Dart", | |||||
"IDL", | |||||
"dg", | |||||
"Evoque", | |||||
"Jade", | |||||
"c-objdump", | |||||
"Kconfig", | |||||
"Java Server Page", | |||||
"reg", | |||||
"ABAP", | |||||
"XML+Velocity", | |||||
"JavaScript+Cheetah", | |||||
"HTML+Mako", | |||||
"Ragel in Ruby Host", | |||||
"RobotFramework", | |||||
"Protocol Buffer", | |||||
"CFEngine3", | |||||
"Ragel", | |||||
"GLSL", | |||||
"COBOL", | |||||
"TypeScript", | |||||
"Ada", | |||||
"PostgreSQL SQL dialect", | |||||
"Xtend", | |||||
"Logtalk", | |||||
"objdump", | |||||
"CSS+Mako", | |||||
"ca65", | |||||
"Objective-C++", | |||||
"Gherkin", | |||||
"HTML+PHP", | |||||
"Makefile", | |||||
"PostScript", | |||||
"Hxml", | |||||
"Kotlin", | |||||
"PL/pgSQL", | |||||
"Vala", | |||||
"Haskell", | |||||
"Bro", | |||||
"Lua", | |||||
"POVRay", | |||||
"Sass", | |||||
"ANTLR With Java Target", | |||||
"Tcl", | |||||
"ANTLR With ObjectiveC Target", | |||||
"JavaScript+Ruby", | |||||
"Racket", | |||||
"AspectJ", | |||||
"Base Makefile", | |||||
"ANTLR With Python Target", | |||||
"cpp-objdump", | |||||
"Genshi Text", | |||||
"Ioke", | |||||
"PyPy Log", | |||||
"Croc", | |||||
"Objective-J", | |||||
"GAS", | |||||
"Batchfile", | |||||
"Snobol", | |||||
"XML", | |||||
"ANTLR", | |||||
"Opa", | |||||
"XML+Cheetah", | |||||
"Go", | |||||
"Diff", | |||||
"MiniD", | |||||
"Cython", | |||||
"Ragel in C Host", | |||||
"Erlang", | |||||
"Debian Control file", | |||||
"aspx-vb", | |||||
"BUGS", | |||||
"Ragel in CPP Host", | |||||
"aspx-cs", | |||||
"Properties", | |||||
"Groff", | |||||
"Clojure", | |||||
"Modelica", | |||||
"QML", | |||||
"JavaScript+Lasso", | |||||
"ANTLR With Perl Target", | |||||
"Genshi", | |||||
"BlitzMax", | |||||
"Treetop", | |||||
"Matlab", | |||||
"Myghty", | |||||
"HTML+Genshi", | |||||
"Duel", | |||||
"Perl", | |||||
"FSharp", | |||||
"reStructuredText", | |||||
"NewLisp", | |||||
"Scala", | |||||
"CSS+Lasso", | |||||
"XML+PHP", | |||||
"Stan", | |||||
"INI", | |||||
"MOOCode", | |||||
"Shell Session", | |||||
"RPMSpec", | |||||
"Newspeak", | |||||
"Bash Session", | |||||
"Coq", | |||||
"Raw token data", | |||||
"Tcsh", | |||||
"HTML+Lasso", | |||||
"C#", | |||||
"Gosu Template", | |||||
"RConsole", | |||||
"MXML", | |||||
"TeX", | |||||
"CSS+Smarty", | |||||
"Text only", | |||||
"ANTLR With C# Target", | |||||
"OpenEdge ABL", | |||||
"Cheetah", | |||||
"Smali", | |||||
"CSS+Myghty", | |||||
"Rd", | |||||
"LLVM", | |||||
"Standard ML", | |||||
"Elixir", | |||||
"Nginx configuration file", | |||||
"GoodData-CL", | |||||
"AppleScript", | |||||
"HTML+Smarty", | |||||
"Objective-C", | |||||
"JavaScript", | |||||
"Rust", | |||||
"Common Lisp", | |||||
"Embedded Ragel", | |||||
"ActionScript 3", | |||||
"systemverilog", | |||||
"Literate Haskell", | |||||
"PHP", | |||||
"ANTLR With CPP Target", | |||||
"Gosu", | |||||
"Hybris", | |||||
"JavaScript+PHP", | |||||
"Factor", | |||||
"HTML+Velocity", | |||||
"Mscgen", | |||||
"Ooc", | |||||
"SQL", | |||||
"HTTP", | |||||
"ECL", | |||||
"Redcode", | |||||
"Ragel in Objective C Host", | |||||
"XML+Django/Jinja", | |||||
"Awk", | |||||
"JSON", | |||||
"NASM", | |||||
"ANTLR With Ruby Target", | |||||
"XML+Myghty", | |||||
        "AutoIt"
    ]
} |
@@ -1,5 +1,5 @@ | |||||
import json
from os import path

# Load the supported-language names once at import time. LANGS order
# matters: other modules identify a language by LANGS.index(name).
with open(path.join(path.dirname(__file__), "languages.json")) as lang_json:
    LANGS = json.load(lang_json)["languages"]
@@ -1,8 +1,25 @@ | |||||
import json, pygments.lexers as pgl, sys, socket, struct | |||||
import json | |||||
import sys | |||||
import socket | |||||
import struct | |||||
import subprocess | |||||
from os import path | |||||
from pygments import lexers as pgl, util | |||||
from ..languages import LANGS | from ..languages import LANGS | ||||
from .python import parse_py | from .python import parse_py | ||||
# Was misspelled "_all__", so the export list was never in effect.
__all__ = ["parse", "UnsupportedFileError", "start_parse_servers"]

# Shell commands that launch the external (non-Python) parse servers.
# Each command embeds a "%d" placeholder for the port the server
# should listen on (filled in by start_parse_servers()).
PARSER_COMMANDS = [
    ('Java', ['mvn', '-f',
              path.join(path.dirname(__file__), "../../parsers/java/pom.xml"),
              'exec:java', '-Dexec.args="%d"']),
    ('Ruby', ['rake', '-f',
              path.join(path.dirname(__file__), "../../parsers/ruby/Rakefile"),
              "'start_server[%d]'"])
]

class UnsupportedFileError(Exception):
    """Raised when no lexer (and hence no parser) matches a codelet."""
    pass
@@ -19,13 +36,15 @@ def _lang(codelet): | |||||
Modify function to incorporate tags from stackoverflow. | Modify function to incorporate tags from stackoverflow. | ||||
""" | """ | ||||
if codelet.filename is not None: | |||||
try: | |||||
return pgl.guess_lexer_for_filename(codelet.filename, codelet.code).name | |||||
except: | |||||
raise UnsupportedFileError('Could not find a lexer for the codelet\'s filename') | |||||
try: | |||||
if codelet.filename: | |||||
lex = pgl.guess_lexer_for_filename(codelet.filename, codelet.code) | |||||
else: | |||||
lex = pgl.guess_lexer(codelet.code) | |||||
except util.ClassNotFound: | |||||
raise UnsupportedFileError(codelet.filename) | |||||
return LANGS.index(pgl.guess_lexer(codelet.code)) | |||||
return LANGS.index(lex.name) | |||||
def _recv_data(server_socket): | def _recv_data(server_socket): | ||||
""" | """ | ||||
@@ -39,8 +58,9 @@ def _recv_data(server_socket): | |||||
""" | """ | ||||
recv_size = 8192 | recv_size = 8192 | ||||
total_data = []; size_data = cur_data = '' | |||||
total_size = 0; size = sys.maxint | |||||
total_data = [] | |||||
size_data = cur_data = '' | |||||
total_size, size = 0, sys.maxint | |||||
while total_size < size: | while total_size < size: | ||||
cur_data = server_socket.recv(recv_size) | cur_data = server_socket.recv(recv_size) | ||||
@@ -61,8 +81,23 @@ def _recv_data(server_socket): | |||||
total_size = sum([len(s) for s in total_data]) | total_size = sum([len(s) for s in total_data]) | ||||
server_socket.close() | server_socket.close() | ||||
return ''.join(total_data); | |||||
return ''.join(total_data) | |||||
def start_parse_servers():
    """
    Starts all the parse servers for languages besides python.

    Each server is told to listen on port 5001 + LANGS.index(language),
    matching the port computation used when dispatching to it.

    :rtype: list
    """
    servers = []
    for lang, cmd in PARSER_COMMANDS:
        port = 5001 + LANGS.index(lang)
        # NOTE(review): shell=True on an interpolated string — fine for
        # these hard-coded trusted commands, but keep it that way.
        servers.append(subprocess.Popen(' '.join(cmd) % port, shell=True))
    return servers
def parse(codelet): | def parse(codelet): | ||||
""" | """ | ||||
@@ -76,9 +111,10 @@ def parse(codelet): | |||||
:type code: Codelet | :type code: Codelet | ||||
""" | """ | ||||
lang = _lang(codelet); source = codelet.code | |||||
lang = _lang(codelet) | |||||
source = codelet.code | |||||
codelet.language = lang | codelet.language = lang | ||||
server_socket_number = 5000 + lang | |||||
server_socket_number = 5001 + lang | |||||
if lang == LANGS.index('Python'): | if lang == LANGS.index('Python'): | ||||
parse_py(codelet) | parse_py(codelet) | ||||
@@ -86,8 +122,13 @@ def parse(codelet): | |||||
else: | else: | ||||
server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) | server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) | ||||
server_socket.connect(("localhost", server_socket_number)) | server_socket.connect(("localhost", server_socket_number)) | ||||
server_socket.send("%d\n%s" % (len(source), source)); | |||||
server_socket.send("%d\n%s" % (len(source), source)) | |||||
symbols = json.loads(_recv_data(server_socket)) | symbols = json.loads(_recv_data(server_socket)) | ||||
codelet.symbols = symbols | |||||
symbols = {key: [(name, [tuple(loc) | |||||
for loc in syms[name]['assignments']], | |||||
[tuple(loc) for loc in syms[name]['uses']]) | |||||
for name in syms.keys()] | |||||
for key, syms in symbols.iteritems()} | |||||
codelet.symbols = symbols |
@@ -1,4 +1,7 @@ | |||||
import ast
import re

# Matches a PEP 263 encoding declaration (e.g. "# -*- coding: utf-8 -*-"),
# which may only appear on one of the first two lines of a Python file.
encoding_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", re.UNICODE)
class _CachedWalker(ast.NodeVisitor): | class _CachedWalker(ast.NodeVisitor): | ||||
""" | """ | ||||
@@ -154,7 +157,26 @@ def parse_py(codelet): | |||||
:type code: Codelet | :type code: Codelet | ||||
""" | """ | ||||
tree = ast.parse(codelet.code) | |||||
def strip_encoding(lines):
    """Strips the encoding line from a file, which breaks the parser."""
    iterator = iter(lines)
    try:
        # Only the first two lines may carry an encoding declaration.
        for _ in range(2):
            line = next(iterator)
            if not encoding_re.match(line):
                yield line
    except StopIteration:
        return
    for line in iterator:
        yield line
try: | |||||
tree = ast.parse("\n".join(strip_encoding(codelet.code.splitlines()))) | |||||
except SyntaxError: | |||||
## TODO: add some logging here? | |||||
return | |||||
cutter = _CachedWalker() | cutter = _CachedWalker() | ||||
cutter.visit(tree) | cutter.visit(tree) | ||||
codelet.symbols = cutter.accum | codelet.symbols = cutter.accum |
@@ -36,10 +36,75 @@ class _QueryParser(object): | |||||
self._parse_variable: ["v", "var", "variable"] | self._parse_variable: ["v", "var", "variable"] | ||||
} | } | ||||
def _scan_query(self, query, markers):
    """Scan a query (sub)string for the first occurrence of some markers.

    Returns a 2-tuple of (first_marker_found, marker_index); the marker
    is None (and the index is sys.maxsize) when none occurs unescaped.
    """
    def is_escaped(query, index):
        """Return whether a query marker is backslash-escaped."""
        return (index > 0 and query[index - 1] == "\\" and
                (index < 2 or query[index - 2] != "\\"))

    best_marker, best_index = None, maxsize
    for marker in markers:
        index = query.find(marker)
        if is_escaped(query, index):
            # Skip past the escaped occurrence and rescan the tail. The
            # recursive call expects an *iterable of markers*: wrap the
            # single marker in a tuple, since a bare string would be
            # iterated character by character (only accidentally correct
            # for 1-character markers).
            _, new_index = self._scan_query(query[index + 1:], (marker,))
            index += new_index + 1
        if index >= 0 and index < best_index:
            best_marker, best_index = marker, index
    return best_marker, best_index
def _split_query(self, query, markers, parens=False): | |||||
"""Split a query string into a nested list of query terms. | |||||
Returns a list of terms and/or nested sublists of terms. Each term and | |||||
sublist is guarenteed to be non-empty. | |||||
""" | |||||
query = query.lstrip() | |||||
if not query: | |||||
return [] | |||||
marker, index = self._scan_query(query, markers) | |||||
if not marker: | |||||
return [query] | |||||
nest = [query[:index]] if index > 0 else [] | |||||
after = query[index + 1:] | |||||
if marker == " ": | |||||
nest += self._split_query(after, markers, parens) | |||||
elif marker in ('"', "'"): | |||||
close_marker, close_index = self._scan_query(after, marker) | |||||
if close_marker: | |||||
if close_index > 0: | |||||
nest.append(after[:close_index]) | |||||
after = after[close_index + 1:] | |||||
nest += self._split_query(after, markers, parens) | |||||
elif after: | |||||
nest.append(after) | |||||
elif marker == "(": | |||||
inner, after = self._split_query(after, markers, True), [] | |||||
if inner and isinstance(inner[-1], tuple): | |||||
after = self._split_query(inner.pop()[0], markers, parens) | |||||
if inner: | |||||
nest.append(inner) | |||||
if after: | |||||
nest += after | |||||
elif marker == ")": | |||||
if parens: | |||||
nest.append((after,)) | |||||
else: | |||||
nest += self._split_query(after, markers) | |||||
return nest | |||||
def _parse_literal(self, literal): | def _parse_literal(self, literal): | ||||
"""Parse part of a search query into a string or regular expression.""" | """Parse part of a search query into a string or regular expression.""" | ||||
if literal.startswith(("r:", "re:", "regex:", "regexp:")): | if literal.startswith(("r:", "re:", "regex:", "regexp:")): | ||||
return Regex(literal.split(":", 1)[1]) | |||||
arg = literal.split(":", 1)[1] | |||||
if not arg: | |||||
err = 'Incomplete query term: "%s"' % literal | |||||
raise QueryParseException(err) | |||||
return Regex(arg) | |||||
return String(literal) | return String(literal) | ||||
def _parse_language(self, term): | def _parse_language(self, term): | ||||
@@ -98,21 +163,29 @@ class _QueryParser(object): | |||||
"""Parse part of a query into a date created node and return it.""" | """Parse part of a query into a date created node and return it.""" | ||||
return self._parse_date(term, Date.CREATE) | return self._parse_date(term, Date.CREATE) | ||||
def _parse_symbol(self, term): | |||||
def _parse_symbol(self, term, stype=Symbol.ALL): | |||||
"""Parse part of a query into a symbol node and return it.""" | """Parse part of a query into a symbol node and return it.""" | ||||
return Symbol(Symbol.ALL, self._parse_literal(term)) | |||||
literal = self._parse_literal(term) | |||||
if isinstance(literal, String): | |||||
make_symbol = lambda lit: Symbol(stype, String(lit)) | |||||
symbols = self._split_query(literal.string, " \"'") | |||||
node = make_symbol(symbols.pop()) | |||||
while symbols: | |||||
node = BinaryOp(make_symbol(symbols.pop()), BinaryOp.OR, node) | |||||
return node | |||||
return Symbol(stype, literal) | |||||
def _parse_function(self, term): | def _parse_function(self, term): | ||||
"""Parse part of a query into a function node and return it.""" | """Parse part of a query into a function node and return it.""" | ||||
return Symbol(Symbol.FUNCTION, self._parse_literal(term)) | |||||
return self._parse_symbol(term, Symbol.FUNCTION) | |||||
def _parse_class(self, term): | def _parse_class(self, term): | ||||
"""Parse part of a query into a class node and return it.""" | """Parse part of a query into a class node and return it.""" | ||||
return Symbol(Symbol.CLASS, self._parse_literal(term)) | |||||
return self._parse_symbol(term, Symbol.CLASS) | |||||
def _parse_variable(self, term): | def _parse_variable(self, term): | ||||
"""Parse part of a query into a variable node and return it.""" | """Parse part of a query into a variable node and return it.""" | ||||
return Symbol(Symbol.VARIABLE, self._parse_literal(term)) | |||||
return self._parse_symbol(term, Symbol.VARIABLE) | |||||
def _parse_term(self, term): | def _parse_term(self, term): | ||||
"""Parse a query term into a tree node and return it.""" | """Parse a query term into a tree node and return it.""" | ||||
@@ -134,67 +207,6 @@ class _QueryParser(object): | |||||
return meth(arg) | return meth(arg) | ||||
return Text(self._parse_literal(term)) | return Text(self._parse_literal(term)) | ||||
def _scan_query(self, query, markers): | |||||
"""Scan a query (sub)string for the first occurance of some markers. | |||||
Returns a 2-tuple of (first_marker_found, marker_index). | |||||
""" | |||||
def is_escaped(query, index): | |||||
"""Return whether a query marker is backslash-escaped.""" | |||||
return (index > 0 and query[index - 1] == "\\" and | |||||
(index < 2 or query[index - 2] != "\\")) | |||||
best_marker, best_index = None, maxsize | |||||
for marker in markers: | |||||
index = query.find(marker) | |||||
if is_escaped(query, index): | |||||
_, new_index = self._scan_query(query[index + 1:], marker) | |||||
index += new_index + 1 | |||||
if index >= 0 and index < best_index: | |||||
best_marker, best_index = marker, index | |||||
return best_marker, best_index | |||||
def _split_query(self, query, parens=False): | |||||
"""Split a query string into a nested list of query terms. | |||||
Returns a list of terms and/or nested sublists of terms. Each term and | |||||
sublist is guarenteed to be non-empty. | |||||
""" | |||||
query = query.lstrip() | |||||
if not query: | |||||
return [] | |||||
marker, index = self._scan_query(query, " \"'()") | |||||
if not marker: | |||||
return [query] | |||||
nest = [query[:index]] if index > 0 else [] | |||||
after = query[index + 1:] | |||||
if marker == " ": | |||||
nest += self._split_query(after, parens) | |||||
elif marker in ('"', "'"): | |||||
close_marker, close_index = self._scan_query(after, marker) | |||||
if close_marker: | |||||
if close_index > 0: | |||||
nest.append(after[:close_index]) | |||||
after = after[close_index + 1:] | |||||
nest += self._split_query(after, parens) | |||||
elif after: | |||||
nest.append(after) | |||||
elif marker == "(": | |||||
inner, after = self._split_query(after, True), [] | |||||
if inner and isinstance(inner[-1], tuple): | |||||
after = self._split_query(inner.pop()[0], parens) | |||||
if inner: | |||||
nest.append(inner) | |||||
if after: | |||||
nest += after | |||||
elif marker == ")": | |||||
if parens: | |||||
nest.append((after,)) | |||||
else: | |||||
nest += self._split_query(after) | |||||
return nest | |||||
def _parse_boolean_operators(self, nest): | def _parse_boolean_operators(self, nest): | ||||
"""Parse boolean operators in a nested query list.""" | """Parse boolean operators in a nested query list.""" | ||||
op_lookup = { | op_lookup = { | ||||
@@ -271,7 +283,7 @@ class _QueryParser(object): | |||||
:raises: :py:class:`.QueryParseException` | :raises: :py:class:`.QueryParseException` | ||||
""" | """ | ||||
nest = self._split_query(query.rstrip()) | |||||
nest = self._split_query(query.rstrip(), " \"'()") | |||||
if not nest: | if not nest: | ||||
raise QueryParseException('Empty query: "%s"' % query) | raise QueryParseException('Empty query: "%s"' % query) | ||||
self._parse_boolean_operators(nest) | self._parse_boolean_operators(nest) | ||||
@@ -195,7 +195,7 @@ class Symbol(_Node): | |||||
CLASS = 1 | CLASS = 1 | ||||
VARIABLE = 2 | VARIABLE = 2 | ||||
TYPES = {FUNCTION: "FUNCTION", CLASS: "CLASS", VARIABLE: "VARIABLE"} | TYPES = {FUNCTION: "FUNCTION", CLASS: "CLASS", VARIABLE: "VARIABLE"} | ||||
TYPES_INV = ["functions", "classes", "variables"] | |||||
TYPES_INV = ["functions", "classes", "vars"] | |||||
def __init__(self, type_, name): | def __init__(self, type_, name): | ||||
""" | """ | ||||
@@ -1,4 +0,0 @@ | |||||
{ | |||||
"_comment" : "A list of programming languages supported by `bitshift`.", | |||||
"languages" : ["Debian Sourcelist", "Delphi", "JavaScript+Mako", "Brainfuck", "Ceylon", "JavaScript+Django/Jinja", "HTML+Evoque", "NumPy", "Modula-2", "LiveScript", "Nimrod", "Bash", "HTML+Django/Jinja", "CSS+PHP", "XML+Lasso", "VimL", "CSS+Genshi Text", "Fancy", "Coldfusion HTML", "cfstatement", "Scalate Server Page", "Smarty", "XML+Evoque", "haXe", "PowerShell", "Tea", "HTML+Cheetah", "Mason", "Django/Jinja", "JAGS", "ApacheConf", "DTD", "Lighttpd configuration file", "Java", "JavaScript+Genshi Text", "Scheme", "Nemerle", "RHTML", "Ragel in Java Host", "Darcs Patch", "Puppet", "Octave", "CoffeeScript", "Ragel in D Host", "Scilab", "Monkey", "HTML+Myghty", "CSS", "JavaScript+Smarty", "Io", "COBOLFree", "Asymptote", "vhdl", "Python 3", "CSS+Ruby", "Fortran", "d-objdump", "MySQL", "REBOL", "C++", "ERB", "CBM BASIC V2", "Befunge", "Julia", "MoonScript", "Ruby", "XML+Smarty", "Dylan", "Groovy", "MoinMoin/Trac Wiki markup", "autohotkey", "C", "HTML", "Felix", "CMake", "NSIS", "SourcePawn", "Mako", "VGL", "Velocity", "Koka", "CUDA", "Gnuplot", "IRC logs", "Prolog", "Python", "CSS+Django/Jinja", "verilog", "Smalltalk", "JavaScript+Myghty", "YAML", "Julia console", "ANTLR With ActionScript Target", "XML+Mako", "XSLT", "UrbiScript", "Scaml", "S", "DylanLID", "MAQL", "sqlite3con", "Boo", "OCaml", "eC", "ActionScript", "VB.net", "SquidConf", "XQuery", "D", "Fantom", "Gettext Catalog", "Logos", "Lasso", "SCSS", "BBCode", "Haml", "FoxPro", "Python 3.0 Traceback", "MuPAD", "XML+Ruby", "Dart", "IDL", "dg", "Evoque", "Jade", "c-objdump", "Kconfig", "Java Server Page", "reg", "ABAP", "XML+Velocity", "JavaScript+Cheetah", "HTML+Mako", "Ragel in Ruby Host", "RobotFramework", "Protocol Buffer", "CFEngine3", "Ragel", "GLSL", "COBOL", "TypeScript", "Ada", "PostgreSQL SQL dialect", "Xtend", "Logtalk", "objdump", "CSS+Mako", "ca65", "Objective-C++", "Gherkin", "HTML+PHP", "Makefile", "PostScript", "Hxml", "Kotlin", "PL/pgSQL", "Vala", "Haskell", "Bro", "Lua", "POVRay", "Sass", "ANTLR 
With Java Target", "Tcl", "ANTLR With ObjectiveC Target", "JavaScript+Ruby", "Racket", "AspectJ", "Base Makefile", "ANTLR With Python Target", "cpp-objdump", "Genshi Text", "Ioke", "PyPy Log", "Croc", "Objective-J", "GAS", "Batchfile", "Snobol", "XML", "ANTLR", "Opa", "XML+Cheetah", "Go", "Diff", "MiniD", "Cython", "Ragel in C Host", "Erlang", "Debian Control file", "aspx-vb", "BUGS", "Ragel in CPP Host", "aspx-cs", "Properties", "Groff", "Clojure", "Modelica", "QML", "JavaScript+Lasso", "ANTLR With Perl Target", "Genshi", "BlitzMax", "Treetop", "Matlab", "Myghty", "HTML+Genshi", "Duel", "Perl", "FSharp", "reStructuredText", "NewLisp", "Scala", "CSS+Lasso", "XML+PHP", "Stan", "INI", "MOOCode", "Shell Session", "RPMSpec", "Newspeak", "Bash Session", "Coq", "Raw token data", "Tcsh", "HTML+Lasso", "C#", "Gosu Template", "RConsole", "MXML", "TeX", "CSS+Smarty", "Text only", "ANTLR With C# Target", "OpenEdge ABL", "Cheetah", "Smali", "CSS+Myghty", "Rd", "LLVM", "Standard ML", "Elixir", "Nginx configuration file", "GoodData-CL", "AppleScript", "HTML+Smarty", "Objective-C", "JavaScript", "Rust", "Common Lisp", "Embedded Ragel", "ActionScript 3", "systemverilog", "Literate Haskell", "Python Traceback", "PHP", "ANTLR With CPP Target", "Gosu", "Hybris", "JavaScript+PHP", "Factor", "HTML+Velocity", "Mscgen", "Ooc", "SQL", "HTTP", "ECL", "Redcode", "Ragel in Objective C Host", "XML+Django/Jinja", "Awk", "JSON", "NASM", "ANTLR With Ruby Target", "XML+Myghty", "AutoIt", "Mako", "CSS+Mako", "HTML+Mako", "XML+Mako", "JavaScript+Mako"] | |||||
} |
@@ -1,33 +1,13 @@ | |||||
package com.bitshift.parsing; | package com.bitshift.parsing; | ||||
import java.io.BufferedReader; | |||||
import java.io.InputStreamReader; | |||||
import java.io.PrintWriter; | |||||
import java.io.IOException; | |||||
import java.net.ServerSocket; | |||||
import java.net.Socket; | |||||
import com.bitshift.parsing.parsers.JavaParser; | |||||
import com.bitshift.parsing.utils.ParseServer; | |||||
public class Parse { | public class Parse { | ||||
public static void main(String[] args) { | public static void main(String[] args) { | ||||
String fromClient; | |||||
String toClient; | |||||
try { | |||||
ServerSocket server = new ServerSocket(5002); | |||||
while(true) { | |||||
Socket clientSocket = server.accept(); | |||||
JavaParser parser = new JavaParser(clientSocket); | |||||
Thread parserTask = new Thread(parser); | |||||
parserTask.start(); | |||||
} | |||||
} catch (IOException ex) { | |||||
} | |||||
ParseServer server = new ParseServer(Integer.parseInt(args[0])); | |||||
System.out.println("Java Server listening on port " + args[0]); | |||||
new Thread(server).start(); | |||||
} | } | ||||
} | } |
@@ -13,7 +13,6 @@ import org.eclipse.jdt.core.dom.ASTParser; | |||||
import org.eclipse.jdt.core.dom.ASTVisitor; | import org.eclipse.jdt.core.dom.ASTVisitor; | ||||
import org.eclipse.jdt.core.dom.CompilationUnit; | import org.eclipse.jdt.core.dom.CompilationUnit; | ||||
import org.eclipse.jdt.core.dom.ClassInstanceCreation; | import org.eclipse.jdt.core.dom.ClassInstanceCreation; | ||||
import org.eclipse.jdt.core.dom.FieldDeclaration; | |||||
import org.eclipse.jdt.core.dom.MethodDeclaration; | import org.eclipse.jdt.core.dom.MethodDeclaration; | ||||
import org.eclipse.jdt.core.dom.MethodInvocation; | import org.eclipse.jdt.core.dom.MethodInvocation; | ||||
import org.eclipse.jdt.core.dom.Name; | import org.eclipse.jdt.core.dom.Name; | ||||
@@ -71,22 +70,6 @@ public class JavaParser extends Parser { | |||||
this._cache = new Stack<HashMap<String, Object>>(); | this._cache = new Stack<HashMap<String, Object>>(); | ||||
} | } | ||||
public boolean visit(FieldDeclaration node) { | |||||
HashMap<String, Object> data = new HashMap<String, Object>(); | |||||
int sl = this.root.getLineNumber(node.getStartPosition()); | |||||
int sc = this.root.getColumnNumber(node.getStartPosition()); | |||||
data.put("coord", Symbols.createCoord(sl, sc, -1, -1)); | |||||
this._cache.push(data); | |||||
return true; | |||||
} | |||||
public void endVisit(FieldDeclaration node) { | |||||
HashMap<String, Object> data = this._cache.pop(); | |||||
String name = (String)data.remove("name"); | |||||
this.symbols.insertFieldDeclaration(name, data); | |||||
} | |||||
public boolean visit(MethodDeclaration node) { | public boolean visit(MethodDeclaration node) { | ||||
HashMap<String, Object> data = new HashMap<String, Object>(); | HashMap<String, Object> data = new HashMap<String, Object>(); | ||||
Name nameObj = node.getName(); | Name nameObj = node.getName(); | ||||
@@ -115,7 +98,7 @@ public class JavaParser extends Parser { | |||||
public void endVisit(MethodDeclaration node) { | public void endVisit(MethodDeclaration node) { | ||||
HashMap<String, Object> data = this._cache.pop(); | HashMap<String, Object> data = this._cache.pop(); | ||||
String name = (String)data.remove("name"); | String name = (String)data.remove("name"); | ||||
this.symbols.insertMethodDeclaration(name, data); | |||||
this.symbols.insertMethodDeclaration("\"" + name + "\"", data); | |||||
} | } | ||||
public boolean visit(MethodInvocation node) { | public boolean visit(MethodInvocation node) { | ||||
@@ -136,7 +119,7 @@ public class JavaParser extends Parser { | |||||
public void endVisit(MethodInvocation node) { | public void endVisit(MethodInvocation node) { | ||||
HashMap<String, Object> data = this._cache.pop(); | HashMap<String, Object> data = this._cache.pop(); | ||||
String name = (String)data.remove("name"); | String name = (String)data.remove("name"); | ||||
this.symbols.insertMethodInvocation(name, data); | |||||
this.symbols.insertMethodInvocation("\"" + name + "\"", data); | |||||
} | } | ||||
public boolean visit(PackageDeclaration node) { | public boolean visit(PackageDeclaration node) { | ||||
@@ -167,9 +150,9 @@ public class JavaParser extends Parser { | |||||
String name = (String)data.remove("name"); | String name = (String)data.remove("name"); | ||||
if (node.isInterface()) { | if (node.isInterface()) { | ||||
this.symbols.insertInterfaceDeclaration(name, data); | |||||
this.symbols.insertInterfaceDeclaration("\"" + name + "\"", data); | |||||
} else { | } else { | ||||
this.symbols.insertClassDeclaration(name, data); | |||||
this.symbols.insertClassDeclaration("\"" + name + "\"", data); | |||||
} | } | ||||
} | } | ||||
@@ -186,7 +169,7 @@ public class JavaParser extends Parser { | |||||
public void endVisit(VariableDeclarationFragment node) { | public void endVisit(VariableDeclarationFragment node) { | ||||
HashMap<String, Object> data = this._cache.pop(); | HashMap<String, Object> data = this._cache.pop(); | ||||
String name = (String)data.remove("name"); | String name = (String)data.remove("name"); | ||||
this.symbols.insertVariableDeclaration(name, data); | |||||
this.symbols.insertVariableDeclaration("\"" + name + "\"", data); | |||||
} | } | ||||
public boolean visit(QualifiedName node) { | public boolean visit(QualifiedName node) { | ||||
@@ -1,8 +1,9 @@ | |||||
package com.bitshift.parsing.parsers; | package com.bitshift.parsing.parsers; | ||||
import java.io.BufferedReader; | import java.io.BufferedReader; | ||||
import java.io.BufferedWriter; | |||||
import java.io.InputStreamReader; | import java.io.InputStreamReader; | ||||
import java.io.PrintWriter; | |||||
import java.io.OutputStreamWriter; | |||||
import java.io.IOException; | import java.io.IOException; | ||||
import java.net.Socket; | import java.net.Socket; | ||||
@@ -46,12 +47,16 @@ public abstract class Parser implements Runnable { | |||||
protected void writeToClient(String toClient) { | protected void writeToClient(String toClient) { | ||||
try { | try { | ||||
PrintWriter clientWriter = new PrintWriter( | |||||
this.clientSocket.getOutputStream(), true); | |||||
BufferedWriter clientWriter = new BufferedWriter( | |||||
new OutputStreamWriter(this.clientSocket.getOutputStream())); | |||||
PackableMemory mem = new PackableMemory(toClient.length()); | |||||
PackableMemory mem = new PackableMemory(4); | |||||
mem.pack(toClient.length(), 0); | |||||
String dataSize = new String(mem.mem); | String dataSize = new String(mem.mem); | ||||
clientWriter.println(dataSize + toClient); | |||||
clientWriter.write(dataSize + toClient); | |||||
clientWriter.flush(); | |||||
this.clientSocket.close(); | |||||
} catch (IOException ex) { | } catch (IOException ex) { | ||||
} | } | ||||
} | } | ||||
@@ -11,15 +11,16 @@ public class JavaSymbols extends Symbols { | |||||
private HashMap<String, HashMap<String, Object>> _classes; | private HashMap<String, HashMap<String, Object>> _classes; | ||||
private HashMap<String, HashMap<String, Object>> _interfaces; | private HashMap<String, HashMap<String, Object>> _interfaces; | ||||
private HashMap<String, HashMap<String, Object>> _methods; | private HashMap<String, HashMap<String, Object>> _methods; | ||||
private HashMap<String, HashMap<String, Object>> _fields; | |||||
private HashMap<String, HashMap<String, Object>> _vars; | private HashMap<String, HashMap<String, Object>> _vars; | ||||
private final String assignKey = "\"assignments\""; | |||||
private final String useKey = "\"uses\""; | |||||
public JavaSymbols() { | public JavaSymbols() { | ||||
_packageName = null; | _packageName = null; | ||||
_classes = new HashMap<String, HashMap<String, Object>>(); | _classes = new HashMap<String, HashMap<String, Object>>(); | ||||
_interfaces = new HashMap<String, HashMap<String, Object>>(); | _interfaces = new HashMap<String, HashMap<String, Object>>(); | ||||
_methods = new HashMap<String, HashMap<String, Object>>(); | _methods = new HashMap<String, HashMap<String, Object>>(); | ||||
_fields = new HashMap<String, HashMap<String, Object>>(); | |||||
_vars = new HashMap<String, HashMap<String, Object>>(); | _vars = new HashMap<String, HashMap<String, Object>>(); | ||||
} | } | ||||
@@ -34,15 +35,23 @@ public class JavaSymbols extends Symbols { | |||||
HashMap<String, Object> klass = new HashMap<String, Object>(); | HashMap<String, Object> klass = new HashMap<String, Object>(); | ||||
assignments.add(data.get("coord")); | assignments.add(data.get("coord")); | ||||
klass.put("assignments", assignments); | |||||
klass.put("uses", uses); | |||||
klass.put(assignKey, assignments); | |||||
klass.put(useKey, uses); | |||||
this._classes.put(name, klass); | this._classes.put(name, klass); | ||||
return true; | return true; | ||||
} | } | ||||
public boolean insertInterfaceDeclaration(String name, HashMap<String, Object> data) { | public boolean insertInterfaceDeclaration(String name, HashMap<String, Object> data) { | ||||
this._interfaces.put(name, data); | |||||
ArrayList<Object> assignments = new ArrayList<Object>(10); | |||||
ArrayList<Object> uses = new ArrayList<Object>(10); | |||||
HashMap<String, Object> klass = new HashMap<String, Object>(); | |||||
assignments.add(data.get("coord")); | |||||
klass.put(assignKey, assignments); | |||||
klass.put(useKey, uses); | |||||
this._interfaces.put(name, klass); | |||||
return true; | return true; | ||||
} | } | ||||
@@ -54,13 +63,13 @@ public class JavaSymbols extends Symbols { | |||||
ArrayList<Object> uses = new ArrayList<Object>(10); | ArrayList<Object> uses = new ArrayList<Object>(10); | ||||
assignments.add(data.get("coord")); | assignments.add(data.get("coord")); | ||||
method.put("assignments", assignments); | |||||
method.put("uses", uses); | |||||
method.put(assignKey, assignments); | |||||
method.put(useKey, uses); | |||||
} else { | } else { | ||||
ArrayList<Object> assignments = (ArrayList<Object>)method.get("assignments"); | |||||
ArrayList<Object> assignments = (ArrayList<Object>)method.get(assignKey); | |||||
assignments.add(data.get("coord")); | assignments.add(data.get("coord")); | ||||
method.put("assignments", assignments); | |||||
method.put(assignKey, assignments); | |||||
} | } | ||||
this._methods.put(name, method); | this._methods.put(name, method); | ||||
@@ -74,24 +83,19 @@ public class JavaSymbols extends Symbols { | |||||
ArrayList<Object> uses = new ArrayList<Object>(10); | ArrayList<Object> uses = new ArrayList<Object>(10); | ||||
uses.add(data.get("coord")); | uses.add(data.get("coord")); | ||||
method.put("assignments", assignments); | |||||
method.put("uses", uses); | |||||
method.put(assignKey, assignments); | |||||
method.put(useKey, uses); | |||||
} else { | } else { | ||||
ArrayList<Object> uses = (ArrayList<Object>)method.get("uses"); | |||||
ArrayList<Object> uses = (ArrayList<Object>)method.get(useKey); | |||||
uses.add(data.get("coord")); | uses.add(data.get("coord")); | ||||
method.put("uses", uses); | |||||
method.put(useKey, uses); | |||||
} | } | ||||
this._methods.put(name, method); | this._methods.put(name, method); | ||||
return true; | return true; | ||||
} | } | ||||
public boolean insertFieldDeclaration(String name, HashMap<String, Object> data) { | |||||
this._fields.put(name, data); | |||||
return true; | |||||
} | |||||
public boolean insertVariableDeclaration(String name, HashMap<String, Object> data) { | public boolean insertVariableDeclaration(String name, HashMap<String, Object> data) { | ||||
HashMap<String, Object> var = this._vars.get(name); | HashMap<String, Object> var = this._vars.get(name); | ||||
if (var == null) { | if (var == null) { | ||||
@@ -100,13 +104,13 @@ public class JavaSymbols extends Symbols { | |||||
ArrayList<Object> uses = new ArrayList<Object>(10); | ArrayList<Object> uses = new ArrayList<Object>(10); | ||||
assignments.add(data.get("coord")); | assignments.add(data.get("coord")); | ||||
var.put("assignments", assignments); | |||||
var.put("uses", uses); | |||||
var.put(assignKey, assignments); | |||||
var.put(useKey, uses); | |||||
} else { | } else { | ||||
ArrayList<Object> assignments = (ArrayList<Object>)var.get("assignments"); | |||||
ArrayList<Object> assignments = (ArrayList<Object>)var.get(assignKey); | |||||
assignments.add(data.get("coord")); | assignments.add(data.get("coord")); | ||||
var.put("assignments", assignments); | |||||
var.put(assignKey, assignments); | |||||
} | } | ||||
this._vars.put(name, var); | this._vars.put(name, var); | ||||
@@ -120,13 +124,13 @@ public class JavaSymbols extends Symbols { | |||||
ArrayList<Object> uses = new ArrayList<Object>(10); | ArrayList<Object> uses = new ArrayList<Object>(10); | ||||
uses.add(data.get("coord")); | uses.add(data.get("coord")); | ||||
var.put("assignments", assignments); | |||||
var.put("uses", uses); | |||||
var.put(assignKey, assignments); | |||||
var.put(useKey, uses); | |||||
} else { | } else { | ||||
ArrayList<Object> uses = (ArrayList<Object>)var.get("uses"); | |||||
ArrayList<Object> uses = (ArrayList<Object>)var.get(useKey); | |||||
uses.add(data.get("coord")); | uses.add(data.get("coord")); | ||||
var.put("uses", uses); | |||||
var.put(useKey, uses); | |||||
} | } | ||||
this._vars.put(name, var); | this._vars.put(name, var); | ||||
@@ -135,13 +139,14 @@ public class JavaSymbols extends Symbols { | |||||
public String toString() { | public String toString() { | ||||
StringBuilder builder = new StringBuilder(); | StringBuilder builder = new StringBuilder(); | ||||
builder.append("classes:" + this._classes + ","); | |||||
builder.append("interfaces:" + this._interfaces + ","); | |||||
builder.append("methods:" + this._methods + ","); | |||||
builder.append("fields:" + this._fields + ","); | |||||
builder.append("vars:" + this._vars + ","); | |||||
return "{" + builder.toString() + "}"; | |||||
builder.append("\"classes\":" + this._classes + ","); | |||||
builder.append("\"interfaces\":" + this._interfaces + ","); | |||||
builder.append("\"methods\":" + this._methods + ","); | |||||
builder.append("\"vars\":" + this._vars + ","); | |||||
String s = builder.toString().replaceAll("=", ":"); | |||||
s = s.substring(0, s.length() - 1); | |||||
return "{" + s + "}"; | |||||
} | } | ||||
} | } | ||||
@@ -22,7 +22,7 @@ public class PackableMemory { | |||||
// The most significant porion of the integer is stored in mem[loc]. | // The most significant porion of the integer is stored in mem[loc]. | ||||
// Bytes are masked out of the integer and stored in the array, working | // Bytes are masked out of the integer and stored in the array, working | ||||
// from right(least significant) to left (most significant). | // from right(least significant) to left (most significant). | ||||
void pack(int val, int loc) | |||||
public void pack(int val, int loc) | |||||
{ | { | ||||
final int MASK = 0xff; | final int MASK = 0xff; | ||||
for (int i = 3; i >= 0; i--) | for (int i = 3; i >= 0; i--) | ||||
@@ -0,0 +1,65 @@ | |||||
/* Code for multithreaded server taken from Jakob Jenkov */ | |||||
package com.bitshift.parsing.utils; | |||||
import java.net.ServerSocket; | |||||
import java.net.Socket; | |||||
import java.io.IOException; | |||||
import com.bitshift.parsing.parsers.JavaParser; | |||||
public class ParseServer implements Runnable{ | |||||
protected int serverPort = 8080; | |||||
protected ServerSocket serverSocket = null; | |||||
protected boolean isStopped = false; | |||||
protected Thread runningThread= null; | |||||
public ParseServer(int port){ | |||||
this.serverPort = port; | |||||
} | |||||
public void run(){ | |||||
synchronized(this){ | |||||
this.runningThread = Thread.currentThread(); | |||||
} | |||||
openServerSocket(); | |||||
while(! isStopped()){ | |||||
Socket clientSocket = null; | |||||
try { | |||||
clientSocket = this.serverSocket.accept(); | |||||
} catch (IOException e) { | |||||
if(isStopped()) { | |||||
System.out.println("Server Stopped.") ; | |||||
return; | |||||
} | |||||
throw new RuntimeException( | |||||
"Error accepting client connection", e); | |||||
} | |||||
new Thread(new JavaParser(clientSocket)).start(); | |||||
} | |||||
System.out.println("Server Stopped.") ; | |||||
} | |||||
private synchronized boolean isStopped() { | |||||
return this.isStopped; | |||||
} | |||||
public synchronized void stop(){ | |||||
this.isStopped = true; | |||||
try { | |||||
this.serverSocket.close(); | |||||
} catch (IOException e) { | |||||
throw new RuntimeException("Error closing server", e); | |||||
} | |||||
} | |||||
private void openServerSocket() { | |||||
try { | |||||
this.serverSocket = new ServerSocket(this.serverPort); | |||||
} catch (IOException e) { | |||||
throw new RuntimeException("Cannot open port 8080", e); | |||||
} | |||||
} | |||||
} |
@@ -0,0 +1,23 @@ | |||||
package com.bitshift.parsing.utils; | |||||
import java.util.List; | |||||
import java.util.Arrays; | |||||
public class Tuple<T> { | |||||
private List<T> _objects; | |||||
public Tuple(T... args) { | |||||
_objects = Arrays.asList(args); | |||||
} | |||||
public String toString() { | |||||
StringBuilder builder = new StringBuilder(); | |||||
for(T o: this._objects) { | |||||
builder.append(o + ","); | |||||
} | |||||
String s = builder.toString(); | |||||
return "(" + s.substring(0, s.length() - 1) + ")"; | |||||
} | |||||
} |
@@ -1,5 +1,5 @@ | |||||
require File.expand_path('../lib/parse_server.rb', __FILE__) | require File.expand_path('../lib/parse_server.rb', __FILE__) | ||||
task :start_server do |t| | |||||
start_server | |||||
task :start_server, [:port_number] do |t, args| | |||||
start_server Integer(args[:port_number]) | |||||
end | end |
@@ -13,8 +13,9 @@ def pack_int(i) | |||||
end | end | ||||
def start_server | |||||
server = TCPServer.new 5003 | |||||
def start_server(port_number) | |||||
server = TCPServer.new port_number | |||||
puts "Ruby Server listening on port #{port_number}\n" | |||||
loop do | loop do | ||||
# Start a new thread for each client accepted | # Start a new thread for each client accepted | ||||
@@ -25,7 +25,8 @@ module Bitshift | |||||
def initialize(offset, tree) | def initialize(offset, tree) | ||||
super() | super() | ||||
module_hash = Hash.new {|hash, key| hash[key] = { assignments: [], uses: [] }} | |||||
module_hash = Hash.new {|hash, key| | |||||
hash[key] = { assignments: [], uses: [] }} | |||||
class_hash = module_hash.clone | class_hash = module_hash.clone | ||||
function_hash = module_hash.clone | function_hash = module_hash.clone | ||||
var_hash = module_hash.clone | var_hash = module_hash.clone | ||||
@@ -118,8 +119,18 @@ module Bitshift | |||||
end | end | ||||
def to_s | def to_s | ||||
str = symbols.to_s | |||||
str = str.gsub(/:(\w*)=>/, '"\1":') | |||||
new_symbols = Hash.new {|hash, key| hash[key] = Hash.new} | |||||
symbols.each do |type, sym_list| | |||||
sym_list.each do |name, sym| | |||||
new_symbols[type.to_s][name.to_s] = { | |||||
"assignments" => sym[:assignments], | |||||
"uses" => sym[:uses]} | |||||
end | |||||
end | |||||
str = new_symbols.to_s | |||||
str = str.gsub(/=>/, ":") | |||||
return str | return str | ||||
end | end | ||||
end | end | ||||
@@ -50,7 +50,7 @@ var codeExample = '<table class="highlighttable"><tr><td class="linenos"><div cl | |||||
searchBar.onkeyup = typingTimer; | searchBar.onkeyup = typingTimer; | ||||
var testCodelet = { | var testCodelet = { | ||||
'code_url': 'https://github.com/earwig/bitshift/blob/develop/app.py', | |||||
'url': 'https://github.com/earwig/bitshift/blob/develop/app.py', | |||||
'filename': 'app.py', | 'filename': 'app.py', | ||||
'language': 'python', | 'language': 'python', | ||||
'date_created': 'May 10, 2014', | 'date_created': 'May 10, 2014', | ||||
@@ -179,7 +179,7 @@ function createResult(codelet) { | |||||
authors.id = 'authors'; | authors.id = 'authors'; | ||||
//Add the bulk of the html | //Add the bulk of the html | ||||
title.innerHTML = 'File <a href="' + codelet.code_url + '">' | |||||
title.innerHTML = 'File <a href="' + codelet.url + '">' | |||||
+ codelet.filename + '</a>'; | + codelet.filename + '</a>'; | ||||
site.innerHTML = 'on <a href="' + codelet.origin[1] + '">' + codelet.origin[0] +'</a>'; | site.innerHTML = 'on <a href="' + codelet.origin[1] + '">' + codelet.origin[0] +'</a>'; | ||||
language.innerHTML = codelet.language; | language.innerHTML = codelet.language; | ||||
@@ -21,7 +21,7 @@ if __name__ == '__main__': | |||||
elif sys.argv[1] == 'ruby': | elif sys.argv[1] == 'ruby': | ||||
file_name = "resources/parser.rb" | file_name = "resources/parser.rb" | ||||
server_socket_number = 5003 | |||||
server_socket_number = 5065 | |||||
server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) | server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) | ||||
server_socket.connect(("localhost", server_socket_number)) | server_socket.connect(("localhost", server_socket_number)) | ||||