From 4dfd2974723a349b82b9837813b42a87d28d7267 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 5 May 2014 10:35:57 -0400 Subject: [PATCH 01/36] Update some documentation. --- README.md | 6 ++++++ bitshift/__init__.py | 9 ++++++++- docs/source/api/bitshift.crawler.rst | 27 +++++++++++++++++++++++++++ docs/source/api/bitshift.database.rst | 19 +++++++++++++++++++ docs/source/api/bitshift.rst | 10 ++-------- docs/source/conf.py | 2 +- setup.py | 2 +- 7 files changed, 64 insertions(+), 11 deletions(-) create mode 100644 docs/source/api/bitshift.crawler.rst create mode 100644 docs/source/api/bitshift.database.rst diff --git a/README.md b/README.md index 8ca31d7..96a93bc 100644 --- a/README.md +++ b/README.md @@ -32,3 +32,9 @@ root. Note that this will revert any custom changes made to the files in `docs/source/api`, so you might want to update them by hand instead. [SASS]: http://sass-lang.com/guide + +Releasing +--------- + +- Update `__version__` in `bitshift/__init__.py`, `version` in `setup.py`, and + `version` and `release` in `docs/conf.py`. diff --git a/bitshift/__init__.py b/bitshift/__init__.py index 78ca5e9..0bd031c 100644 --- a/bitshift/__init__.py +++ b/bitshift/__init__.py @@ -1 +1,8 @@ -from . import assets, codelet, config, database, parser, query, crawler +# -*- coding: utf-8 -*- + +__author__ = "Benjamin Attal, Ben Kurtovic, Severyn Kozak" +__copyright__ = "Copyright (c) 2014 Benjamin Attal, Ben Kurtovic, Severyn Kozak" +__license__ = "MIT License" +__version__ = "0.1.dev" + +from . import assets, codelet, config, crawler, database, parser, query diff --git a/docs/source/api/bitshift.crawler.rst b/docs/source/api/bitshift.crawler.rst new file mode 100644 index 0000000..2add004 --- /dev/null +++ b/docs/source/api/bitshift.crawler.rst @@ -0,0 +1,27 @@ +crawler Package +=============== + +:mod:`crawler` Package +---------------------- + +.. 
automodule:: bitshift.crawler + :members: + :undoc-members: + :show-inheritance: + +:mod:`crawler` Module +--------------------- + +.. automodule:: bitshift.crawler.crawler + :members: + :undoc-members: + :show-inheritance: + +:mod:`indexer` Module +--------------------- + +.. automodule:: bitshift.crawler.indexer + :members: + :undoc-members: + :show-inheritance: + diff --git a/docs/source/api/bitshift.database.rst b/docs/source/api/bitshift.database.rst new file mode 100644 index 0000000..38e20b6 --- /dev/null +++ b/docs/source/api/bitshift.database.rst @@ -0,0 +1,19 @@ +database Package +================ + +:mod:`database` Package +----------------------- + +.. automodule:: bitshift.database + :members: + :undoc-members: + :show-inheritance: + +:mod:`migration` Module +----------------------- + +.. automodule:: bitshift.database.migration + :members: + :undoc-members: + :show-inheritance: + diff --git a/docs/source/api/bitshift.rst b/docs/source/api/bitshift.rst index 1b1c703..388ac71 100644 --- a/docs/source/api/bitshift.rst +++ b/docs/source/api/bitshift.rst @@ -33,19 +33,13 @@ bitshift Package :undoc-members: :show-inheritance: -:mod:`database` Module ----------------------- - -.. automodule:: bitshift.database - :members: - :undoc-members: - :show-inheritance: - Subpackages ----------- .. toctree:: + bitshift.crawler + bitshift.database bitshift.parser bitshift.query diff --git a/docs/source/conf.py b/docs/source/conf.py index 5aee357..1f9d1be 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -59,7 +59,7 @@ copyright = u'2014, Benjamin Attal, Ben Kurtovic, Severyn Kozak' # The short X.Y version. version = '0.1' # The full version, including alpha/beta/rc tags. -release = '0.1' +release = '0.1.dev' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/setup.py b/setup.py index 47508e9..48d4c42 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup, find_packages setup( name = "bitshift", - version = "0.1", + version = "0.1.dev", packages = find_packages(), install_requires = [ "Flask>=0.10.1", "pygments>=1.6", "requests>=2.2.0", From d88e68e16ef2772088676f87713fc534adcc74f7 Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Fri, 11 Apr 2014 13:03:13 -0400 Subject: [PATCH 02/36] Add dispatch 'parse' function to parser __init__.py. Basic code language identification as well. Included pycparser as a depedency. --- .gitignore | 1 + bitshift/parser/__init__.py | 25 +++++++++++++++++++++++++ bitshift/parser/c.py | 0 bitshift/parser/java.py | 0 bitshift/parser/python.py | 0 5 files changed, 26 insertions(+) create mode 100644 bitshift/parser/c.py create mode 100644 bitshift/parser/java.py create mode 100644 bitshift/parser/python.py diff --git a/.gitignore b/.gitignore index 7e00121..4ce656b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +*.swp .sass-cache .DS_Store .my.cnf diff --git a/bitshift/parser/__init__.py b/bitshift/parser/__init__.py index e69de29..4d787c7 100644 --- a/bitshift/parser/__init__.py +++ b/bitshift/parser/__init__.py @@ -0,0 +1,25 @@ +from .python import parse_py +from .c import parse_c +from .java import parse_java +import pygments.lexers as pgl + +_all__ = ["parse"] + +# TODO: modify to incorporate tags from stackoverflow +def _lang(codelet): + if codelet.filename is not None: + return pgl.guess_lexer_for_filename(codelet.filename).name + + return pgl.guess_lexer(codelet.code) + +# dispatches the codelet to the correct parser +def parser(codelet): + lang = _lang(codelet) + + if lang == "Python": + parse_py(codelet) + elif lang == "C": + parse_c(codelet) + elif lang == "Java": + parse_java(codelet) + diff --git a/bitshift/parser/c.py b/bitshift/parser/c.py new file mode 100644 index 0000000..e69de29 diff --git a/bitshift/parser/java.py 
b/bitshift/parser/java.py new file mode 100644 index 0000000..e69de29 diff --git a/bitshift/parser/python.py b/bitshift/parser/python.py new file mode 100644 index 0000000..e69de29 From efdcb3793ac2235ba6d9c75f74dbbc2815e3779d Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Fri, 11 Apr 2014 13:14:59 -0400 Subject: [PATCH 03/36] Add docstrings for functions in parser. Add ivar for syntax tree to codelet documentation. --- bitshift/codelet.py | 2 ++ bitshift/parser/__init__.py | 15 ++++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/bitshift/codelet.py b/bitshift/codelet.py index 453ace0..ead8524 100644 --- a/bitshift/codelet.py +++ b/bitshift/codelet.py @@ -18,6 +18,8 @@ class Codelet(object): code was last modified. :ivar rank: (float) A quanitification of the source code's quality, as per available ratings (stars, forks, upvotes, etc.). + :ivar symbols: Dictionary containing dictionaries of functions, classes, + variable definitions, etc. """ def __init__(self, name, code, filename, language, authors, code_url, diff --git a/bitshift/parser/__init__.py b/bitshift/parser/__init__.py index 4d787c7..03d812c 100644 --- a/bitshift/parser/__init__.py +++ b/bitshift/parser/__init__.py @@ -7,13 +7,26 @@ _all__ = ["parse"] # TODO: modify to incorporate tags from stackoverflow def _lang(codelet): + """ + Private function to identify the language of a codelet. + + :param codelet: The codelet object to identified. + + :type code: Codelet + """ if codelet.filename is not None: return pgl.guess_lexer_for_filename(codelet.filename).name return pgl.guess_lexer(codelet.code) -# dispatches the codelet to the correct parser def parser(codelet): + """ + Dispatch codelet to correct parser by language of code. + + :param codelet: The codelet object to parsed. 
+ + :type code: Codelet + """ lang = _lang(codelet) if lang == "Python": From 903e4ccc05b71e725f0bd505eb5491ca70f48e40 Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Mon, 14 Apr 2014 12:51:42 -0400 Subject: [PATCH 04/36] Add constants in bitshift/config.py for languages instead of just strings. --- bitshift/config.py | 10 ++++++++++ bitshift/parser/__init__.py | 9 +++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/bitshift/config.py b/bitshift/config.py index 6c7be42..e69e367 100644 --- a/bitshift/config.py +++ b/bitshift/config.py @@ -4,3 +4,13 @@ Module to contain definitions of all Flask variables required by the app module. DEBUG = True SECRET_KEY = "\x89\x87\x9a9\xab{\xda\xfe.28\xb4\x18\x01\x95]]\xd2\xeaen\xe0Ot" + +LANG_PYTHON = 0 +LANG_C = 1 +LANG_JAVA = 2 + +PYG_IDS = { + "Python": LANG_PYTHON, + "C": LANG_C, + "JAVA": LANG_JAVA +} diff --git a/bitshift/parser/__init__.py b/bitshift/parser/__init__.py index 03d812c..3220a73 100644 --- a/bitshift/parser/__init__.py +++ b/bitshift/parser/__init__.py @@ -1,6 +1,7 @@ from .python import parse_py from .c import parse_c from .java import parse_java +from bitshift.config import LANG_PYTHON, LANG_C, LANG_JAVA, PYG_IDS import pygments.lexers as pgl _all__ = ["parse"] @@ -17,7 +18,7 @@ def _lang(codelet): if codelet.filename is not None: return pgl.guess_lexer_for_filename(codelet.filename).name - return pgl.guess_lexer(codelet.code) + return PYG_IDS[pgl.guess_lexer(codelet.code)] def parser(codelet): """ @@ -29,10 +30,10 @@ def parser(codelet): """ lang = _lang(codelet) - if lang == "Python": + if lang == LANG_PYTHON: parse_py(codelet) - elif lang == "C": + elif lang == LANG_C: parse_c(codelet) - elif lang == "Java": + elif lang == LANG_JAVA: parse_java(codelet) From 0c5e4572f840019d6fba5860bb16798ff49c212c Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Mon, 14 Apr 2014 12:53:31 -0400 Subject: [PATCH 05/36] Add placeholder functions for parsing c and java in bitshift/parser. 
Add parse_py function with helper functions. Parse_py grabs relevant information on variables, functions, and classes from abstract syntax tree of codelet code. --- bitshift/parser/c.py | 3 ++ bitshift/parser/java.py | 3 ++ bitshift/parser/python.py | 93 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 99 insertions(+) diff --git a/bitshift/parser/c.py b/bitshift/parser/c.py index e69de29..774825c 100644 --- a/bitshift/parser/c.py +++ b/bitshift/parser/c.py @@ -0,0 +1,3 @@ + +def parse_c(): + pass diff --git a/bitshift/parser/java.py b/bitshift/parser/java.py index e69de29..a495a10 100644 --- a/bitshift/parser/java.py +++ b/bitshift/parser/java.py @@ -0,0 +1,3 @@ + +def parse_java(): + pass diff --git a/bitshift/parser/python.py b/bitshift/parser/python.py index e69de29..abccfb5 100644 --- a/bitshift/parser/python.py +++ b/bitshift/parser/python.py @@ -0,0 +1,93 @@ +import ast + +def _serialize(tree): + """ + Private function to serialize an abstract syntax tree so it is indexable by the database. + + :param tree: The syntax tree to be serialized. + + :type tree: list or ast.AST + """ + + def _start_n_end(big_node): + """ + Helper function to get the start and end lines of a code node. + + :param big_node: The node. + + :type big_node: ast.FunctionDef, ast.ClassDef, ast.Module + """ + + start_line = big_node.lineno + + temp_node = big_node + while 'body' in temp_node.__dict__: + temp_node = temp_node.body[-1] + + end_line = temp_node.lineno + return (start_line, end_line) + + def _helper(cur_node, accum): + """ + Helper function for _serialize which recursively updates the 'vars', 'functions', and 'classes' in the parsed version of the tree. + + :param cur_node: The node in the syntax tree currently being parsed. + :param accum: Dicitonary holding parsed version of the tree. 
+ + :type cur_node: list or ast.AST + :type accum: dict + """ + + if isinstance(cur_node, list): + for node in cur_node: + _helper(node, accum) + + elif isinstance(cur_node, ast.Assign): + # return name + # return col and line offset + # in the future add value and type metadata + for t in cur_node.targets: + if isinstance(t, ast.Tuple): + for n in t.elts: + line, col = n.lineno, n.col_offset + accum['vars'][n.id] = {'ln': line, 'col': col} + else: + line, col = t.lineno, t.col_offset + accum['vars'][t.id] = {'ln': line, 'col': col} + + + elif isinstance(cur_node, ast.FunctionDef): + # return name + # return start and end of the function + # in the future add arguments and decorators metadata + start_line, end_line = _start_n_end(cur_node) + accum['functions'][cur_node.name] = {'start_ln': start_line , 'end_ln': end_line} + + elif isinstance(cur_node, ast.ClassDef): + # return name + # return start and end of the class + # in the future add arguments, inherits, and decorators metadata + start_line, end_line = _start_n_end(cur_node) + accum['classes'][cur_node.name] = {'start_ln': start_line , 'end_ln': end_line} + + elif isinstance(cur_node, ast.AST): + for k in cur_node.__dict__.keys(): + node = node.__dict__[k] + _helper(node, accum) + + accum = {} + _helper(tree, accum) + return accum + +def parse_py(codelet): + """ + Adds 'symbols' field to the codelet after parsing the code. + + :param codelet: The codelet object to parsed. 
+ + :type code: Codelet + """ + tree = ast.parse(codelet.code) + symbols = _serialize(tree) + codelet.symbols = symbols + print symbols From 5db273a7731098ba6cd5f07eea3be51c1838e59b Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Mon, 14 Apr 2014 13:13:20 -0400 Subject: [PATCH 06/36] Bugfixes for _serialize function in bitshift/parser/python.py --- bitshift/parser/python.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bitshift/parser/python.py b/bitshift/parser/python.py index abccfb5..51a3555 100644 --- a/bitshift/parser/python.py +++ b/bitshift/parser/python.py @@ -70,12 +70,12 @@ def _serialize(tree): start_line, end_line = _start_n_end(cur_node) accum['classes'][cur_node.name] = {'start_ln': start_line , 'end_ln': end_line} - elif isinstance(cur_node, ast.AST): + if isinstance(cur_node, ast.AST): for k in cur_node.__dict__.keys(): - node = node.__dict__[k] + node = cur_node.__dict__[k] _helper(node, accum) - accum = {} + accum = {'vars': {}, 'functions': {}, 'classes': {}} _helper(tree, accum) return accum From 4d8c818c05ee6e16ddad8119bfbb29fb01ace138 Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Mon, 14 Apr 2014 17:45:53 -0400 Subject: [PATCH 07/36] Corrected documentation in bitshift/codelet.py and bitshift/parser/__init__.py --- bitshift/codelet.py | 2 +- bitshift/parser/__init__.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/bitshift/codelet.py b/bitshift/codelet.py index ead8524..acaa52b 100644 --- a/bitshift/codelet.py +++ b/bitshift/codelet.py @@ -18,7 +18,7 @@ class Codelet(object): code was last modified. :ivar rank: (float) A quanitification of the source code's quality, as per available ratings (stars, forks, upvotes, etc.). - :ivar symbols: Dictionary containing dictionaries of functions, classes, + :ivar symbols: (dict) Dictionary containing dictionaries of functions, classes, variable definitions, etc. 
""" diff --git a/bitshift/parser/__init__.py b/bitshift/parser/__init__.py index 3220a73..3476487 100644 --- a/bitshift/parser/__init__.py +++ b/bitshift/parser/__init__.py @@ -6,7 +6,6 @@ import pygments.lexers as pgl _all__ = ["parse"] -# TODO: modify to incorporate tags from stackoverflow def _lang(codelet): """ Private function to identify the language of a codelet. @@ -14,7 +13,11 @@ def _lang(codelet): :param codelet: The codelet object to identified. :type code: Codelet + + .. todo:: + Modify function to incorporate tags from stackoverflow. """ + if codelet.filename is not None: return pgl.guess_lexer_for_filename(codelet.filename).name @@ -28,6 +31,7 @@ def parser(codelet): :type code: Codelet """ + lang = _lang(codelet) if lang == LANG_PYTHON: From b77db873c138876c1a362f2dcc603e9351d4eb5d Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Mon, 14 Apr 2014 17:47:48 -0400 Subject: [PATCH 08/36] Refactor parsing in python by adding node visitor class. Performs same tasks as previous version, but is more concise. Add: bitshift/parser/python.py: Add PyTreeCutter class to perform actions on specific nodes. --- bitshift/parser/python.py | 122 +++++++++++++++++++++++++--------------------- 1 file changed, 67 insertions(+), 55 deletions(-) diff --git a/bitshift/parser/python.py b/bitshift/parser/python.py index 51a3555..1b86f3a 100644 --- a/bitshift/parser/python.py +++ b/bitshift/parser/python.py @@ -1,15 +1,20 @@ import ast -def _serialize(tree): +class PyTreeCutter(ast.NodeVisitor): """ - Private function to serialize an abstract syntax tree so it is indexable by the database. + Local node visitor for python abstract syntax trees. - :param tree: The syntax tree to be serialized. - - :type tree: list or ast.AST + :ivar accum: (dict) Relevant data accumulated from an abstract syntax tree. """ - def _start_n_end(big_node): + def __init__(self): + """ + Create a PyTreeCutter instance. 
+ """ + + self.accum = {'vars': {}, 'functions': {}, 'classes': {}} + + def start_n_end(self, big_node): """ Helper function to get the start and end lines of a code node. @@ -27,57 +32,63 @@ def _serialize(tree): end_line = temp_node.lineno return (start_line, end_line) - def _helper(cur_node, accum): + def visit_Assign(self, node): + """ + Visits Assign nodes in a tree. Adds relevant data about them to accum. + + :param node: The current node. + + :type node: ast.Assign + + .. todo:: + Add value and type metadata to accum. """ - Helper function for _serialize which recursively updates the 'vars', 'functions', and 'classes' in the parsed version of the tree. - :param cur_node: The node in the syntax tree currently being parsed. - :param accum: Dicitonary holding parsed version of the tree. + for t in node.targets: + if isinstance(t, ast.Tuple): + for n in t.elts: + line, col = n.lineno, n.col_offset + self.accum['vars'][n.id] = {'ln': line, 'col': col} + else: + line, col = t.lineno, t.col_offset + self.accum['vars'][t.id] = {'ln': line, 'col': col} - :type cur_node: list or ast.AST - :type accum: dict + self.generic_visit(node) + + def visit_FunctionDef(self, node): """ + Visits FunctionDef nodes in a tree. Adds relevant data about them to accum. + + :param node: The current node. 
- if isinstance(cur_node, list): - for node in cur_node: - _helper(node, accum) - - elif isinstance(cur_node, ast.Assign): - # return name - # return col and line offset - # in the future add value and type metadata - for t in cur_node.targets: - if isinstance(t, ast.Tuple): - for n in t.elts: - line, col = n.lineno, n.col_offset - accum['vars'][n.id] = {'ln': line, 'col': col} - else: - line, col = t.lineno, t.col_offset - accum['vars'][t.id] = {'ln': line, 'col': col} - - - elif isinstance(cur_node, ast.FunctionDef): - # return name - # return start and end of the function - # in the future add arguments and decorators metadata - start_line, end_line = _start_n_end(cur_node) - accum['functions'][cur_node.name] = {'start_ln': start_line , 'end_ln': end_line} - - elif isinstance(cur_node, ast.ClassDef): - # return name - # return start and end of the class - # in the future add arguments, inherits, and decorators metadata - start_line, end_line = _start_n_end(cur_node) - accum['classes'][cur_node.name] = {'start_ln': start_line , 'end_ln': end_line} - - if isinstance(cur_node, ast.AST): - for k in cur_node.__dict__.keys(): - node = cur_node.__dict__[k] - _helper(node, accum) - - accum = {'vars': {}, 'functions': {}, 'classes': {}} - _helper(tree, accum) - return accum + :type node: ast.FunctionDef + + .. todo:: + Add arguments and decorators metadata to accum. + """ + + start_line, end_line = self.start_n_end(node) + self.accum['functions'][node.name] = {'start_ln': start_line, + 'end_ln': end_line} + + self.generic_visit(node) + + def visit_ClassDef(self, node): + """ + Visits ClassDef nodes in a tree. Adds relevant data about them to accum. + + :param node: The current node. + + :type node: ast.ClassDef + + .. todo:: + Add arguments, inherits, and decorators metadata to accum. 
+ """ + start_line, end_line = self.start_n_end(node) + self.accum['functions'][node.name] = {'start_ln': start_line, + 'end_ln': end_line} + + self.generic_visit(node) def parse_py(codelet): """ @@ -87,7 +98,8 @@ def parse_py(codelet): :type code: Codelet """ + tree = ast.parse(codelet.code) - symbols = _serialize(tree) - codelet.symbols = symbols - print symbols + cutter = PyTreeCutter() + cutter.visit(tree) + codelet.symbols = cutter.accum From d485b87f211d40124f4396bc46d505300ded2336 Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Mon, 14 Apr 2014 19:10:32 -0400 Subject: [PATCH 09/36] Fix docstring in bitshift/parser/python.py --- bitshift/parser/python.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bitshift/parser/python.py b/bitshift/parser/python.py index 1b86f3a..16e1dd4 100644 --- a/bitshift/parser/python.py +++ b/bitshift/parser/python.py @@ -16,11 +16,11 @@ class PyTreeCutter(ast.NodeVisitor): def start_n_end(self, big_node): """ - Helper function to get the start and end lines of a code node. + Helper function to get the start and end lines of an AST node. :param big_node: The node. - :type big_node: ast.FunctionDef, ast.ClassDef, ast.Module + :type big_node: ast.FunctionDef or ast.ClassDef or ast.Module """ start_line = big_node.lineno From 847410b13c22d34c2f3605cc6dae54197112be76 Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Tue, 15 Apr 2014 13:31:01 -0400 Subject: [PATCH 10/36] Minor fix-ups in python parser. Mod: python.py - Add self.cache to allow for saving of unassocaited metadata as the PyTreeCutter moves down the syntax tree. - Update docstrings. 
--- bitshift/parser/python.py | 55 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/bitshift/parser/python.py b/bitshift/parser/python.py index 16e1dd4..8c59301 100644 --- a/bitshift/parser/python.py +++ b/bitshift/parser/python.py @@ -4,7 +4,16 @@ class PyTreeCutter(ast.NodeVisitor): """ Local node visitor for python abstract syntax trees. - :ivar accum: (dict) Relevant data accumulated from an abstract syntax tree. + :ivar accum: (dict) Information on variables, functions, and classes + accumulated from an abstract syntax tree. + + :ivar cache: (dict or None) Information stored about parent nodes. Added + to accum when node reaches the lowest possible level. + + .. todo:: + Add visit funciton for ast.Name to record all uses of a variable. + + Use self.cache to store extra information about nodes. """ def __init__(self): @@ -13,24 +22,25 @@ class PyTreeCutter(ast.NodeVisitor): """ self.accum = {'vars': {}, 'functions': {}, 'classes': {}} + self.cache = None - def start_n_end(self, big_node): + def start_n_end(self, node): """ Helper function to get the start and end lines of an AST node. - :param big_node: The node. + :param node: The node. 
- :type big_node: ast.FunctionDef or ast.ClassDef or ast.Module + :type node: ast.FunctionDef or ast.ClassDef or ast.Module """ - start_line = big_node.lineno + start_line, start_col = node.lineno, node.col_offset - temp_node = big_node + temp_node = node while 'body' in temp_node.__dict__: temp_node = temp_node.body[-1] - end_line = temp_node.lineno - return (start_line, end_line) + end_line, end_col = temp_node.lineno, temp_node.col_offset + return (start_line, start_col, end_line, end_col) def visit_Assign(self, node): """ @@ -48,10 +58,16 @@ class PyTreeCutter(ast.NodeVisitor): if isinstance(t, ast.Tuple): for n in t.elts: line, col = n.lineno, n.col_offset - self.accum['vars'][n.id] = {'ln': line, 'col': col} + self.accum['functions'][n.id]['start_ln'] = line + self.accum['functions'][n.id]['start_col'] = col + self.accum['functions'][n.id]['end_ln'] = line + self.accum['functions'][n.id]['end_ln'] = col else: line, col = t.lineno, t.col_offset - self.accum['vars'][t.id] = {'ln': line, 'col': col} + self.accum['functions'][t.id]['start_ln'] = line + self.accum['functions'][t.id]['start_col'] = col + self.accum['functions'][t.id]['end_ln'] = line + self.accum['functions'][t.id]['end_ln'] = col self.generic_visit(node) @@ -67,9 +83,11 @@ class PyTreeCutter(ast.NodeVisitor): Add arguments and decorators metadata to accum. """ - start_line, end_line = self.start_n_end(node) - self.accum['functions'][node.name] = {'start_ln': start_line, - 'end_ln': end_line} + start_line, start_col, end_line, end_col = self.start_n_end(node) + self.accum['functions'][node.name]['start_ln'] = start_line + self.accum['functions'][node.name]['start_col'] = start_col + self.accum['functions'][node.name]['end_ln'] = end_line + self.accum['functions'][node.name]['end_ln'] = end_col self.generic_visit(node) @@ -84,15 +102,18 @@ class PyTreeCutter(ast.NodeVisitor): .. todo:: Add arguments, inherits, and decorators metadata to accum. 
""" - start_line, end_line = self.start_n_end(node) - self.accum['functions'][node.name] = {'start_ln': start_line, - 'end_ln': end_line} + + start_line, start_col, end_line, end_col = self.start_n_end(node) + self.accum['functions'][node.name]['start_ln'] = start_line + self.accum['functions'][node.name]['start_col'] = start_col + self.accum['functions'][node.name]['end_ln'] = end_line + self.accum['functions'][node.name]['end_ln'] = end_col self.generic_visit(node) def parse_py(codelet): """ - Adds 'symbols' field to the codelet after parsing the code. + Adds 'symbols' field to the codelet after parsing the python code. :param codelet: The codelet object to parsed. From 0a57cf50e693723623dfb47322305caeaa8ad7fc Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Tue, 15 Apr 2014 13:38:20 -0400 Subject: [PATCH 11/36] Add first version of the c parser Add: c.py - CTreeCutter class is very similar to PyTreeCutter. It utilizes self.cache as opposed to PyTreeCutter which doesn't yet. - CTreeCutter visit functions simply add start and end lines of the node to the cache, and visit_Decl pushes the cache onto accum. - parse_c performs a task identical to parse_py. However, many c files need to be pre-processed before they are parsed. --- bitshift/parser/c.py | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 2 deletions(-) diff --git a/bitshift/parser/c.py b/bitshift/parser/c.py index 774825c..7754b81 100644 --- a/bitshift/parser/c.py +++ b/bitshift/parser/c.py @@ -1,3 +1,106 @@ +from pycparser import c_parser, c_ast -def parse_c(): - pass +class CTreeCutter(c_ast.NodeVisitor): + """ + Local node visitor for c abstract syntax trees. + + :ivar accum: (dict) Information on variables, functions, and structs + accumulated from an abstract syntax tree. + + :ivar cache: (dict or None) Information stored about parent nodes. Added + to accum when node reaches the lowest possible level. + + .. 
todo:: + Add visit function for c_ast.ID to record all uses of a variable. + + Use self.cache to store extra information about variables. + """ + + def __init__(self): + """ + Create a CTreeCutter instance. + """ + + self.accum = {'vars': {}, 'functions': {}, 'structs': {}} + self.cache = None + + def start_n_end(self, node): + pass + + def visit_FuncDecl(self, node): + """ + Visits FuncDecl nodes in a tree. Adds relevant data about them to accum + after visiting all of its children as well. + + :param node: The current node. + + :type node: c_ast.FuncDecl + + .. todo:: + Add other relevant information about functions like parameters and + return type. + """ + + self.cache['group'] = 'functions' + self.cache['meta']['end_ln'] = node.coord.line + self.cache['meta']['end_col'] = node.coord.column + + self.generic_visit(node) + + def visit_Struct(self, node): + """ + Visits Struct nodes in a tree. Adds relevant data about them to accum + after visiting all of its children as well. + + :param node: The current node. + + :type node: c_ast.Struct + + .. todo:: + Find other relevant information to add about structs. + """ + + self.cache['group'] = 'structs' + self.cache['meta']['end_ln'] = node.coord.line + self.cache['meta']['end_col'] = node.coord.column + + self.generic_visit(node) + + def visit_Decl(self, node): + """ + Visits Decl nodes in a tree. Adds relevant data about them to accum + after visiting all of its children as well. + + :param node: The current node. + + :type node: c_ast.Decl + """ + + self.cache = {'group': 'vars', 'meta': {}} + + self.cache['meta']['start_ln'] = node.coord.line + self.cache['meta']['start_col'] = node.coord.column + self.cache['meta']['end_ln'] = node.coord.line + self.cache['meta']['end_col'] = node.coord.column + + self.generic_visit(node) + + self.accum[self.cache['group']][node.name] = self.cache['meta'] + self.cache = None + +def parse_c(codelet): + """ + Adds 'symbols' field to the codelet after parsing the c code. 
+ + :param codelet: The codelet object to parsed. + + :type code: Codelet + + .. todo:: + Preprocess c code so that no ParseErrors are thrown. + """ + + tree = c_parser.CParser().parse(codelet.code) + cutter = CTreeCutter() + cutter.visit(tree) + codelet.symbols = cutter.accum From a8f918f7c4dc9810d5ecd06c14ef87b2f757ee32 Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Fri, 18 Apr 2014 15:36:38 -0400 Subject: [PATCH 12/36] Update class names. Move language ids to languages.py --- .gitmodules | 3 +++ bitshift/config.py | 10 ---------- bitshift/languages.py | 2 ++ bitshift/parser/__init__.py | 13 +++++-------- bitshift/parser/c.py | 6 +++--- bitshift/parser/java.py | 3 --- bitshift/parser/pylj | 1 + bitshift/parser/python.py | 6 +++--- 8 files changed, 17 insertions(+), 27 deletions(-) create mode 100644 .gitmodules create mode 100644 bitshift/languages.py delete mode 100644 bitshift/parser/java.py create mode 160000 bitshift/parser/pylj diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..afd8277 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "bitshift/parser/pylj"] + path = bitshift/parser/pylj + url = git@github.com:musiKk/plyj.git diff --git a/bitshift/config.py b/bitshift/config.py index e69e367..6c7be42 100644 --- a/bitshift/config.py +++ b/bitshift/config.py @@ -4,13 +4,3 @@ Module to contain definitions of all Flask variables required by the app module. 
DEBUG = True SECRET_KEY = "\x89\x87\x9a9\xab{\xda\xfe.28\xb4\x18\x01\x95]]\xd2\xeaen\xe0Ot" - -LANG_PYTHON = 0 -LANG_C = 1 -LANG_JAVA = 2 - -PYG_IDS = { - "Python": LANG_PYTHON, - "C": LANG_C, - "JAVA": LANG_JAVA -} diff --git a/bitshift/languages.py b/bitshift/languages.py new file mode 100644 index 0000000..6775711 --- /dev/null +++ b/bitshift/languages.py @@ -0,0 +1,2 @@ + +LANGS = ["Python", "C"] diff --git a/bitshift/parser/__init__.py b/bitshift/parser/__init__.py index 3476487..d3915fc 100644 --- a/bitshift/parser/__init__.py +++ b/bitshift/parser/__init__.py @@ -1,7 +1,6 @@ from .python import parse_py from .c import parse_c -from .java import parse_java -from bitshift.config import LANG_PYTHON, LANG_C, LANG_JAVA, PYG_IDS +from ..languages import LANGS import pygments.lexers as pgl _all__ = ["parse"] @@ -21,9 +20,9 @@ def _lang(codelet): if codelet.filename is not None: return pgl.guess_lexer_for_filename(codelet.filename).name - return PYG_IDS[pgl.guess_lexer(codelet.code)] + return LANGS.index(pgl.guess_lexer(codelet.code)) -def parser(codelet): +def parse(codelet): """ Dispatch codelet to correct parser by language of code. @@ -34,10 +33,8 @@ def parser(codelet): lang = _lang(codelet) - if lang == LANG_PYTHON: + if lang == LANGS.index("Python"): parse_py(codelet) - elif lang == LANG_C: + elif lang == LANGS.index("C"): parse_c(codelet) - elif lang == LANG_JAVA: - parse_java(codelet) diff --git a/bitshift/parser/c.py b/bitshift/parser/c.py index 7754b81..50ee6eb 100644 --- a/bitshift/parser/c.py +++ b/bitshift/parser/c.py @@ -1,6 +1,6 @@ from pycparser import c_parser, c_ast -class CTreeCutter(c_ast.NodeVisitor): +class _TreeCutter(c_ast.NodeVisitor): """ Local node visitor for c abstract syntax trees. @@ -18,7 +18,7 @@ class CTreeCutter(c_ast.NodeVisitor): def __init__(self): """ - Create a CTreeCutter instance. + Create a _TreeCutter instance. 
""" self.accum = {'vars': {}, 'functions': {}, 'structs': {}} @@ -101,6 +101,6 @@ def parse_c(codelet): """ tree = c_parser.CParser().parse(codelet.code) - cutter = CTreeCutter() + cutter = _TreeCutter() cutter.visit(tree) codelet.symbols = cutter.accum diff --git a/bitshift/parser/java.py b/bitshift/parser/java.py deleted file mode 100644 index a495a10..0000000 --- a/bitshift/parser/java.py +++ /dev/null @@ -1,3 +0,0 @@ - -def parse_java(): - pass diff --git a/bitshift/parser/pylj b/bitshift/parser/pylj new file mode 160000 index 0000000..323dd4e --- /dev/null +++ b/bitshift/parser/pylj @@ -0,0 +1 @@ +Subproject commit 323dd4e266b47579aef2347e68214a6fbe083add diff --git a/bitshift/parser/python.py b/bitshift/parser/python.py index 8c59301..1e011fb 100644 --- a/bitshift/parser/python.py +++ b/bitshift/parser/python.py @@ -1,6 +1,6 @@ import ast -class PyTreeCutter(ast.NodeVisitor): +class _TreeCutter(ast.NodeVisitor): """ Local node visitor for python abstract syntax trees. @@ -18,7 +18,7 @@ class PyTreeCutter(ast.NodeVisitor): def __init__(self): """ - Create a PyTreeCutter instance. + Create a _TreeCutter instance. """ self.accum = {'vars': {}, 'functions': {}, 'classes': {}} @@ -121,6 +121,6 @@ def parse_py(codelet): """ tree = ast.parse(codelet.code) - cutter = PyTreeCutter() + cutter = _TreeCutter() cutter.visit(tree) codelet.symbols = cutter.accum From 430b7d35883c2d3d2b4b2e721c8db6eee7e6a8ab Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Fri, 18 Apr 2014 15:37:21 -0400 Subject: [PATCH 13/36] Remove unecessary submodule. 
--- .gitmodules | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 .gitmodules diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index afd8277..0000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "bitshift/parser/pylj"] - path = bitshift/parser/pylj - url = git@github.com:musiKk/plyj.git From 3bc748242d29dfec7120e0314061ffece28d2295 Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Fri, 18 Apr 2014 15:45:12 -0400 Subject: [PATCH 14/36] Refactor parser/__init__.py for new parsing mechanism --- bitshift/parser/__init__.py | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/bitshift/parser/__init__.py b/bitshift/parser/__init__.py index d3915fc..28d3e98 100644 --- a/bitshift/parser/__init__.py +++ b/bitshift/parser/__init__.py @@ -1,40 +1,23 @@ -from .python import parse_py -from .c import parse_c -from ..languages import LANGS -import pygments.lexers as pgl +import os _all__ = ["parse"] -def _lang(codelet): +def parse(codelet): """ - Private function to identify the language of a codelet. + Sends codelet code to the Java parsing process via a named pipe. Reads the + resulting symbols from the pipe and updates the codelet. - :param codelet: The codelet object to identified. + :param codelet: The codelet object to parsed. :type code: Codelet .. todo:: - Modify function to incorporate tags from stackoverflow. - """ - - if codelet.filename is not None: - return pgl.guess_lexer_for_filename(codelet.filename).name - - return LANGS.index(pgl.guess_lexer(codelet.code)) - -def parse(codelet): - """ - Dispatch codelet to correct parser by language of code. + Create a named pipe for python process to communicate with Java + process. - :param codelet: The codelet object to parsed. + Send the process id and codelet code through the named pipe. - :type code: Codelet + Read the result from the named pipe and turn it into a dict. 
""" - - lang = _lang(codelet) - - if lang == LANGS.index("Python"): - parse_py(codelet) - elif lang == LANGS.index("C"): - parse_c(codelet) + pass From a1066dd0936dd8b67fbebdad2fac8babb1f1c4e5 Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Fri, 18 Apr 2014 16:31:43 -0400 Subject: [PATCH 15/36] Modify parser/__init__.py so that it communicates with the Java parsing process and reads a result back from a unique file. Add template files for Java parsers. --- bitshift/parser/__init__.py | 26 +++++++++++++++++++--- parsers/src/org/bitshift/parsers/Parse.java | 0 .../src/org/bitshift/parsers/langs/CParser.java | 0 .../src/org/bitshift/parsers/langs/JavaParser.java | 0 .../org/bitshift/parsers/langs/PythonParser.java | 0 .../src/org/bitshift/parsers/langs/RubyParser.java | 0 6 files changed, 23 insertions(+), 3 deletions(-) create mode 100644 parsers/src/org/bitshift/parsers/Parse.java create mode 100644 parsers/src/org/bitshift/parsers/langs/CParser.java create mode 100644 parsers/src/org/bitshift/parsers/langs/JavaParser.java create mode 100644 parsers/src/org/bitshift/parsers/langs/PythonParser.java create mode 100644 parsers/src/org/bitshift/parsers/langs/RubyParser.java diff --git a/bitshift/parser/__init__.py b/bitshift/parser/__init__.py index 28d3e98..0fe4602 100644 --- a/bitshift/parser/__init__.py +++ b/bitshift/parser/__init__.py @@ -1,15 +1,19 @@ -import os +import os, ast _all__ = ["parse"] -def parse(codelet): +WRITE_F = "../../tmp/parser.proc" + +def parse(codelet, pid): """ Sends codelet code to the Java parsing process via a named pipe. Reads the resulting symbols from the pipe and updates the codelet. :param codelet: The codelet object to parsed. + :param pid: The id of the current python process. :type code: Codelet + :param pid: str. .. todo:: Create a named pipe for python process to communicate with Java @@ -19,5 +23,21 @@ def parse(codelet): Read the result from the named pipe and turn it into a dict. 
""" - pass + + with open(WRITE_F, 'a') as wf: + wf.write('pid:' + str(pid) + '\n') + wf.write('body:\n' + codelet.code) + + read_f = '../../tmp/%s_py.data' % str(pid) + data = '' + + while data == '': + with open(read_f) as rf: + data = rf.read() + + os.remove(read_f) + + results = data.split('\n') + codelet.language = results[0].split(',')[1] + codelet.symbols = ast.literal_eval(results[1].split(',')[1]) diff --git a/parsers/src/org/bitshift/parsers/Parse.java b/parsers/src/org/bitshift/parsers/Parse.java new file mode 100644 index 0000000..e69de29 diff --git a/parsers/src/org/bitshift/parsers/langs/CParser.java b/parsers/src/org/bitshift/parsers/langs/CParser.java new file mode 100644 index 0000000..e69de29 diff --git a/parsers/src/org/bitshift/parsers/langs/JavaParser.java b/parsers/src/org/bitshift/parsers/langs/JavaParser.java new file mode 100644 index 0000000..e69de29 diff --git a/parsers/src/org/bitshift/parsers/langs/PythonParser.java b/parsers/src/org/bitshift/parsers/langs/PythonParser.java new file mode 100644 index 0000000..e69de29 diff --git a/parsers/src/org/bitshift/parsers/langs/RubyParser.java b/parsers/src/org/bitshift/parsers/langs/RubyParser.java new file mode 100644 index 0000000..e69de29 From 63b09caa6cb3d1f9bc3dbddf49b9292ec619f2b6 Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Fri, 18 Apr 2014 19:40:31 -0400 Subject: [PATCH 16/36] Changed directory structure of java parser. Decided on multiple parsers in different languages, refactored bitshift/parser to fit with that paradigm. 
--- bitshift/languages.py | 2 +- bitshift/parser/__init__.py | 55 ++++++++----- parsers/java/src/org/bitshift/parsing/Parse.java | 5 ++ .../src/org/bitshift/parsing/parsers/CParser.java | 7 ++ .../org/bitshift/parsing/parsers/JavaParser.java | 90 ++++++++++++++++++++++ .../src/org/bitshift/parsing/parsers/Parser.java | 17 ++++ .../src/org/bitshift/parsing/symbols/CSymbols.java | 1 + .../org/bitshift/parsing/symbols/JavaSymbols.java | 1 + .../src/org/bitshift/parsing/symbols/Symbols.java | 9 +++ parsers/src/org/bitshift/parsers/Parse.java | 0 .../src/org/bitshift/parsers/langs/CParser.java | 0 .../src/org/bitshift/parsers/langs/JavaParser.java | 0 .../org/bitshift/parsers/langs/PythonParser.java | 0 .../src/org/bitshift/parsers/langs/RubyParser.java | 0 14 files changed, 168 insertions(+), 19 deletions(-) create mode 100644 parsers/java/src/org/bitshift/parsing/Parse.java create mode 100644 parsers/java/src/org/bitshift/parsing/parsers/CParser.java create mode 100644 parsers/java/src/org/bitshift/parsing/parsers/JavaParser.java create mode 100644 parsers/java/src/org/bitshift/parsing/parsers/Parser.java create mode 100644 parsers/java/src/org/bitshift/parsing/symbols/CSymbols.java create mode 100644 parsers/java/src/org/bitshift/parsing/symbols/JavaSymbols.java create mode 100644 parsers/java/src/org/bitshift/parsing/symbols/Symbols.java delete mode 100644 parsers/src/org/bitshift/parsers/Parse.java delete mode 100644 parsers/src/org/bitshift/parsers/langs/CParser.java delete mode 100644 parsers/src/org/bitshift/parsers/langs/JavaParser.java delete mode 100644 parsers/src/org/bitshift/parsers/langs/PythonParser.java delete mode 100644 parsers/src/org/bitshift/parsers/langs/RubyParser.java diff --git a/bitshift/languages.py b/bitshift/languages.py index 6775711..b04c094 100644 --- a/bitshift/languages.py +++ b/bitshift/languages.py @@ -1,2 +1,2 @@ -LANGS = ["Python", "C"] +LANGS = ["Python", "C", "Java", "Ruby"] diff --git a/bitshift/parser/__init__.py 
b/bitshift/parser/__init__.py index 0fe4602..6f2f898 100644 --- a/bitshift/parser/__init__.py +++ b/bitshift/parser/__init__.py @@ -1,8 +1,26 @@ import os, ast +import pygments.lexers as pgl +from ..languages import LANGS +from .python import parse_py _all__ = ["parse"] -WRITE_F = "../../tmp/parser.proc" +def _lang(codelet): + """ + Private function to identify the language of a codelet. + + :param codelet: The codelet object to identified. + + :type code: Codelet + + .. todo:: + Modify function to incorporate tags from stackoverflow. + """ + + if codelet.filename is not None: + return pgl.guess_lexer_for_filename(codelet.filename).name + + return LANGS.index(pgl.guess_lexer(codelet.code)) def parse(codelet, pid): """ @@ -16,28 +34,29 @@ def parse(codelet, pid): :param pid: str. .. todo:: - Create a named pipe for python process to communicate with Java - process. + Identify languages using pygments and change the write file based on + that. + """ - Send the process id and codelet code through the named pipe. + codelet.language = _lang(codelet) - Read the result from the named pipe and turn it into a dict. 
- """ + if codelet.language == LANGS.index("Python"): + parse_py(codelet) - with open(WRITE_F, 'a') as wf: - wf.write('pid:' + str(pid) + '\n') - wf.write('body:\n' + codelet.code) + else: + write_f = "../../tmp/%d_parser.proc" % codelet.language - read_f = '../../tmp/%s_py.data' % str(pid) - data = '' + with open(write_f, 'a') as wf: + wf.write('pid:' + str(pid) + '\n') + wf.write('body:\n' + codelet.code) - while data == '': - with open(read_f) as rf: - data = rf.read() + read_f = '../../tmp/%s_py.data' % str(pid) + data = '' - os.remove(read_f) + while data == '': + with open(read_f) as rf: + data = rf.read() - results = data.split('\n') - codelet.language = results[0].split(',')[1] - codelet.symbols = ast.literal_eval(results[1].split(',')[1]) + os.remove(read_f) + codelet.symbols = ast.literal_eval(data.split(',')[1]) diff --git a/parsers/java/src/org/bitshift/parsing/Parse.java b/parsers/java/src/org/bitshift/parsing/Parse.java new file mode 100644 index 0000000..cad9434 --- /dev/null +++ b/parsers/java/src/org/bitshift/parsing/Parse.java @@ -0,0 +1,5 @@ + + +public class Parse { + +} diff --git a/parsers/java/src/org/bitshift/parsing/parsers/CParser.java b/parsers/java/src/org/bitshift/parsing/parsers/CParser.java new file mode 100644 index 0000000..247f9ee --- /dev/null +++ b/parsers/java/src/org/bitshift/parsing/parsers/CParser.java @@ -0,0 +1,7 @@ +package org.bitshift.parsing.parsers; + +import org.bitshift.parsing.parsers.Parser; + +public class CParser extends Parser { + +} diff --git a/parsers/java/src/org/bitshift/parsing/parsers/JavaParser.java b/parsers/java/src/org/bitshift/parsing/parsers/JavaParser.java new file mode 100644 index 0000000..e33c8e4 --- /dev/null +++ b/parsers/java/src/org/bitshift/parsing/parsers/JavaParser.java @@ -0,0 +1,90 @@ +package org.bitshift.parsing.parsers; + +import java.util.Map; + +import org.eclipse.jdt.core.JavaCore; +import org.eclipse.jdt.core.dom.AST; +import org.eclipse.jdt.core.dom.ASTParser; +import 
org.eclipse.jdt.core.dom.ASTVisitor; +import org.eclipse.jdt.core.dom.CompilationUnit; +import org.eclipse.jdt.core.dom.Assignment; +import org.eclipse.jdt.core.dom.ClassInstanceCreation; +import org.eclipse.jdt.core.dom.FieldAccess +import org.eclipse.jdt.core.dom.FieldDeclaration; +import org.eclipse.jdt.core.dom.MethodDeclaration; +import org.eclipse.jdt.core.dom.MethodInvocation; +import org.eclipse.jdt.core.dom.PackageDeclaration; +import org.eclipse.jdt.core.dom.TypeDeclaration; +import org.eclipse.jdt.core.dom.VariableDeclarationStatement + +import org.bitshift.parsing.parsers.Parser; +import org.bitshift.parsing.symbols.Symbols; +import org.bitshift.parsing.symbols.JavaSymbols; + +public class JavaParser extends Parser { + + @Override + public Symbols genSymbols() { + char[] source = this.source.toCharArray(); + + ASTParser parser = ASTParser.newParser(AST.JLS3); + parser.setSource(source); + + Map options = JavaCore.getOptions(); + parser.setCompilerOptions(options); + + //Work on parsing partial java code later + CompilationUnit result = (CompilationUnit) parser.createAST(null); + + ASTVisitor visitor = new NodeVisitor(); + result.accept(visitor); + + return visitor.symbols; + } + + class NodeVisitor extends ASTVisitor { + + protected Symbols symbols; + + public NodeVisitor() { + symbols = new JavaSymbols(); + } + + public boolean visit(Assignment node) { + + } + + public boolean visit(ClassInstanceCreation node) { + + } + + public boolean visit(FieldAccess node) { + + } + + public boolean visit(FieldDeclaration node) { + + } + + public boolean visit(MethodDeclaration node) { + + } + + public boolean visit(MethodInvocation node) { + + } + + public boolean visit(PackageDeclaration node) { + + } + + public boolean visit(TypeDeclaration node) { + + } + + public boolean visit(VariableDeclarationStatement node) { + + } + + } +} diff --git a/parsers/java/src/org/bitshift/parsing/parsers/Parser.java b/parsers/java/src/org/bitshift/parsing/parsers/Parser.java new 
file mode 100644 index 0000000..ae74725 --- /dev/null +++ b/parsers/java/src/org/bitshift/parsing/parsers/Parser.java @@ -0,0 +1,17 @@ +package org.bitshift.parsing.parsers; + +import org.bitshift.parsing.symbols.Symbols; + +public class Parser { + + protected String source; + + public Parser(String source) { + + } + + public Symbols genSymbols() { + + } +} + diff --git a/parsers/java/src/org/bitshift/parsing/symbols/CSymbols.java b/parsers/java/src/org/bitshift/parsing/symbols/CSymbols.java new file mode 100644 index 0000000..f71667e --- /dev/null +++ b/parsers/java/src/org/bitshift/parsing/symbols/CSymbols.java @@ -0,0 +1 @@ +package org.bitshift.parsing.symbols; diff --git a/parsers/java/src/org/bitshift/parsing/symbols/JavaSymbols.java b/parsers/java/src/org/bitshift/parsing/symbols/JavaSymbols.java new file mode 100644 index 0000000..f71667e --- /dev/null +++ b/parsers/java/src/org/bitshift/parsing/symbols/JavaSymbols.java @@ -0,0 +1 @@ +package org.bitshift.parsing.symbols; diff --git a/parsers/java/src/org/bitshift/parsing/symbols/Symbols.java b/parsers/java/src/org/bitshift/parsing/symbols/Symbols.java new file mode 100644 index 0000000..70762b1 --- /dev/null +++ b/parsers/java/src/org/bitshift/parsing/symbols/Symbols.java @@ -0,0 +1,9 @@ +package org.bitshift.parsing.symbols; + +public class Symbols { + + public Symbols() { + + } + +} diff --git a/parsers/src/org/bitshift/parsers/Parse.java b/parsers/src/org/bitshift/parsers/Parse.java deleted file mode 100644 index e69de29..0000000 diff --git a/parsers/src/org/bitshift/parsers/langs/CParser.java b/parsers/src/org/bitshift/parsers/langs/CParser.java deleted file mode 100644 index e69de29..0000000 diff --git a/parsers/src/org/bitshift/parsers/langs/JavaParser.java b/parsers/src/org/bitshift/parsers/langs/JavaParser.java deleted file mode 100644 index e69de29..0000000 diff --git a/parsers/src/org/bitshift/parsers/langs/PythonParser.java b/parsers/src/org/bitshift/parsers/langs/PythonParser.java deleted file 
mode 100644 index e69de29..0000000 diff --git a/parsers/src/org/bitshift/parsers/langs/RubyParser.java b/parsers/src/org/bitshift/parsers/langs/RubyParser.java deleted file mode 100644 index e69de29..0000000 From 669c30cac75b62604fcac7e0d1723ef80c2cc066 Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Fri, 18 Apr 2014 22:13:21 -0400 Subject: [PATCH 17/36] Mod: Parse.java: Added comments JavaParser.java: Updated the genSymbols method and a private class 'NodeVisitor' which implements ASTVisitor. genSymbols returns an instance of the Symbols class containing all relevant data about the Java code. JavaSymbols.java: Add fields which map class, interface, method, field, and variable names to positions. --- bitshift/parser/pylj | 1 - parsers/java/src/org/bitshift/parsing/Parse.java | 3 +- .../org/bitshift/parsing/parsers/JavaParser.java | 67 +++++++++--- .../org/bitshift/parsing/symbols/JavaSymbols.java | 120 +++++++++++++++++++++ 4 files changed, 174 insertions(+), 17 deletions(-) delete mode 160000 bitshift/parser/pylj diff --git a/bitshift/parser/pylj b/bitshift/parser/pylj deleted file mode 160000 index 323dd4e..0000000 --- a/bitshift/parser/pylj +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 323dd4e266b47579aef2347e68214a6fbe083add diff --git a/parsers/java/src/org/bitshift/parsing/Parse.java b/parsers/java/src/org/bitshift/parsing/Parse.java index cad9434..9d8ade7 100644 --- a/parsers/java/src/org/bitshift/parsing/Parse.java +++ b/parsers/java/src/org/bitshift/parsing/Parse.java @@ -1,5 +1,6 @@ - +/*TODO: Create main method which will loop and check for updates to a file. 
+ * If that file is updated, parse the input and print it out.*/ public class Parse { } diff --git a/parsers/java/src/org/bitshift/parsing/parsers/JavaParser.java b/parsers/java/src/org/bitshift/parsing/parsers/JavaParser.java index e33c8e4..39d4b47 100644 --- a/parsers/java/src/org/bitshift/parsing/parsers/JavaParser.java +++ b/parsers/java/src/org/bitshift/parsing/parsers/JavaParser.java @@ -7,13 +7,13 @@ import org.eclipse.jdt.core.dom.AST; import org.eclipse.jdt.core.dom.ASTParser; import org.eclipse.jdt.core.dom.ASTVisitor; import org.eclipse.jdt.core.dom.CompilationUnit; -import org.eclipse.jdt.core.dom.Assignment; import org.eclipse.jdt.core.dom.ClassInstanceCreation; import org.eclipse.jdt.core.dom.FieldAccess import org.eclipse.jdt.core.dom.FieldDeclaration; import org.eclipse.jdt.core.dom.MethodDeclaration; import org.eclipse.jdt.core.dom.MethodInvocation; import org.eclipse.jdt.core.dom.PackageDeclaration; +import org.eclipse.jdt.core.dom.Statement; import org.eclipse.jdt.core.dom.TypeDeclaration; import org.eclipse.jdt.core.dom.VariableDeclarationStatement @@ -21,6 +21,8 @@ import org.bitshift.parsing.parsers.Parser; import org.bitshift.parsing.symbols.Symbols; import org.bitshift.parsing.symbols.JavaSymbols; +/*TODO: Work on parsing partial java code. 
+ * Make sure all names of nodes are strings.*/ public class JavaParser extends Parser { @Override @@ -36,7 +38,7 @@ public class JavaParser extends Parser { //Work on parsing partial java code later CompilationUnit result = (CompilationUnit) parser.createAST(null); - ASTVisitor visitor = new NodeVisitor(); + ASTVisitor visitor = new NodeVisitor(result); result.accept(visitor); return visitor.symbols; @@ -45,45 +47,80 @@ public class JavaParser extends Parser { class NodeVisitor extends ASTVisitor { protected Symbols symbols; + protected CompilationUnit compUnit; - public NodeVisitor() { + public NodeVisitor(CompilationUnit compUnit) { symbols = new JavaSymbols(); } - public boolean visit(Assignment node) { - - } - public boolean visit(ClassInstanceCreation node) { - + String name = node.getType().getName(); + int sl = compUnit.getLineNumber(node.getStartPosition()) - 1; + int sc = compUnit.getColumnNumber(node.getStartPosition()) - 1; + symbols.insertClassInstance(name, sl, sc, null, null); + return true; } public boolean visit(FieldAccess node) { - + String name = node.getName(); + int sl = compUnit.getLineNumber(node.getStartPosition()) - 1; + int sc = compUnit.getColumnNumber(node.getStartPosition()) - 1; + symbols.insertFieldAccess(name, sl, sc, null, null); + return true; } public boolean visit(FieldDeclaration node) { - + String name = node.getType().getName(); + int sl = compUnit.getLineNumber(node.getStartPosition()) - 1; + int sc = compUnit.getColumnNumber(node.getStartPosition()) - 1; + symbols.insertFieldDeclaration(name, sl, sc, null, null); + return true; } public boolean visit(MethodDeclaration node) { - + String name = node.getName(); + List statements = node.getBody().statements(); + Statement last = statements.get(statements.size() - 1); + + int sl = compUnit.getLineNumber(node.getStartPosition()) - 1; + int sc = compUnit.getColumnNumber(node.getStartPosition()) - 1; + int el = compUnit.getLineNumber(last.getStartPosition()) - 1; + int ec = 
compUnit.getColumnNumber(last.getStartPosition()) - 1; + symbols.insertMethodDeclaration(name, sl, sc, el, ec); + return true; } public boolean visit(MethodInvocation node) { - + String name = node.getName(); + int sl = compUnit.getLineNumber(node.getStartPosition()) - 1; + int sc = compUnit.getColumnNumber(node.getStartPosition()) - 1; + symbols.insertMethodInvocation(name, sl, sc, null, null); + return true; } public boolean visit(PackageDeclaration node) { - + symbols.setPackage(node.getName()); + return true; } public boolean visit(TypeDeclaration node) { - + String name = node.getName(); + int sl = compUnit.getLineNumber(node.getStartPosition()) - 1; + int sc = compUnit.getColumnNumber(node.getStartPosition()) - 1; + if (node.isInterface()) { + symbols.insertInterfaceDeclaration(name, sl, sc, null, null); + } else { + symbols.insertClassDeclaration(name, sl, sc, null, null); + } + return true; } public boolean visit(VariableDeclarationStatement node) { - + String name = node.getType().getName(); + int sl = compUnit.getLineNumber(node.getStartPosition()) - 1; + int sc = compUnit.getColumnNumber(node.getStartPosition()) - 1; + symbols.insertVariableDeclaration(name, sl, sc, null, null); + return true; } } diff --git a/parsers/java/src/org/bitshift/parsing/symbols/JavaSymbols.java b/parsers/java/src/org/bitshift/parsing/symbols/JavaSymbols.java index f71667e..4f39c95 100644 --- a/parsers/java/src/org/bitshift/parsing/symbols/JavaSymbols.java +++ b/parsers/java/src/org/bitshift/parsing/symbols/JavaSymbols.java @@ -1 +1,121 @@ package org.bitshift.parsing.symbols; + +import java.util.List; +import java.util.Map; +import java.util.HashMap; +import java.util.ArrayList; +import org.bitshift.parsing.symbols.Symbols; + +/*TODO: Overwrite toString*/ +public class JavaSymbols extends Symbols { + + private String _packageName; + private Map>> _classes; + private Map>> _interfaces; + private Map>> _methods; + private Map>> _fields; + private Map>> _vars; + + public 
JavaSymbols() { + _packageName = null; + _classes = new HashMap>>(); + _interfaces = new HashMap>>(); + _methods = new HashMap>>(); + _fields = new HashMap>>(); + _vars = new HashMap>>(); + } + + public boolean setPackage(String name) { + _packageName = name; + } + + public boolean insertClassDeclaration(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { + List pos = new ArrayList(4); + pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); + List> copy = _classes.get(name); + copy.add(0, pos); + _classes.put(name, copy); + return true; + } + public boolean insertClassInstance(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { + List pos = new ArrayList(4); + pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); + List> copy = _classes.get(name); + copy.add(pos); + _classes.put(name, copy); + return true; + } + + public boolean insertInterfaceDeclaration(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { + List pos = new ArrayList(4); + pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); + List> copy = _classes.get(name); + copy.add(0, pos); + _classes.put(name, copy); + return true; + } + public boolean insertInterfaceInstance(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { + List pos = new ArrayList(4); + pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); + List> copy = _classes.get(name); + copy.add(pos); + _classes.put(name, copy); + return true; + } + + public boolean insertMethodDeclaration(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { + List pos = new ArrayList(4); + pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); + List> copy = _classes.get(name); + copy.add(0, pos); + _classes.put(name, copy); + return true; + } + 
public boolean insertMethodInvocation(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { + List pos = new ArrayList(4); + pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); + List> copy = _classes.get(name); + copy.add(pos); + _classes.put(name, copy); + return true; + } + + public boolean insertFieldDeclaration(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { + List pos = new ArrayList(4); + pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); + List> copy = _classes.get(name); + copy.add(0, pos); + _classes.put(name, copy); + return true; + } + public boolean insertFieldAccess(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { + List pos = new ArrayList(4); + pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); + List> copy = _classes.get(name); + copy.add(pos); + _classes.put(name, copy); + return true; + } + + public boolean insertVariableDeclaration(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { + List pos = new ArrayList(4); + pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); + List> copy = _classes.get(name); + copy.add(0, pos); + _classes.put(name, copy); + return true; + } + public boolean insertVariableAccess(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { + List pos = new ArrayList(4); + pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); + List> copy = _classes.get(name); + copy.add(pos); + _classes.put(name, copy); + return true; + } + + public String toString() { + + } +} + From 77e2b6f524bf5d6006fd8a42166abdde4095d672 Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Sat, 19 Apr 2014 15:26:38 -0400 Subject: [PATCH 18/36] Fix errors in java parser, mostly casting issues. 
In Parse.java, set up a tcp server for communication with python processes. Builds with maven --- .gitignore | 3 + parsers/java/pom.xml | 99 ++++++++++++++++++++++ .../src/main/java/org/bitshift/parsing/Parse.java | 40 +++++++++ .../org/bitshift/parsing/parsers/CParser.java | 4 - .../org/bitshift/parsing/parsers/JavaParser.java | 81 +++++++++++++----- .../java}/org/bitshift/parsing/parsers/Parser.java | 7 +- .../org/bitshift/parsing/symbols/CSymbols.java | 0 .../org/bitshift/parsing/symbols/JavaSymbols.java | 76 ++++++++++++----- .../org/bitshift/parsing/symbols/Symbols.java | 0 parsers/java/src/org/bitshift/parsing/Parse.java | 6 -- 10 files changed, 258 insertions(+), 58 deletions(-) create mode 100755 parsers/java/pom.xml create mode 100644 parsers/java/src/main/java/org/bitshift/parsing/Parse.java rename parsers/java/src/{ => main/java}/org/bitshift/parsing/parsers/CParser.java (66%) rename parsers/java/src/{ => main/java}/org/bitshift/parsing/parsers/JavaParser.java (57%) rename parsers/java/src/{ => main/java}/org/bitshift/parsing/parsers/Parser.java (63%) rename parsers/java/src/{ => main/java}/org/bitshift/parsing/symbols/CSymbols.java (100%) rename parsers/java/src/{ => main/java}/org/bitshift/parsing/symbols/JavaSymbols.java (65%) rename parsers/java/src/{ => main/java}/org/bitshift/parsing/symbols/Symbols.java (100%) delete mode 100644 parsers/java/src/org/bitshift/parsing/Parse.java diff --git a/.gitignore b/.gitignore index 4ce656b..8b72a6d 100644 --- a/.gitignore +++ b/.gitignore @@ -40,3 +40,6 @@ nosetests.xml .mr.developer.cfg .project .pydevproject + +# Maven +*/target/* diff --git a/parsers/java/pom.xml b/parsers/java/pom.xml new file mode 100755 index 0000000..81ed975 --- /dev/null +++ b/parsers/java/pom.xml @@ -0,0 +1,99 @@ + + + 4.0.0 + + 3.0.0 + + + org.bitshift.parsing + parsing + 0.1 + Java Parser for Bitshift + + + UTF-8 + UTF-8 + + + + + org.eclipse.jdt + core + 3.3.0-v_771 + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 
3.1 + + 1.7 + 1.7 + UTF-8 + + + + org.apache.maven.plugins + maven-source-plugin + 2.2.1 + + + attach-sources + + jar + + + + + + org.apache.maven.plugins + maven-jar-plugin + 2.4 + + + + true + + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.6 + + true + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + package + + shade + + + + + + org.bishift.parsing.Parse + + + + + + + + + diff --git a/parsers/java/src/main/java/org/bitshift/parsing/Parse.java b/parsers/java/src/main/java/org/bitshift/parsing/Parse.java new file mode 100644 index 0000000..593645b --- /dev/null +++ b/parsers/java/src/main/java/org/bitshift/parsing/Parse.java @@ -0,0 +1,40 @@ +import java.io.*; +import java.net.*; + +public class Parse { + + public static void main(String[][] args) { + String fromClient; + String toClient; + + try { + ServerSocket server = new ServerSocket(5002); + + while(true) { + Socket connected = server.accept(); + System.out.println("The client is connected."); + + BufferedReader clientReader = new BufferedReader( + new InputStreamReader(connected.getInputStream())); + + PrintWriter clientWriter = new PrintWriter( + connected.getOutputStream(), true); + + while(true) { + StringBuilder builder = new StringBuilder(); + + while((fromClient = clientReader.readLine()) != null) { + builder.append(fromClient); + } + + fromClient = builder.toString(); + + //Handle the data from the client here + } + } + } catch (IOException ex) { + + } + } + +} diff --git a/parsers/java/src/org/bitshift/parsing/parsers/CParser.java b/parsers/java/src/main/java/org/bitshift/parsing/parsers/CParser.java similarity index 66% rename from parsers/java/src/org/bitshift/parsing/parsers/CParser.java rename to parsers/java/src/main/java/org/bitshift/parsing/parsers/CParser.java index 247f9ee..9cd4308 100644 --- a/parsers/java/src/org/bitshift/parsing/parsers/CParser.java +++ b/parsers/java/src/main/java/org/bitshift/parsing/parsers/CParser.java @@ -1,7 +1,3 @@ package 
org.bitshift.parsing.parsers; import org.bitshift.parsing.parsers.Parser; - -public class CParser extends Parser { - -} diff --git a/parsers/java/src/org/bitshift/parsing/parsers/JavaParser.java b/parsers/java/src/main/java/org/bitshift/parsing/parsers/JavaParser.java similarity index 57% rename from parsers/java/src/org/bitshift/parsing/parsers/JavaParser.java rename to parsers/java/src/main/java/org/bitshift/parsing/parsers/JavaParser.java index 39d4b47..bb7b7e4 100644 --- a/parsers/java/src/org/bitshift/parsing/parsers/JavaParser.java +++ b/parsers/java/src/main/java/org/bitshift/parsing/parsers/JavaParser.java @@ -1,6 +1,9 @@ package org.bitshift.parsing.parsers; +import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.Stack; import org.eclipse.jdt.core.JavaCore; import org.eclipse.jdt.core.dom.AST; @@ -8,23 +11,39 @@ import org.eclipse.jdt.core.dom.ASTParser; import org.eclipse.jdt.core.dom.ASTVisitor; import org.eclipse.jdt.core.dom.CompilationUnit; import org.eclipse.jdt.core.dom.ClassInstanceCreation; -import org.eclipse.jdt.core.dom.FieldAccess +import org.eclipse.jdt.core.dom.FieldAccess; import org.eclipse.jdt.core.dom.FieldDeclaration; import org.eclipse.jdt.core.dom.MethodDeclaration; import org.eclipse.jdt.core.dom.MethodInvocation; +import org.eclipse.jdt.core.dom.Name; import org.eclipse.jdt.core.dom.PackageDeclaration; +import org.eclipse.jdt.core.dom.QualifiedName; +import org.eclipse.jdt.core.dom.QualifiedType; +import org.eclipse.jdt.core.dom.SimpleName; +import org.eclipse.jdt.core.dom.SimpleType; import org.eclipse.jdt.core.dom.Statement; +import org.eclipse.jdt.core.dom.Type; import org.eclipse.jdt.core.dom.TypeDeclaration; -import org.eclipse.jdt.core.dom.VariableDeclarationStatement +import org.eclipse.jdt.core.dom.VariableDeclarationStatement; import org.bitshift.parsing.parsers.Parser; import org.bitshift.parsing.symbols.Symbols; import org.bitshift.parsing.symbols.JavaSymbols; /*TODO: Work on parsing 
partial java code. - * Make sure all names of nodes are strings.*/ + * Change visits to endVisit and implement a cache*/ public class JavaParser extends Parser { + protected JavaSymbols symbols; + protected CompilationUnit compUnit; + private Stack> _cache; + + public JavaParser(String source) { + super(source); + this.symbols = new JavaSymbols(); + this._cache = new Stack>(); + } + @Override public Symbols genSymbols() { char[] source = this.source.toCharArray(); @@ -36,49 +55,54 @@ public class JavaParser extends Parser { parser.setCompilerOptions(options); //Work on parsing partial java code later - CompilationUnit result = (CompilationUnit) parser.createAST(null); + this.compUnit = (CompilationUnit) parser.createAST(null); - ASTVisitor visitor = new NodeVisitor(result); - result.accept(visitor); + ASTVisitor visitor = new NodeVisitor(); + this.compUnit.accept(visitor); - return visitor.symbols; + return this.symbols; } class NodeVisitor extends ASTVisitor { - protected Symbols symbols; - protected CompilationUnit compUnit; - - public NodeVisitor(CompilationUnit compUnit) { - symbols = new JavaSymbols(); - } - public boolean visit(ClassInstanceCreation node) { - String name = node.getType().getName(); + Type typeObj = node.getType(); + Name nameObj = typeObj.isQualifiedType() ? ((QualifiedType) typeObj).getName() : ((SimpleType) typeObj).getName(); + String name = nameObj.isQualifiedName() ? ((QualifiedName)nameObj).getFullyQualifiedName() : ((SimpleName)nameObj).getIdentifier(); + int sl = compUnit.getLineNumber(node.getStartPosition()) - 1; int sc = compUnit.getColumnNumber(node.getStartPosition()) - 1; + symbols.insertClassInstance(name, sl, sc, null, null); return true; } public boolean visit(FieldAccess node) { - String name = node.getName(); + Name nameObj = node.getName(); + String name = nameObj.isQualifiedName() ? 
((QualifiedName)nameObj).getFullyQualifiedName() : ((SimpleName)nameObj).getIdentifier(); + int sl = compUnit.getLineNumber(node.getStartPosition()) - 1; int sc = compUnit.getColumnNumber(node.getStartPosition()) - 1; + symbols.insertFieldAccess(name, sl, sc, null, null); return true; } public boolean visit(FieldDeclaration node) { - String name = node.getType().getName(); + Type typeObj = node.getType(); + Name nameObj = typeObj.isQualifiedType() ? ((QualifiedType) typeObj).getName() : ((SimpleType) typeObj).getName(); + String name = nameObj.isQualifiedName() ? ((QualifiedName)nameObj).getFullyQualifiedName() : ((SimpleName)nameObj).getIdentifier(); + int sl = compUnit.getLineNumber(node.getStartPosition()) - 1; int sc = compUnit.getColumnNumber(node.getStartPosition()) - 1; + symbols.insertFieldDeclaration(name, sl, sc, null, null); return true; } public boolean visit(MethodDeclaration node) { - String name = node.getName(); + Name nameObj = node.getName(); + String name = nameObj.isQualifiedName() ? ((QualifiedName)nameObj).getFullyQualifiedName() : ((SimpleName)nameObj).getIdentifier(); List statements = node.getBody().statements(); Statement last = statements.get(statements.size() - 1); @@ -86,27 +110,37 @@ public class JavaParser extends Parser { int sc = compUnit.getColumnNumber(node.getStartPosition()) - 1; int el = compUnit.getLineNumber(last.getStartPosition()) - 1; int ec = compUnit.getColumnNumber(last.getStartPosition()) - 1; + symbols.insertMethodDeclaration(name, sl, sc, el, ec); return true; } public boolean visit(MethodInvocation node) { - String name = node.getName(); + Name nameObj = node.getName(); + String name = nameObj.isQualifiedName() ? 
((QualifiedName)nameObj).getFullyQualifiedName() : ((SimpleName)nameObj).getIdentifier(); + int sl = compUnit.getLineNumber(node.getStartPosition()) - 1; int sc = compUnit.getColumnNumber(node.getStartPosition()) - 1; + symbols.insertMethodInvocation(name, sl, sc, null, null); return true; } public boolean visit(PackageDeclaration node) { - symbols.setPackage(node.getName()); + Name nameObj = node.getName(); + String name = nameObj.isQualifiedName() ? ((QualifiedName)nameObj).getFullyQualifiedName() : ((SimpleName)nameObj).getIdentifier(); + + symbols.setPackage(name); return true; } public boolean visit(TypeDeclaration node) { - String name = node.getName(); + Name nameObj = node.getName(); + String name = nameObj.isQualifiedName() ? ((QualifiedName)nameObj).getFullyQualifiedName() : ((SimpleName)nameObj).getIdentifier(); + int sl = compUnit.getLineNumber(node.getStartPosition()) - 1; int sc = compUnit.getColumnNumber(node.getStartPosition()) - 1; + if (node.isInterface()) { symbols.insertInterfaceDeclaration(name, sl, sc, null, null); } else { @@ -116,7 +150,10 @@ public class JavaParser extends Parser { } public boolean visit(VariableDeclarationStatement node) { - String name = node.getType().getName(); + Type typeObj = node.getType(); + Name nameObj = typeObj.isQualifiedType() ? ((QualifiedType) typeObj).getName() : ((SimpleType) typeObj).getName(); + String name = nameObj.isQualifiedName() ? 
((QualifiedName)nameObj).getFullyQualifiedName() : ((SimpleName)nameObj).getIdentifier(); + int sl = compUnit.getLineNumber(node.getStartPosition()) - 1; int sc = compUnit.getColumnNumber(node.getStartPosition()) - 1; symbols.insertVariableDeclaration(name, sl, sc, null, null); diff --git a/parsers/java/src/org/bitshift/parsing/parsers/Parser.java b/parsers/java/src/main/java/org/bitshift/parsing/parsers/Parser.java similarity index 63% rename from parsers/java/src/org/bitshift/parsing/parsers/Parser.java rename to parsers/java/src/main/java/org/bitshift/parsing/parsers/Parser.java index ae74725..9b96a8d 100644 --- a/parsers/java/src/org/bitshift/parsing/parsers/Parser.java +++ b/parsers/java/src/main/java/org/bitshift/parsing/parsers/Parser.java @@ -2,16 +2,15 @@ package org.bitshift.parsing.parsers; import org.bitshift.parsing.symbols.Symbols; -public class Parser { +public abstract class Parser { protected String source; public Parser(String source) { - + this.source = source; } - public Symbols genSymbols() { + abstract Symbols genSymbols(); - } } diff --git a/parsers/java/src/org/bitshift/parsing/symbols/CSymbols.java b/parsers/java/src/main/java/org/bitshift/parsing/symbols/CSymbols.java similarity index 100% rename from parsers/java/src/org/bitshift/parsing/symbols/CSymbols.java rename to parsers/java/src/main/java/org/bitshift/parsing/symbols/CSymbols.java diff --git a/parsers/java/src/org/bitshift/parsing/symbols/JavaSymbols.java b/parsers/java/src/main/java/org/bitshift/parsing/symbols/JavaSymbols.java similarity index 65% rename from parsers/java/src/org/bitshift/parsing/symbols/JavaSymbols.java rename to parsers/java/src/main/java/org/bitshift/parsing/symbols/JavaSymbols.java index 4f39c95..aa39cfd 100644 --- a/parsers/java/src/org/bitshift/parsing/symbols/JavaSymbols.java +++ b/parsers/java/src/main/java/org/bitshift/parsing/symbols/JavaSymbols.java @@ -6,33 +6,38 @@ import java.util.HashMap; import java.util.ArrayList; import 
org.bitshift.parsing.symbols.Symbols; -/*TODO: Overwrite toString*/ +/*TODO: Overwrite toString + * Change instance vars to HashMaps of HashMaps*/ public class JavaSymbols extends Symbols { private String _packageName; - private Map>> _classes; - private Map>> _interfaces; - private Map>> _methods; - private Map>> _fields; - private Map>> _vars; + private Map _classes; + private Map _interfaces; + private Map _methods; + private Map _fields; + private Map _vars; public JavaSymbols() { _packageName = null; - _classes = new HashMap>>(); - _interfaces = new HashMap>>(); - _methods = new HashMap>>(); - _fields = new HashMap>>(); - _vars = new HashMap>>(); + _classes = new HashMap(); + _interfaces = new HashMap(); + _methods = new HashMap(); + _fields = new HashMap(); + _vars = new HashMap(); } public boolean setPackage(String name) { _packageName = name; + return true; } public boolean insertClassDeclaration(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { List pos = new ArrayList(4); pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); - List> copy = _classes.get(name); + + List> copy = (List>)_classes.get(name); + copy = (copy == null) ? new ArrayList>() : copy; + copy.add(0, pos); _classes.put(name, copy); return true; @@ -40,7 +45,10 @@ public class JavaSymbols extends Symbols { public boolean insertClassInstance(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { List pos = new ArrayList(4); pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); - List> copy = _classes.get(name); + + List> copy = (List>)_classes.get(name); + copy = (copy == null) ? 
new ArrayList>() : copy; + copy.add(pos); _classes.put(name, copy); return true; @@ -49,7 +57,10 @@ public class JavaSymbols extends Symbols { public boolean insertInterfaceDeclaration(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { List pos = new ArrayList(4); pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); - List> copy = _classes.get(name); + + List> copy = (List>)_classes.get(name); + copy = (copy == null) ? new ArrayList>() : copy; + copy.add(0, pos); _classes.put(name, copy); return true; @@ -57,7 +68,10 @@ public class JavaSymbols extends Symbols { public boolean insertInterfaceInstance(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { List pos = new ArrayList(4); pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); - List> copy = _classes.get(name); + + List> copy = (List>)_classes.get(name); + copy = (copy == null) ? new ArrayList>() : copy; + copy.add(pos); _classes.put(name, copy); return true; @@ -66,7 +80,10 @@ public class JavaSymbols extends Symbols { public boolean insertMethodDeclaration(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { List pos = new ArrayList(4); pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); - List> copy = _classes.get(name); + + List> copy = (List>)_classes.get(name); + copy = (copy == null) ? new ArrayList>() : copy; + copy.add(0, pos); _classes.put(name, copy); return true; @@ -74,7 +91,10 @@ public class JavaSymbols extends Symbols { public boolean insertMethodInvocation(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { List pos = new ArrayList(4); pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); - List> copy = _classes.get(name); + + List> copy = (List>)_classes.get(name); + copy = (copy == null) ? 
new ArrayList>() : copy; + copy.add(pos); _classes.put(name, copy); return true; @@ -83,7 +103,10 @@ public class JavaSymbols extends Symbols { public boolean insertFieldDeclaration(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { List pos = new ArrayList(4); pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); - List> copy = _classes.get(name); + + List> copy = (List>)_classes.get(name); + copy = (copy == null) ? new ArrayList>() : copy; + copy.add(0, pos); _classes.put(name, copy); return true; @@ -91,7 +114,10 @@ public class JavaSymbols extends Symbols { public boolean insertFieldAccess(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { List pos = new ArrayList(4); pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); - List> copy = _classes.get(name); + + List> copy = (List>)_classes.get(name); + copy = (copy == null) ? new ArrayList>() : copy; + copy.add(pos); _classes.put(name, copy); return true; @@ -100,7 +126,10 @@ public class JavaSymbols extends Symbols { public boolean insertVariableDeclaration(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { List pos = new ArrayList(4); pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); - List> copy = _classes.get(name); + + List> copy = (List>)_classes.get(name); + copy = (copy == null) ? new ArrayList>() : copy; + copy.add(0, pos); _classes.put(name, copy); return true; @@ -108,14 +137,17 @@ public class JavaSymbols extends Symbols { public boolean insertVariableAccess(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { List pos = new ArrayList(4); pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); - List> copy = _classes.get(name); + + List> copy = (List>)_classes.get(name); + copy = (copy == null) ? 
new ArrayList>() : copy; + copy.add(pos); _classes.put(name, copy); return true; } public String toString() { - + return ""; } } diff --git a/parsers/java/src/org/bitshift/parsing/symbols/Symbols.java b/parsers/java/src/main/java/org/bitshift/parsing/symbols/Symbols.java similarity index 100% rename from parsers/java/src/org/bitshift/parsing/symbols/Symbols.java rename to parsers/java/src/main/java/org/bitshift/parsing/symbols/Symbols.java diff --git a/parsers/java/src/org/bitshift/parsing/Parse.java b/parsers/java/src/org/bitshift/parsing/Parse.java deleted file mode 100644 index 9d8ade7..0000000 --- a/parsers/java/src/org/bitshift/parsing/Parse.java +++ /dev/null @@ -1,6 +0,0 @@ - -/*TODO: Create main method which will loop and check for updates to a file. - * If that file is updated, parse the input and print it out.*/ -public class Parse { - -} From 306875dae76a94c657c689f7e6792153297b69fe Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Sun, 20 Apr 2014 01:41:55 -0400 Subject: [PATCH 19/36] Make Parser implement runnable so parsing tasks can be started in separate threads. Make Parser constructor accept a client socket, add reading and writing methods for the socket to JavaParser. Parse main method sets up a server for accepting parse jobs from the crawler, and starts threads for each parse task. 
--- parsers/java/pom.xml | 99 ++-------- .../src/main/java/com/bitshift/parsing/Parse.java | 33 ++++ .../java/com/bitshift/parsing/parsers/CParser.java | 3 + .../com/bitshift/parsing/parsers/JavaParser.java | 215 +++++++++++++++++++++ .../java/com/bitshift/parsing/parsers/Parser.java | 19 ++ .../com/bitshift/parsing/symbols/CSymbols.java | 1 + .../com/bitshift/parsing/symbols/JavaSymbols.java | 159 +++++++++++++++ .../java/com/bitshift/parsing/symbols/Symbols.java | 9 + 8 files changed, 460 insertions(+), 78 deletions(-) mode change 100755 => 100644 parsers/java/pom.xml create mode 100644 parsers/java/src/main/java/com/bitshift/parsing/Parse.java create mode 100644 parsers/java/src/main/java/com/bitshift/parsing/parsers/CParser.java create mode 100644 parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java create mode 100644 parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java create mode 100644 parsers/java/src/main/java/com/bitshift/parsing/symbols/CSymbols.java create mode 100644 parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java create mode 100644 parsers/java/src/main/java/com/bitshift/parsing/symbols/Symbols.java diff --git a/parsers/java/pom.xml b/parsers/java/pom.xml old mode 100755 new mode 100644 index 81ed975..340feb0 --- a/parsers/java/pom.xml +++ b/parsers/java/pom.xml @@ -1,22 +1,23 @@ - - + 4.0.0 - - 3.0.0 - - org.bitshift.parsing + com.bitshift.parsing parsing - 0.1 - Java Parser for Bitshift - - - UTF-8 - UTF-8 - + jar + 1.0-SNAPSHOT + parsing + http://maven.apache.org + junit + junit + 3.8.1 + test + + + org.eclipse.jdt core 3.3.0-v_771 @@ -26,74 +27,16 @@ - org.apache.maven.plugins - maven-compiler-plugin - 3.1 + org.codehaus.mojo + exec-maven-plugin + 1.2.1 - 1.7 - 1.7 - UTF-8 + com.bitshift.parsing.Parse + + - - org.apache.maven.plugins - maven-source-plugin - 2.2.1 - - - attach-sources - - jar - - - - - - org.apache.maven.plugins - maven-jar-plugin - 2.4 - - - - true - - - - - - 
org.apache.maven.plugins - maven-shade-plugin - 1.6 - - true - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - - package - - shade - - - - - - org.bishift.parsing.Parse - - - - - - + diff --git a/parsers/java/src/main/java/com/bitshift/parsing/Parse.java b/parsers/java/src/main/java/com/bitshift/parsing/Parse.java new file mode 100644 index 0000000..fc1d36f --- /dev/null +++ b/parsers/java/src/main/java/com/bitshift/parsing/Parse.java @@ -0,0 +1,33 @@ +package com.bitshift.parsing; + +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.io.PrintWriter; +import java.io.IOException; + +import java.net.ServerSocket; +import java.net.Socket; + +import com.bitshift.parsing.parsers.JavaParser; + +public class Parse { + + public static void main(String[] args) { + String fromClient; + String toClient; + + try { + ServerSocket server = new ServerSocket(5002); + + while(true) { + Socket clientSocket = server.accept(); + + JavaParser parser = new JavaParser(clientSocket); + Thread parserTask = new Thread(parser); + parserTask.start(); + } + } catch (IOException ex) { + } + } + +} diff --git a/parsers/java/src/main/java/com/bitshift/parsing/parsers/CParser.java b/parsers/java/src/main/java/com/bitshift/parsing/parsers/CParser.java new file mode 100644 index 0000000..dbe93fb --- /dev/null +++ b/parsers/java/src/main/java/com/bitshift/parsing/parsers/CParser.java @@ -0,0 +1,3 @@ +package com.bitshift.parsing.parsers; + +import com.bitshift.parsing.parsers.Parser; diff --git a/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java b/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java new file mode 100644 index 0000000..c6d9b2a --- /dev/null +++ b/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java @@ -0,0 +1,215 @@ +package com.bitshift.parsing.parsers; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Stack; + +import 
java.io.BufferedReader; +import java.io.InputStreamReader; +import java.io.PrintWriter; +import java.io.IOException; + +import java.net.Socket; + +import org.eclipse.jdt.core.JavaCore; +import org.eclipse.jdt.core.dom.AST; +import org.eclipse.jdt.core.dom.ASTParser; +import org.eclipse.jdt.core.dom.ASTVisitor; +import org.eclipse.jdt.core.dom.CompilationUnit; +import org.eclipse.jdt.core.dom.ClassInstanceCreation; +import org.eclipse.jdt.core.dom.FieldAccess; +import org.eclipse.jdt.core.dom.FieldDeclaration; +import org.eclipse.jdt.core.dom.MethodDeclaration; +import org.eclipse.jdt.core.dom.MethodInvocation; +import org.eclipse.jdt.core.dom.Name; +import org.eclipse.jdt.core.dom.PackageDeclaration; +import org.eclipse.jdt.core.dom.QualifiedName; +import org.eclipse.jdt.core.dom.QualifiedType; +import org.eclipse.jdt.core.dom.SimpleName; +import org.eclipse.jdt.core.dom.SimpleType; +import org.eclipse.jdt.core.dom.Statement; +import org.eclipse.jdt.core.dom.Type; +import org.eclipse.jdt.core.dom.TypeDeclaration; +import org.eclipse.jdt.core.dom.VariableDeclarationFragment; + +import com.bitshift.parsing.parsers.Parser; +import com.bitshift.parsing.symbols.Symbols; +import com.bitshift.parsing.symbols.JavaSymbols; + +/*TODO: Work on parsing partial java code. + * Change visits to endVisit and implement a cache for more concise code structure. + * Get rid of unecessary imports. 
+ * Fix column and line numbers.*/ +public class JavaParser extends Parser { + + public JavaParser(Socket clientSocket) { + super(clientSocket); + } + + private String readFromClient() { + String fromClient = ""; + + try { + BufferedReader clientReader = new BufferedReader( + new InputStreamReader(this.clientSocket.getInputStream())); + + int bytes = Integer.parseInt(clientReader.readLine()); + System.out.println(bytes); + + StringBuilder builder = new StringBuilder(); + int i = 0; + + while(i < bytes) { + char aux = (char)clientReader.read(); + builder.append(aux); + i++; + } + + fromClient = builder.toString(); + + } catch (IOException ex) { + } + + return fromClient; + } + + private void writeToClient(String toClient) { + try { + PrintWriter clientWriter = new PrintWriter( + this.clientSocket.getOutputStream(), true); + + clientWriter.println(toClient); + } catch (IOException ex) { + } + } + + @Override + public Symbols genSymbols() { + char[] source = this.readFromClient().toCharArray(); + + ASTParser parser = ASTParser.newParser(AST.JLS3); + parser.setSource(source); + + Map options = JavaCore.getOptions(); + parser.setCompilerOptions(options); + + CompilationUnit root = (CompilationUnit) parser.createAST(null); + + NodeVisitor visitor = new NodeVisitor(root); + root.accept(visitor); + + return visitor.symbols; + } + + @Override + public void run() { + JavaSymbols symbols = (JavaSymbols) this.genSymbols(); + System.out.println(symbols.toString()); + writeToClient(symbols.toString()); + } + + class NodeVisitor extends ASTVisitor { + + protected CompilationUnit root; + protected JavaSymbols symbols; + private Stack> _cache; + + public NodeVisitor(CompilationUnit root) { + this.root = root; + this.symbols = new JavaSymbols(); + this._cache = new Stack>(); + } + + public boolean visit(ClassInstanceCreation node) { + return true; + } + + public boolean visit(FieldAccess node) { + Name nameObj = node.getName(); + String name = nameObj.isQualifiedName() ? 
+ ((QualifiedName) nameObj).getFullyQualifiedName() : + ((SimpleName) nameObj).getIdentifier(); + + int sl = this.root.getLineNumber(node.getStartPosition()) - 1; + int sc = this.root.getColumnNumber(node.getStartPosition()) - 1; + + this.symbols.insertFieldAccess(name, sl, sc, null, null); + return true; + } + + public boolean visit(FieldDeclaration node) { + return true; + } + + public boolean visit(MethodDeclaration node) { + Name nameObj = node.getName(); + String name = nameObj.isQualifiedName() ? + ((QualifiedName) nameObj).getFullyQualifiedName() : + ((SimpleName) nameObj).getIdentifier(); + List statements = node.getBody().statements(); + Statement last = statements.get(statements.size() - 1); + + int sl = this.root.getLineNumber(node.getStartPosition()) - 1; + int sc = this.root.getColumnNumber(node.getStartPosition()) - 1; + int el = this.root.getLineNumber(last.getStartPosition()) - 1; + int ec = this.root.getColumnNumber(last.getStartPosition()) - 1; + + this.symbols.insertMethodDeclaration(name, sl, sc, el, ec); + return true; + } + + public boolean visit(MethodInvocation node) { + Name nameObj = node.getName(); + String name = nameObj.isQualifiedName() ? + ((QualifiedName) nameObj).getFullyQualifiedName() : + ((SimpleName) nameObj).getIdentifier(); + + int sl = this.root.getLineNumber(node.getStartPosition()) - 1; + int sc = this.root.getColumnNumber(node.getStartPosition()) - 1; + + this.symbols.insertMethodInvocation(name, sl, sc, null, null); + return true; + } + + public boolean visit(PackageDeclaration node) { + Name nameObj = node.getName(); + String name = nameObj.isQualifiedName() ? + ((QualifiedName) nameObj).getFullyQualifiedName() : + ((SimpleName) nameObj).getIdentifier(); + + this.symbols.setPackage(name); + return true; + } + + public boolean visit(TypeDeclaration node) { + Name nameObj = node.getName(); + String name = nameObj.isQualifiedName() ? 
+ ((QualifiedName) nameObj).getFullyQualifiedName() : + ((SimpleName) nameObj).getIdentifier(); + + int sl = this.root.getLineNumber(node.getStartPosition()) - 1; + int sc = this.root.getColumnNumber(node.getStartPosition()) - 1; + + if (node.isInterface()) { + this.symbols.insertInterfaceDeclaration(name, sl, sc, null, null); + } else { + this.symbols.insertClassDeclaration(name, sl, sc, null, null); + } + return true; + } + + public boolean visit(VariableDeclarationFragment node) { + Name nameObj = node.getName(); + String name = nameObj.isQualifiedName() ? + ((QualifiedName) nameObj).getFullyQualifiedName() : + ((SimpleName) nameObj).getIdentifier(); + + int sl = this.root.getLineNumber(node.getStartPosition()) - 1; + int sc = this.root.getColumnNumber(node.getStartPosition()) - 1; + this.symbols.insertVariableDeclaration(name, sl, sc, null, null); + return true; + } + + } +} diff --git a/parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java b/parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java new file mode 100644 index 0000000..7ce9b7c --- /dev/null +++ b/parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java @@ -0,0 +1,19 @@ +package com.bitshift.parsing.parsers; + +import java.net.Socket; +import com.bitshift.parsing.symbols.Symbols; + +public abstract class Parser implements Runnable { + + protected Socket clientSocket; + + public Parser(Socket clientSocket) { + this.clientSocket = clientSocket; + } + + abstract Symbols genSymbols(); + + public abstract void run(); + +} + diff --git a/parsers/java/src/main/java/com/bitshift/parsing/symbols/CSymbols.java b/parsers/java/src/main/java/com/bitshift/parsing/symbols/CSymbols.java new file mode 100644 index 0000000..9abd60d --- /dev/null +++ b/parsers/java/src/main/java/com/bitshift/parsing/symbols/CSymbols.java @@ -0,0 +1 @@ +package com.bitshift.parsing.symbols; diff --git a/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java 
b/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java new file mode 100644 index 0000000..09d32b4 --- /dev/null +++ b/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java @@ -0,0 +1,159 @@ +package com.bitshift.parsing.symbols; + +import java.util.List; +import java.util.Map; +import java.util.HashMap; +import java.util.ArrayList; +import com.bitshift.parsing.symbols.Symbols; + +/*TODO: Overwrite toString.*/ +public class JavaSymbols extends Symbols { + + private String _packageName; + private Map _classes; + private Map _interfaces; + private Map _methods; + private Map _fields; + private Map _vars; + + public JavaSymbols() { + _packageName = null; + _classes = new HashMap(); + _interfaces = new HashMap(); + _methods = new HashMap(); + _fields = new HashMap(); + _vars = new HashMap(); + } + + public boolean setPackage(String name) { + _packageName = name; + return true; + } + + public boolean insertClassDeclaration(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { + List pos = new ArrayList(4); + pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); + + List> copy = (List>)_classes.get(name); + copy = (copy == null) ? new ArrayList>() : copy; + + copy.add(0, pos); + this._classes.put(name, copy); + return true; + } + public boolean insertClassInstance(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { + List pos = new ArrayList(4); + pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); + + List> copy = (List>)_classes.get(name); + copy = (copy == null) ? 
new ArrayList>() : copy; + + copy.add(pos); + this._classes.put(name, copy); + return true; + } + + public boolean insertInterfaceDeclaration(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { + List pos = new ArrayList(4); + pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); + + List> copy = (List>)_classes.get(name); + copy = (copy == null) ? new ArrayList>() : copy; + + copy.add(0, pos); + this._interfaces.put(name, copy); + return true; + } + public boolean insertInterfaceInstance(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { + List pos = new ArrayList(4); + pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); + + List> copy = (List>)_classes.get(name); + copy = (copy == null) ? new ArrayList>() : copy; + + copy.add(pos); + this._interfaces.put(name, copy); + return true; + } + + public boolean insertMethodDeclaration(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { + List pos = new ArrayList(4); + pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); + + List> copy = (List>)_classes.get(name); + copy = (copy == null) ? new ArrayList>() : copy; + + copy.add(0, pos); + this._methods.put(name, copy); + return true; + } + public boolean insertMethodInvocation(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { + List pos = new ArrayList(4); + pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); + + List> copy = (List>)_classes.get(name); + copy = (copy == null) ? 
new ArrayList>() : copy; + + copy.add(pos); + this._methods.put(name, copy); + return true; + } + + public boolean insertFieldDeclaration(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { + List pos = new ArrayList(4); + pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); + + List> copy = (List>)_classes.get(name); + copy = (copy == null) ? new ArrayList>() : copy; + + copy.add(0, pos); + this._fields.put(name, copy); + return true; + } + public boolean insertFieldAccess(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { + List pos = new ArrayList(4); + pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); + + List> copy = (List>)_classes.get(name); + copy = (copy == null) ? new ArrayList>() : copy; + + copy.add(pos); + this._fields.put(name, copy); + return true; + } + + public boolean insertVariableDeclaration(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { + List pos = new ArrayList(4); + pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); + + List> copy = (List>)_classes.get(name); + copy = (copy == null) ? new ArrayList>() : copy; + + copy.add(0, pos); + this._vars.put(name, copy); + return true; + } + public boolean insertVariableAccess(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { + List pos = new ArrayList(4); + pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); + + List> copy = (List>)_classes.get(name); + copy = (copy == null) ? 
new ArrayList>() : copy; + + copy.add(pos); + this._vars.put(name, copy); + return true; + } + + public String toString() { + StringBuilder builder = new StringBuilder(); + builder.append("classes:" + this._classes + ","); + builder.append("interfaces:" + this._interfaces + ","); + builder.append("methods:" + this._methods + ","); + builder.append("fields:" + this._fields + ","); + builder.append("vars:" + this._vars + ","); + + return "{" + builder.toString() + "}"; + } +} + diff --git a/parsers/java/src/main/java/com/bitshift/parsing/symbols/Symbols.java b/parsers/java/src/main/java/com/bitshift/parsing/symbols/Symbols.java new file mode 100644 index 0000000..116dbd7 --- /dev/null +++ b/parsers/java/src/main/java/com/bitshift/parsing/symbols/Symbols.java @@ -0,0 +1,9 @@ +package com.bitshift.parsing.symbols; + +public abstract class Symbols { + + public Symbols() { + + } + +} From 19a5457f071fce4c55c3f14586430e7cd43e0e7b Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Sun, 20 Apr 2014 01:42:57 -0400 Subject: [PATCH 20/36] Change director structure for java --- .../src/main/java/org/bitshift/parsing/Parse.java | 40 ----- .../java/org/bitshift/parsing/parsers/CParser.java | 3 - .../org/bitshift/parsing/parsers/JavaParser.java | 164 --------------------- .../java/org/bitshift/parsing/parsers/Parser.java | 16 -- .../org/bitshift/parsing/symbols/CSymbols.java | 1 - .../org/bitshift/parsing/symbols/JavaSymbols.java | 153 ------------------- .../java/org/bitshift/parsing/symbols/Symbols.java | 9 -- 7 files changed, 386 deletions(-) delete mode 100644 parsers/java/src/main/java/org/bitshift/parsing/Parse.java delete mode 100644 parsers/java/src/main/java/org/bitshift/parsing/parsers/CParser.java delete mode 100644 parsers/java/src/main/java/org/bitshift/parsing/parsers/JavaParser.java delete mode 100644 parsers/java/src/main/java/org/bitshift/parsing/parsers/Parser.java delete mode 100644 parsers/java/src/main/java/org/bitshift/parsing/symbols/CSymbols.java delete 
mode 100644 parsers/java/src/main/java/org/bitshift/parsing/symbols/JavaSymbols.java delete mode 100644 parsers/java/src/main/java/org/bitshift/parsing/symbols/Symbols.java diff --git a/parsers/java/src/main/java/org/bitshift/parsing/Parse.java b/parsers/java/src/main/java/org/bitshift/parsing/Parse.java deleted file mode 100644 index 593645b..0000000 --- a/parsers/java/src/main/java/org/bitshift/parsing/Parse.java +++ /dev/null @@ -1,40 +0,0 @@ -import java.io.*; -import java.net.*; - -public class Parse { - - public static void main(String[][] args) { - String fromClient; - String toClient; - - try { - ServerSocket server = new ServerSocket(5002); - - while(true) { - Socket connected = server.accept(); - System.out.println("The client is connected."); - - BufferedReader clientReader = new BufferedReader( - new InputStreamReader(connected.getInputStream())); - - PrintWriter clientWriter = new PrintWriter( - connected.getOutputStream(), true); - - while(true) { - StringBuilder builder = new StringBuilder(); - - while((fromClient = clientReader.readLine()) != null) { - builder.append(fromClient); - } - - fromClient = builder.toString(); - - //Handle the data from the client here - } - } - } catch (IOException ex) { - - } - } - -} diff --git a/parsers/java/src/main/java/org/bitshift/parsing/parsers/CParser.java b/parsers/java/src/main/java/org/bitshift/parsing/parsers/CParser.java deleted file mode 100644 index 9cd4308..0000000 --- a/parsers/java/src/main/java/org/bitshift/parsing/parsers/CParser.java +++ /dev/null @@ -1,3 +0,0 @@ -package org.bitshift.parsing.parsers; - -import org.bitshift.parsing.parsers.Parser; diff --git a/parsers/java/src/main/java/org/bitshift/parsing/parsers/JavaParser.java b/parsers/java/src/main/java/org/bitshift/parsing/parsers/JavaParser.java deleted file mode 100644 index bb7b7e4..0000000 --- a/parsers/java/src/main/java/org/bitshift/parsing/parsers/JavaParser.java +++ /dev/null @@ -1,164 +0,0 @@ -package org.bitshift.parsing.parsers; - 
-import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Stack; - -import org.eclipse.jdt.core.JavaCore; -import org.eclipse.jdt.core.dom.AST; -import org.eclipse.jdt.core.dom.ASTParser; -import org.eclipse.jdt.core.dom.ASTVisitor; -import org.eclipse.jdt.core.dom.CompilationUnit; -import org.eclipse.jdt.core.dom.ClassInstanceCreation; -import org.eclipse.jdt.core.dom.FieldAccess; -import org.eclipse.jdt.core.dom.FieldDeclaration; -import org.eclipse.jdt.core.dom.MethodDeclaration; -import org.eclipse.jdt.core.dom.MethodInvocation; -import org.eclipse.jdt.core.dom.Name; -import org.eclipse.jdt.core.dom.PackageDeclaration; -import org.eclipse.jdt.core.dom.QualifiedName; -import org.eclipse.jdt.core.dom.QualifiedType; -import org.eclipse.jdt.core.dom.SimpleName; -import org.eclipse.jdt.core.dom.SimpleType; -import org.eclipse.jdt.core.dom.Statement; -import org.eclipse.jdt.core.dom.Type; -import org.eclipse.jdt.core.dom.TypeDeclaration; -import org.eclipse.jdt.core.dom.VariableDeclarationStatement; - -import org.bitshift.parsing.parsers.Parser; -import org.bitshift.parsing.symbols.Symbols; -import org.bitshift.parsing.symbols.JavaSymbols; - -/*TODO: Work on parsing partial java code. 
- * Change visits to endVisit and implement a cache*/ -public class JavaParser extends Parser { - - protected JavaSymbols symbols; - protected CompilationUnit compUnit; - private Stack> _cache; - - public JavaParser(String source) { - super(source); - this.symbols = new JavaSymbols(); - this._cache = new Stack>(); - } - - @Override - public Symbols genSymbols() { - char[] source = this.source.toCharArray(); - - ASTParser parser = ASTParser.newParser(AST.JLS3); - parser.setSource(source); - - Map options = JavaCore.getOptions(); - parser.setCompilerOptions(options); - - //Work on parsing partial java code later - this.compUnit = (CompilationUnit) parser.createAST(null); - - ASTVisitor visitor = new NodeVisitor(); - this.compUnit.accept(visitor); - - return this.symbols; - } - - class NodeVisitor extends ASTVisitor { - - public boolean visit(ClassInstanceCreation node) { - Type typeObj = node.getType(); - Name nameObj = typeObj.isQualifiedType() ? ((QualifiedType) typeObj).getName() : ((SimpleType) typeObj).getName(); - String name = nameObj.isQualifiedName() ? ((QualifiedName)nameObj).getFullyQualifiedName() : ((SimpleName)nameObj).getIdentifier(); - - int sl = compUnit.getLineNumber(node.getStartPosition()) - 1; - int sc = compUnit.getColumnNumber(node.getStartPosition()) - 1; - - symbols.insertClassInstance(name, sl, sc, null, null); - return true; - } - - public boolean visit(FieldAccess node) { - Name nameObj = node.getName(); - String name = nameObj.isQualifiedName() ? ((QualifiedName)nameObj).getFullyQualifiedName() : ((SimpleName)nameObj).getIdentifier(); - - int sl = compUnit.getLineNumber(node.getStartPosition()) - 1; - int sc = compUnit.getColumnNumber(node.getStartPosition()) - 1; - - symbols.insertFieldAccess(name, sl, sc, null, null); - return true; - } - - public boolean visit(FieldDeclaration node) { - Type typeObj = node.getType(); - Name nameObj = typeObj.isQualifiedType() ? 
((QualifiedType) typeObj).getName() : ((SimpleType) typeObj).getName(); - String name = nameObj.isQualifiedName() ? ((QualifiedName)nameObj).getFullyQualifiedName() : ((SimpleName)nameObj).getIdentifier(); - - int sl = compUnit.getLineNumber(node.getStartPosition()) - 1; - int sc = compUnit.getColumnNumber(node.getStartPosition()) - 1; - - symbols.insertFieldDeclaration(name, sl, sc, null, null); - return true; - } - - public boolean visit(MethodDeclaration node) { - Name nameObj = node.getName(); - String name = nameObj.isQualifiedName() ? ((QualifiedName)nameObj).getFullyQualifiedName() : ((SimpleName)nameObj).getIdentifier(); - List statements = node.getBody().statements(); - Statement last = statements.get(statements.size() - 1); - - int sl = compUnit.getLineNumber(node.getStartPosition()) - 1; - int sc = compUnit.getColumnNumber(node.getStartPosition()) - 1; - int el = compUnit.getLineNumber(last.getStartPosition()) - 1; - int ec = compUnit.getColumnNumber(last.getStartPosition()) - 1; - - symbols.insertMethodDeclaration(name, sl, sc, el, ec); - return true; - } - - public boolean visit(MethodInvocation node) { - Name nameObj = node.getName(); - String name = nameObj.isQualifiedName() ? ((QualifiedName)nameObj).getFullyQualifiedName() : ((SimpleName)nameObj).getIdentifier(); - - int sl = compUnit.getLineNumber(node.getStartPosition()) - 1; - int sc = compUnit.getColumnNumber(node.getStartPosition()) - 1; - - symbols.insertMethodInvocation(name, sl, sc, null, null); - return true; - } - - public boolean visit(PackageDeclaration node) { - Name nameObj = node.getName(); - String name = nameObj.isQualifiedName() ? ((QualifiedName)nameObj).getFullyQualifiedName() : ((SimpleName)nameObj).getIdentifier(); - - symbols.setPackage(name); - return true; - } - - public boolean visit(TypeDeclaration node) { - Name nameObj = node.getName(); - String name = nameObj.isQualifiedName() ? 
((QualifiedName)nameObj).getFullyQualifiedName() : ((SimpleName)nameObj).getIdentifier(); - - int sl = compUnit.getLineNumber(node.getStartPosition()) - 1; - int sc = compUnit.getColumnNumber(node.getStartPosition()) - 1; - - if (node.isInterface()) { - symbols.insertInterfaceDeclaration(name, sl, sc, null, null); - } else { - symbols.insertClassDeclaration(name, sl, sc, null, null); - } - return true; - } - - public boolean visit(VariableDeclarationStatement node) { - Type typeObj = node.getType(); - Name nameObj = typeObj.isQualifiedType() ? ((QualifiedType) typeObj).getName() : ((SimpleType) typeObj).getName(); - String name = nameObj.isQualifiedName() ? ((QualifiedName)nameObj).getFullyQualifiedName() : ((SimpleName)nameObj).getIdentifier(); - - int sl = compUnit.getLineNumber(node.getStartPosition()) - 1; - int sc = compUnit.getColumnNumber(node.getStartPosition()) - 1; - symbols.insertVariableDeclaration(name, sl, sc, null, null); - return true; - } - - } -} diff --git a/parsers/java/src/main/java/org/bitshift/parsing/parsers/Parser.java b/parsers/java/src/main/java/org/bitshift/parsing/parsers/Parser.java deleted file mode 100644 index 9b96a8d..0000000 --- a/parsers/java/src/main/java/org/bitshift/parsing/parsers/Parser.java +++ /dev/null @@ -1,16 +0,0 @@ -package org.bitshift.parsing.parsers; - -import org.bitshift.parsing.symbols.Symbols; - -public abstract class Parser { - - protected String source; - - public Parser(String source) { - this.source = source; - } - - abstract Symbols genSymbols(); - -} - diff --git a/parsers/java/src/main/java/org/bitshift/parsing/symbols/CSymbols.java b/parsers/java/src/main/java/org/bitshift/parsing/symbols/CSymbols.java deleted file mode 100644 index f71667e..0000000 --- a/parsers/java/src/main/java/org/bitshift/parsing/symbols/CSymbols.java +++ /dev/null @@ -1 +0,0 @@ -package org.bitshift.parsing.symbols; diff --git a/parsers/java/src/main/java/org/bitshift/parsing/symbols/JavaSymbols.java 
b/parsers/java/src/main/java/org/bitshift/parsing/symbols/JavaSymbols.java deleted file mode 100644 index aa39cfd..0000000 --- a/parsers/java/src/main/java/org/bitshift/parsing/symbols/JavaSymbols.java +++ /dev/null @@ -1,153 +0,0 @@ -package org.bitshift.parsing.symbols; - -import java.util.List; -import java.util.Map; -import java.util.HashMap; -import java.util.ArrayList; -import org.bitshift.parsing.symbols.Symbols; - -/*TODO: Overwrite toString - * Change instance vars to HashMaps of HashMaps*/ -public class JavaSymbols extends Symbols { - - private String _packageName; - private Map _classes; - private Map _interfaces; - private Map _methods; - private Map _fields; - private Map _vars; - - public JavaSymbols() { - _packageName = null; - _classes = new HashMap(); - _interfaces = new HashMap(); - _methods = new HashMap(); - _fields = new HashMap(); - _vars = new HashMap(); - } - - public boolean setPackage(String name) { - _packageName = name; - return true; - } - - public boolean insertClassDeclaration(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { - List pos = new ArrayList(4); - pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); - - List> copy = (List>)_classes.get(name); - copy = (copy == null) ? new ArrayList>() : copy; - - copy.add(0, pos); - _classes.put(name, copy); - return true; - } - public boolean insertClassInstance(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { - List pos = new ArrayList(4); - pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); - - List> copy = (List>)_classes.get(name); - copy = (copy == null) ? 
new ArrayList>() : copy; - - copy.add(pos); - _classes.put(name, copy); - return true; - } - - public boolean insertInterfaceDeclaration(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { - List pos = new ArrayList(4); - pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); - - List> copy = (List>)_classes.get(name); - copy = (copy == null) ? new ArrayList>() : copy; - - copy.add(0, pos); - _classes.put(name, copy); - return true; - } - public boolean insertInterfaceInstance(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { - List pos = new ArrayList(4); - pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); - - List> copy = (List>)_classes.get(name); - copy = (copy == null) ? new ArrayList>() : copy; - - copy.add(pos); - _classes.put(name, copy); - return true; - } - - public boolean insertMethodDeclaration(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { - List pos = new ArrayList(4); - pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); - - List> copy = (List>)_classes.get(name); - copy = (copy == null) ? new ArrayList>() : copy; - - copy.add(0, pos); - _classes.put(name, copy); - return true; - } - public boolean insertMethodInvocation(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { - List pos = new ArrayList(4); - pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); - - List> copy = (List>)_classes.get(name); - copy = (copy == null) ? 
new ArrayList>() : copy; - - copy.add(pos); - _classes.put(name, copy); - return true; - } - - public boolean insertFieldDeclaration(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { - List pos = new ArrayList(4); - pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); - - List> copy = (List>)_classes.get(name); - copy = (copy == null) ? new ArrayList>() : copy; - - copy.add(0, pos); - _classes.put(name, copy); - return true; - } - public boolean insertFieldAccess(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { - List pos = new ArrayList(4); - pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); - - List> copy = (List>)_classes.get(name); - copy = (copy == null) ? new ArrayList>() : copy; - - copy.add(pos); - _classes.put(name, copy); - return true; - } - - public boolean insertVariableDeclaration(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { - List pos = new ArrayList(4); - pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); - - List> copy = (List>)_classes.get(name); - copy = (copy == null) ? new ArrayList>() : copy; - - copy.add(0, pos); - _classes.put(name, copy); - return true; - } - public boolean insertVariableAccess(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { - List pos = new ArrayList(4); - pos.set(0, startLine); pos.set(1, startCol); pos.set(2, endLine); pos.set(3, endCol); - - List> copy = (List>)_classes.get(name); - copy = (copy == null) ? 
new ArrayList>() : copy; - - copy.add(pos); - _classes.put(name, copy); - return true; - } - - public String toString() { - return ""; - } -} - diff --git a/parsers/java/src/main/java/org/bitshift/parsing/symbols/Symbols.java b/parsers/java/src/main/java/org/bitshift/parsing/symbols/Symbols.java deleted file mode 100644 index 70762b1..0000000 --- a/parsers/java/src/main/java/org/bitshift/parsing/symbols/Symbols.java +++ /dev/null @@ -1,9 +0,0 @@ -package org.bitshift.parsing.symbols; - -public class Symbols { - - public Symbols() { - - } - -} From 2338887a52c419c98c7f87ce01d80189e003d12f Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Sun, 20 Apr 2014 02:16:06 -0400 Subject: [PATCH 21/36] Working version of java parser up and running. --- .gitignore | 3 +- .../com/bitshift/parsing/parsers/JavaParser.java | 29 ++- .../com/bitshift/parsing/symbols/JavaSymbols.java | 16 +- test/java_parser_test.py | 18 ++ test/resources/Matrix.java | 218 +++++++++++++++++++++ 5 files changed, 259 insertions(+), 25 deletions(-) create mode 100644 test/java_parser_test.py create mode 100644 test/resources/Matrix.java diff --git a/.gitignore b/.gitignore index 8b72a6d..26d8b64 100644 --- a/.gitignore +++ b/.gitignore @@ -42,4 +42,5 @@ nosetests.xml .pydevproject # Maven -*/target/* +parsers/java/target/* +*/tags diff --git a/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java b/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java index c6d9b2a..be5fc37 100644 --- a/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java +++ b/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java @@ -39,8 +39,7 @@ import com.bitshift.parsing.symbols.JavaSymbols; /*TODO: Work on parsing partial java code. * Change visits to endVisit and implement a cache for more concise code structure. - * Get rid of unecessary imports. 
- * Fix column and line numbers.*/ + * Get rid of unecessary imports.*/ public class JavaParser extends Parser { public JavaParser(Socket clientSocket) { @@ -55,7 +54,6 @@ public class JavaParser extends Parser { new InputStreamReader(this.clientSocket.getInputStream())); int bytes = Integer.parseInt(clientReader.readLine()); - System.out.println(bytes); StringBuilder builder = new StringBuilder(); int i = 0; @@ -105,7 +103,6 @@ public class JavaParser extends Parser { @Override public void run() { JavaSymbols symbols = (JavaSymbols) this.genSymbols(); - System.out.println(symbols.toString()); writeToClient(symbols.toString()); } @@ -131,8 +128,8 @@ public class JavaParser extends Parser { ((QualifiedName) nameObj).getFullyQualifiedName() : ((SimpleName) nameObj).getIdentifier(); - int sl = this.root.getLineNumber(node.getStartPosition()) - 1; - int sc = this.root.getColumnNumber(node.getStartPosition()) - 1; + int sl = this.root.getLineNumber(node.getStartPosition()); + int sc = this.root.getColumnNumber(node.getStartPosition()); this.symbols.insertFieldAccess(name, sl, sc, null, null); return true; @@ -150,10 +147,10 @@ public class JavaParser extends Parser { List statements = node.getBody().statements(); Statement last = statements.get(statements.size() - 1); - int sl = this.root.getLineNumber(node.getStartPosition()) - 1; - int sc = this.root.getColumnNumber(node.getStartPosition()) - 1; - int el = this.root.getLineNumber(last.getStartPosition()) - 1; - int ec = this.root.getColumnNumber(last.getStartPosition()) - 1; + int sl = this.root.getLineNumber(node.getStartPosition()); + int sc = this.root.getColumnNumber(node.getStartPosition()); + int el = this.root.getLineNumber(last.getStartPosition()); + int ec = this.root.getColumnNumber(last.getStartPosition()); this.symbols.insertMethodDeclaration(name, sl, sc, el, ec); return true; @@ -165,8 +162,8 @@ public class JavaParser extends Parser { ((QualifiedName) nameObj).getFullyQualifiedName() : ((SimpleName) 
nameObj).getIdentifier(); - int sl = this.root.getLineNumber(node.getStartPosition()) - 1; - int sc = this.root.getColumnNumber(node.getStartPosition()) - 1; + int sl = this.root.getLineNumber(node.getStartPosition()); + int sc = this.root.getColumnNumber(node.getStartPosition()); this.symbols.insertMethodInvocation(name, sl, sc, null, null); return true; @@ -188,8 +185,8 @@ public class JavaParser extends Parser { ((QualifiedName) nameObj).getFullyQualifiedName() : ((SimpleName) nameObj).getIdentifier(); - int sl = this.root.getLineNumber(node.getStartPosition()) - 1; - int sc = this.root.getColumnNumber(node.getStartPosition()) - 1; + int sl = this.root.getLineNumber(node.getStartPosition()); + int sc = this.root.getColumnNumber(node.getStartPosition()); if (node.isInterface()) { this.symbols.insertInterfaceDeclaration(name, sl, sc, null, null); @@ -205,8 +202,8 @@ public class JavaParser extends Parser { ((QualifiedName) nameObj).getFullyQualifiedName() : ((SimpleName) nameObj).getIdentifier(); - int sl = this.root.getLineNumber(node.getStartPosition()) - 1; - int sc = this.root.getColumnNumber(node.getStartPosition()) - 1; + int sl = this.root.getLineNumber(node.getStartPosition()); + int sc = this.root.getColumnNumber(node.getStartPosition()); this.symbols.insertVariableDeclaration(name, sl, sc, null, null); return true; } diff --git a/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java b/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java index 09d32b4..ef8efb5 100644 --- a/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java +++ b/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java @@ -57,7 +57,7 @@ public class JavaSymbols extends Symbols { List pos = new ArrayList(4); pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); - List> copy = (List>)_classes.get(name); + List> copy = (List>)_interfaces.get(name); copy = (copy == null) ? 
new ArrayList>() : copy; copy.add(0, pos); @@ -68,7 +68,7 @@ public class JavaSymbols extends Symbols { List pos = new ArrayList(4); pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); - List> copy = (List>)_classes.get(name); + List> copy = (List>)_interfaces.get(name); copy = (copy == null) ? new ArrayList>() : copy; copy.add(pos); @@ -80,7 +80,7 @@ public class JavaSymbols extends Symbols { List pos = new ArrayList(4); pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); - List> copy = (List>)_classes.get(name); + List> copy = (List>)_methods.get(name); copy = (copy == null) ? new ArrayList>() : copy; copy.add(0, pos); @@ -91,7 +91,7 @@ public class JavaSymbols extends Symbols { List pos = new ArrayList(4); pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); - List> copy = (List>)_classes.get(name); + List> copy = (List>)_methods.get(name); copy = (copy == null) ? new ArrayList>() : copy; copy.add(pos); @@ -103,7 +103,7 @@ public class JavaSymbols extends Symbols { List pos = new ArrayList(4); pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); - List> copy = (List>)_classes.get(name); + List> copy = (List>)_fields.get(name); copy = (copy == null) ? new ArrayList>() : copy; copy.add(0, pos); @@ -114,7 +114,7 @@ public class JavaSymbols extends Symbols { List pos = new ArrayList(4); pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); - List> copy = (List>)_classes.get(name); + List> copy = (List>)_fields.get(name); copy = (copy == null) ? new ArrayList>() : copy; copy.add(pos); @@ -126,7 +126,7 @@ public class JavaSymbols extends Symbols { List pos = new ArrayList(4); pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); - List> copy = (List>)_classes.get(name); + List> copy = (List>)_vars.get(name); copy = (copy == null) ? 
new ArrayList>() : copy; copy.add(0, pos); @@ -137,7 +137,7 @@ public class JavaSymbols extends Symbols { List pos = new ArrayList(4); pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); - List> copy = (List>)_classes.get(name); + List> copy = (List>)_vars.get(name); copy = (copy == null) ? new ArrayList>() : copy; copy.add(pos); diff --git a/test/java_parser_test.py b/test/java_parser_test.py new file mode 100644 index 0000000..7f87f2c --- /dev/null +++ b/test/java_parser_test.py @@ -0,0 +1,18 @@ +import socket + +client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) +client_socket.connect(("localhost", 5002)) + +with open("resources/Matrix.java", "r") as java_file: + source = java_file.read() + "\nEOS_BITSHIFT" + client_socket.send("%d\n%s" % (len(source), source)); + +data = '' +while True: + data = client_socket.recv(10000) + + if data != '': + client_socket.close() + break; + +print data; diff --git a/test/resources/Matrix.java b/test/resources/Matrix.java new file mode 100644 index 0000000..5d641e0 --- /dev/null +++ b/test/resources/Matrix.java @@ -0,0 +1,218 @@ +package battlechap; + +import java.io.PrintStream; + +public class Matrix { + private Object[][] _datmatrix; + + public Matrix(int paramInt){ + this._datmatrix = new Object[paramInt][paramInt]; + } + + public int size() { + return this._datmatrix.length; + } + + public Object get(int paramInt1, int paramInt2) { + return this._datmatrix[paramInt1][paramInt2]; + } + + public boolean isEmpty(int paramInt1, int paramInt2) { + return this._datmatrix[paramInt1][paramInt2] == null; + } + + public boolean equals(Object paramObject) { + boolean bool = true; + if ((paramObject instanceof Matrix)) { + Matrix localMatrix = (Matrix)paramObject; + if (localMatrix.size() == size()) { + for (int i = 0; i < size(); i++) { + for (int j = 0; j < size(); j++) { + if (!localMatrix.get(i, j).equals(get(i, j))) { + bool = false; + break; + } + } + if (!bool) + break; + } + } + else + bool 
= false; + } + else + { + bool = false; + } + return bool; + } + + public Object set(int paramInt1, int paramInt2, Object paramObject) { + Object localObject = this._datmatrix[paramInt1][paramInt2]; + this._datmatrix[paramInt1][paramInt2] = paramObject; + return localObject; + } + + public void transpose() { + int i = 0; + for (int j = 0; j < size(); j++) { + for (int k = i; k < size(); k++) { + set(j, k, set(k, j, get(j, k))); + } + i++; + } + } + + public static void swapRows(int paramInt1, int paramInt2, Object[][] paramArrayOfObject) { + for (int i = 0; i < paramArrayOfObject[paramInt1].length; i++) { + Object localObject = paramArrayOfObject[paramInt1][i]; + paramArrayOfObject[paramInt1][i] = paramArrayOfObject[paramInt2][i]; + paramArrayOfObject[paramInt2][i] = localObject; + } + } + + public static void swapCols(int paramInt1, int paramInt2, Object[][] paramArrayOfObject) { + for (int i = 0; i < paramArrayOfObject.length; i++) { + Object localObject = paramArrayOfObject[i][paramInt1]; + paramArrayOfObject[i][paramInt1] = paramArrayOfObject[i][paramInt2]; + paramArrayOfObject[i][paramInt2] = localObject; + } + } + + public Object[] getRow(int paramInt) { + Object[] arrayOfObject = new Object[this._datmatrix[paramInt].length]; + for (int i = 0; i < arrayOfObject.length; i++) { + arrayOfObject[i] = this._datmatrix[paramInt][i]; + } + return arrayOfObject; + } + + public Object[] getCol(int paramInt) { + Object[] arrayOfObject = new Object[this._datmatrix[paramInt].length]; + for (int i = 0; i < arrayOfObject.length; i++) { + arrayOfObject[i] = this._datmatrix[i][paramInt]; + } + return arrayOfObject; + } + + public Object[] setRow(int paramInt, Object[] paramArrayOfObject) { + Object[] arrayOfObject = getRow(paramInt); + + for (int i = 0; i < size(); i++) { + set(paramInt, i, paramArrayOfObject[i]); + } + + return arrayOfObject; + } + + public Object[] setCol(int paramInt, Object[] paramArrayOfObject) { + Object[] arrayOfObject = getCol(paramInt); + + for (int 
i = 0; i < size(); i++) { + set(i, paramInt, paramArrayOfObject[i]); + } + + return arrayOfObject; + } + + public String toString() + { + String str1 = ""; + for (int i = 0; i < this._datmatrix.length; i++) { + if (i < 9) + str1 = str1 + (i + 1) + ": "; + else + str1 = str1 + (i + 1) + ":"; + for (int j = 0; j < this._datmatrix[i].length; j++) { + int k = (this._datmatrix[i][j] + "").length(); + String str2 = " ".substring(k); + str1 = str1 + this._datmatrix[i][j] + str2; + } + str1 = str1 + "\n"; + } + return str1; + } + + public static void print(Object[][] paramArrayOfObject) { + for (int i = 0; i < paramArrayOfObject.length; i++) { + for (int j = 0; j < paramArrayOfObject[i].length; j++) { + int k = (paramArrayOfObject[i][j] + "").length(); + String str = " ".substring(k); + System.out.print(paramArrayOfObject[i][j] + str); + } + System.out.print("\n"); + } + } + + public static void printArray(Object[] paramArrayOfObject) { + for (int i = 0; i < paramArrayOfObject.length; i++) { + int j = (paramArrayOfObject[i] + "").length(); + String str = " ".substring(j); + System.out.print(paramArrayOfObject[i] + str); + } + System.out.print("\n"); + } + + public static void main(String[] paramArrayOfString) { + Matrix localMatrix1 = new Matrix(5); + Matrix localMatrix2 = new Matrix(5); + for (int i = 0; i < localMatrix1.size(); i++) { + for (int j = 0; j < localMatrix1.size(); j++) { + Integer localInteger1 = new Integer((int)(Math.random() * 20.0D)); + localMatrix1.set(i, j, localInteger1); + localMatrix2.set(i, j, localInteger1); + } + } + + System.out.println("\nDemonstrating equals method (should be true)\t" + localMatrix2.equals(localMatrix1) + "\n"); + + System.out.println("Demonstrating get method\n" + localMatrix1.get(0, 0) + "\n"); + System.out.println("Demonstrating is empty method\n" + localMatrix1.isEmpty(1, 0) + "\n"); + System.out.println("Demonstrating size method \n" + localMatrix1.size() + "\n"); + System.out.println("Demonstrating toString method\n" + 
localMatrix1 + "\n"); + localMatrix1.transpose(); + System.out.println("Blop has been transposed\n" + localMatrix1 + "\n"); + + Object[][] arrayOfObject = new Object[4][4]; + for (int j = 0; j < arrayOfObject.length; j++) { + for (int k = 0; k < arrayOfObject[j].length; k++) { + Integer localInteger2 = new Integer((int)(Math.random() * 20.0D)); + arrayOfObject[j][k] = localInteger2; + } + } + System.out.println("\n\n**Swapping Rows Demo**"); + print(arrayOfObject); + System.out.println("\nRows 1 and 2 have been Swapped \n"); + swapRows(1, 2, arrayOfObject); + print(arrayOfObject); + + System.out.println("\n**Swapping Columns Demo**"); + print(arrayOfObject); + System.out.println("\n\nColumns 1 and 2 have been Swapped \n"); + swapCols(1, 2, arrayOfObject); + print(arrayOfObject); + + System.out.println("\n**Getting rows demo (from blop)**"); + System.out.println(localMatrix1); + System.out.println("\nGetting row 1\n"); + printArray(localMatrix1.getRow(1)); + + System.out.println("\n**Getting cols demo (from blop)**"); + System.out.println(localMatrix1); + System.out.println("\nGetting col 1\n"); + printArray(localMatrix1.getCol(1)); + + System.out.println("\n**Demonstrating set row method**"); + System.out.println(localMatrix1); + System.out.println("\nSwitching row 1 of blop to 1st column of blop\n"); + localMatrix1.setRow(1, localMatrix1.getCol(1)); + System.out.println(localMatrix1 + "\n"); + + System.out.println("\n**Demonstrating set col method**"); + System.out.println(localMatrix1); + System.out.println("\nSwitching col 1 of blop to 2nd row of blop\n"); + localMatrix1.setCol(1, localMatrix1.getRow(2)); + System.out.println(localMatrix1 + "\n"); + } +} + From f451e426e0ec604ff87d1b354bbd5788a33cc329 Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Sun, 20 Apr 2014 12:14:38 -0400 Subject: [PATCH 22/36] Refactor of the Java Parser Mod: Parser.java: - Moved client reading and writing methods to the abstract parser class, so that it is not specific to the 
JavaParser JavaParser.java: - Implemented NodeVisitor._cache. The cache is a stack of data packets. When a node that we want information on is first visited, a new packet of data is pushed onto the stack. The child nodes of that original node then add information to the packet, and when the original node is traversed again on the way up the tree, the data is popped from the cache and added to the symbols. This makes it possible to gather information about various levels of the tree easily. JavaSymbols.java: - Refactor all the insertMethods to simply add a packet of data to the appropriate HashMap. Symbols.java: - Add a createCoord method which returns an arraylist representing a point in a document. --- .../com/bitshift/parsing/parsers/JavaParser.java | 162 +++++++++++--------- .../java/com/bitshift/parsing/parsers/Parser.java | 44 +++++- .../com/bitshift/parsing/symbols/JavaSymbols.java | 169 ++++++++------------- .../java/com/bitshift/parsing/symbols/Symbols.java | 8 + 4 files changed, 201 insertions(+), 182 deletions(-) diff --git a/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java b/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java index be5fc37..d7e1c10 100644 --- a/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java +++ b/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java @@ -5,11 +5,6 @@ import java.util.List; import java.util.Map; import java.util.Stack; -import java.io.BufferedReader; -import java.io.InputStreamReader; -import java.io.PrintWriter; -import java.io.IOException; - import java.net.Socket; import org.eclipse.jdt.core.JavaCore; @@ -46,44 +41,8 @@ public class JavaParser extends Parser { super(clientSocket); } - private String readFromClient() { - String fromClient = ""; - - try { - BufferedReader clientReader = new BufferedReader( - new InputStreamReader(this.clientSocket.getInputStream())); - - int bytes = Integer.parseInt(clientReader.readLine()); - -
StringBuilder builder = new StringBuilder(); - int i = 0; - - while(i < bytes) { - char aux = (char)clientReader.read(); - builder.append(aux); - i++; - } - - fromClient = builder.toString(); - - } catch (IOException ex) { - } - - return fromClient; - } - - private void writeToClient(String toClient) { - try { - PrintWriter clientWriter = new PrintWriter( - this.clientSocket.getOutputStream(), true); - - clientWriter.println(toClient); - } catch (IOException ex) { - } - } - @Override - public Symbols genSymbols() { + protected Symbols genSymbols() { char[] source = this.readFromClient().toCharArray(); ASTParser parser = ASTParser.newParser(AST.JLS3); @@ -118,28 +77,24 @@ public class JavaParser extends Parser { this._cache = new Stack>(); } - public boolean visit(ClassInstanceCreation node) { - return true; - } - - public boolean visit(FieldAccess node) { - Name nameObj = node.getName(); - String name = nameObj.isQualifiedName() ? - ((QualifiedName) nameObj).getFullyQualifiedName() : - ((SimpleName) nameObj).getIdentifier(); - + public boolean visit(FieldDeclaration node) { + HashMap data = new HashMap(); int sl = this.root.getLineNumber(node.getStartPosition()); int sc = this.root.getColumnNumber(node.getStartPosition()); - this.symbols.insertFieldAccess(name, sl, sc, null, null); + data.put("coord", Symbols.createCoord(sl, sc, null, null)); + this._cache.push(data); return true; } - public boolean visit(FieldDeclaration node) { - return true; + public void endVisit(FieldDeclaration node) { + HashMap data = this._cache.pop(); + String name = (String)data.remove("name"); + this.symbols.insertFieldDeclaration(name, data); } public boolean visit(MethodDeclaration node) { + HashMap data = new HashMap(); Name nameObj = node.getName(); String name = nameObj.isQualifiedName() ? 
((QualifiedName) nameObj).getFullyQualifiedName() : @@ -152,59 +107,114 @@ public class JavaParser extends Parser { int el = this.root.getLineNumber(last.getStartPosition()); int ec = this.root.getColumnNumber(last.getStartPosition()); - this.symbols.insertMethodDeclaration(name, sl, sc, el, ec); + data.put("coord", Symbols.createCoord(sl, sc, null, null)); + data.put("name", name); + this._cache.push(data); return true; } + public void endVisit(MethodDeclaration node) { + HashMap data = this._cache.pop(); + String name = (String)data.remove("name"); + this.symbols.insertMethodDeclaration(name, data); + } + public boolean visit(MethodInvocation node) { + HashMap data = new HashMap(); Name nameObj = node.getName(); String name = nameObj.isQualifiedName() ? ((QualifiedName) nameObj).getFullyQualifiedName() : ((SimpleName) nameObj).getIdentifier(); - int sl = this.root.getLineNumber(node.getStartPosition()); int sc = this.root.getColumnNumber(node.getStartPosition()); - this.symbols.insertMethodInvocation(name, sl, sc, null, null); + data.put("coord", Symbols.createCoord(sl, sc, null, null)); + data.put("name", name); + this._cache.push(data); return true; } + public void endVisit(MethodInvocation node) { + HashMap data = this._cache.pop(); + String name = (String)data.remove("name"); + this.symbols.insertMethodInvocation(name, data); + } + public boolean visit(PackageDeclaration node) { - Name nameObj = node.getName(); - String name = nameObj.isQualifiedName() ? - ((QualifiedName) nameObj).getFullyQualifiedName() : - ((SimpleName) nameObj).getIdentifier(); + HashMap data = new HashMap(); + this._cache.push(data); + return true; + } + public void endVisit(PackageDeclaration node) { + HashMap data = this._cache.pop(); + String name = (String)data.remove("name"); this.symbols.setPackage(name); - return true; } public boolean visit(TypeDeclaration node) { - Name nameObj = node.getName(); - String name = nameObj.isQualifiedName() ? 
- ((QualifiedName) nameObj).getFullyQualifiedName() : - ((SimpleName) nameObj).getIdentifier(); + HashMap data = new HashMap(); int sl = this.root.getLineNumber(node.getStartPosition()); int sc = this.root.getColumnNumber(node.getStartPosition()); + data.put("coord", Symbols.createCoord(sl, sc, null, null)); + this._cache.push(data); + return true; + } + + public void endVisit(TypeDeclaration node) { + HashMap data = this._cache.pop(); + String name = (String)data.remove("name"); + if (node.isInterface()) { - this.symbols.insertInterfaceDeclaration(name, sl, sc, null, null); + this.symbols.insertInterfaceDeclaration(name, data); } else { - this.symbols.insertClassDeclaration(name, sl, sc, null, null); + this.symbols.insertClassDeclaration(name, data); } - return true; } public boolean visit(VariableDeclarationFragment node) { - Name nameObj = node.getName(); - String name = nameObj.isQualifiedName() ? - ((QualifiedName) nameObj).getFullyQualifiedName() : - ((SimpleName) nameObj).getIdentifier(); - + HashMap data = new HashMap(); int sl = this.root.getLineNumber(node.getStartPosition()); int sc = this.root.getColumnNumber(node.getStartPosition()); - this.symbols.insertVariableDeclaration(name, sl, sc, null, null); + + data.put("coord", Symbols.createCoord(sl, sc, null, null)); + this._cache.push(data); + return true; + } + + public void endVisit(VariableDeclarationFragment node) { + HashMap data = this._cache.pop(); + String name = (String)data.remove("name"); + this.symbols.insertVariableDeclaration(name, data); + } + + public boolean visit(QualifiedName node) { + if (!this._cache.empty()) { + HashMap data = this._cache.pop(); + + if(!data.containsKey("name")) { + String name = node.getFullyQualifiedName(); + data.put("name", name); + } + + this._cache.push(data); + } + return true; + } + + public boolean visit(SimpleName node) { + if (!this._cache.empty()) { + HashMap data = this._cache.pop(); + + if(!data.containsKey("name")) { + String name = 
node.getIdentifier(); + data.put("name", name); + } + + this._cache.push(data); + } return true; } diff --git a/parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java b/parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java index 7ce9b7c..088c185 100644 --- a/parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java +++ b/parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java @@ -1,6 +1,12 @@ package com.bitshift.parsing.parsers; +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.io.PrintWriter; +import java.io.IOException; + import java.net.Socket; + import com.bitshift.parsing.symbols.Symbols; public abstract class Parser implements Runnable { @@ -11,7 +17,43 @@ public abstract class Parser implements Runnable { this.clientSocket = clientSocket; } - abstract Symbols genSymbols(); + protected String readFromClient() { + String fromClient = ""; + + try { + BufferedReader clientReader = new BufferedReader( + new InputStreamReader(this.clientSocket.getInputStream())); + + int bytes = Integer.parseInt(clientReader.readLine()); + + StringBuilder builder = new StringBuilder(); + int i = 0; + + while(i < bytes) { + char aux = (char)clientReader.read(); + builder.append(aux); + i++; + } + + fromClient = builder.toString(); + + } catch (IOException ex) { + } + + return fromClient; + } + + protected void writeToClient(String toClient) { + try { + PrintWriter clientWriter = new PrintWriter( + this.clientSocket.getOutputStream(), true); + + clientWriter.println(toClient); + } catch (IOException ex) { + } + } + + protected abstract Symbols genSymbols(); public abstract void run(); diff --git a/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java b/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java index ef8efb5..bec9c0f 100644 --- a/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java +++ 
b/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java @@ -1,7 +1,6 @@ package com.bitshift.parsing.symbols; import java.util.List; -import java.util.Map; import java.util.HashMap; import java.util.ArrayList; import com.bitshift.parsing.symbols.Symbols; @@ -10,19 +9,19 @@ import com.bitshift.parsing.symbols.Symbols; public class JavaSymbols extends Symbols { private String _packageName; - private Map _classes; - private Map _interfaces; - private Map _methods; - private Map _fields; - private Map _vars; + private HashMap> _classes; + private HashMap> _interfaces; + private HashMap> _methods; + private HashMap> _fields; + private HashMap> _vars; public JavaSymbols() { _packageName = null; - _classes = new HashMap(); - _interfaces = new HashMap(); - _methods = new HashMap(); - _fields = new HashMap(); - _vars = new HashMap(); + _classes = new HashMap>(); + _interfaces = new HashMap>(); + _methods = new HashMap>(); + _fields = new HashMap>(); + _vars = new HashMap>(); } public boolean setPackage(String name) { @@ -30,118 +29,78 @@ public class JavaSymbols extends Symbols { return true; } - public boolean insertClassDeclaration(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { - List pos = new ArrayList(4); - pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); - - List> copy = (List>)_classes.get(name); - copy = (copy == null) ? new ArrayList>() : copy; - - copy.add(0, pos); - this._classes.put(name, copy); - return true; - } - public boolean insertClassInstance(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { - List pos = new ArrayList(4); - pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); - - List> copy = (List>)_classes.get(name); - copy = (copy == null) ? 
new ArrayList>() : copy; - - copy.add(pos); - this._classes.put(name, copy); + public boolean insertClassDeclaration(String name, HashMap data) { + this._classes.put(name, data); return true; } - public boolean insertInterfaceDeclaration(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { - List pos = new ArrayList(4); - pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); - - List> copy = (List>)_interfaces.get(name); - copy = (copy == null) ? new ArrayList>() : copy; - - copy.add(0, pos); - this._interfaces.put(name, copy); - return true; - } - public boolean insertInterfaceInstance(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { - List pos = new ArrayList(4); - pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); - - List> copy = (List>)_interfaces.get(name); - copy = (copy == null) ? new ArrayList>() : copy; - - copy.add(pos); - this._interfaces.put(name, copy); + public boolean insertInterfaceDeclaration(String name, HashMap data) { + this._interfaces.put(name, data); return true; } - public boolean insertMethodDeclaration(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { - List pos = new ArrayList(4); - pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); - - List> copy = (List>)_methods.get(name); - copy = (copy == null) ? 
new ArrayList>() : copy; + public boolean insertMethodDeclaration(String name, HashMap data) { + HashMap method = this._methods.get(name); + if (method == null) { + method = new HashMap(); + method.put("declaration", data); + } else { + method.put("declaration", data); + } - copy.add(0, pos); - this._methods.put(name, copy); + this._methods.put(name, method); return true; } - public boolean insertMethodInvocation(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { - List pos = new ArrayList(4); - pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); - - List> copy = (List>)_methods.get(name); - copy = (copy == null) ? new ArrayList>() : copy; - - copy.add(pos); - this._methods.put(name, copy); + public boolean insertMethodInvocation(String name, HashMap data) { + HashMap method = this._methods.get(name); + if (method == null) { + method = new HashMap(); + ArrayList calls = new ArrayList(10); + calls.add(data); + method.put("calls", calls); + } else { + ArrayList calls = (ArrayList)method.get("calls"); + calls = (calls == null) ? new ArrayList(10) : calls; + calls.add(data); + method.put("calls", calls); + } + + this._methods.put(name, method); return true; } - public boolean insertFieldDeclaration(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { - List pos = new ArrayList(4); - pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); - - List> copy = (List>)_fields.get(name); - copy = (copy == null) ? new ArrayList>() : copy; - - copy.add(0, pos); - this._fields.put(name, copy); - return true; - } - public boolean insertFieldAccess(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { - List pos = new ArrayList(4); - pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); - - List> copy = (List>)_fields.get(name); - copy = (copy == null) ? 
new ArrayList>() : copy; - - copy.add(pos); - this._fields.put(name, copy); + public boolean insertFieldDeclaration(String name, HashMap data) { + this._fields.put(name, data); return true; } - public boolean insertVariableDeclaration(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { - List pos = new ArrayList(4); - pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); - - List> copy = (List>)_vars.get(name); - copy = (copy == null) ? new ArrayList>() : copy; + public boolean insertVariableDeclaration(String name, HashMap data) { + HashMap var = this._vars.get(name); + if (var == null) { + var = new HashMap(); + var.put("declaration", data); + } else { + var.put("declaration", data); + } - copy.add(0, pos); - this._vars.put(name, copy); + this._vars.put(name, var); return true; } - public boolean insertVariableAccess(String name, Integer startLine, Integer startCol, Integer endLine, Integer endCol) { - List pos = new ArrayList(4); - pos.add(startLine); pos.add(startCol); pos.add(endLine); pos.add(endCol); - - List> copy = (List>)_vars.get(name); - copy = (copy == null) ? new ArrayList>() : copy; - - copy.add(pos); - this._vars.put(name, copy); + public boolean insertVariableAccess(String name, HashMap data) { + HashMap var = this._vars.get(name); + if (var == null) { + var = new HashMap(); + ArrayList uses = new ArrayList(10); + uses.add(data); + var.put("uses", uses); + } else { + ArrayList uses = (ArrayList)var.get("uses"); + uses = (uses == null) ? 
new ArrayList(10) : uses; + uses.add(data); + var.put("uses", uses); + } + + this._vars.put(name, var); return true; } diff --git a/parsers/java/src/main/java/com/bitshift/parsing/symbols/Symbols.java b/parsers/java/src/main/java/com/bitshift/parsing/symbols/Symbols.java index 116dbd7..8bbf44d 100644 --- a/parsers/java/src/main/java/com/bitshift/parsing/symbols/Symbols.java +++ b/parsers/java/src/main/java/com/bitshift/parsing/symbols/Symbols.java @@ -1,9 +1,17 @@ package com.bitshift.parsing.symbols; +import java.util.ArrayList; + public abstract class Symbols { public Symbols() { } + public static ArrayList createCoord(Integer startLine, Integer startCol, Integer endLine, Integer endCol) { + ArrayList coord = new ArrayList(4); + coord.add(startLine); coord.add(startCol); coord.add(endLine); coord.add(endCol); + return coord; + } + } From 64ef9b04f23348175d0c6b060a31e61e3e19b2e1 Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Sun, 20 Apr 2014 12:22:22 -0400 Subject: [PATCH 23/36] Remove unecessary imports --- .../src/main/java/com/bitshift/parsing/parsers/JavaParser.java | 8 +------- .../src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java | 1 - 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java b/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java index d7e1c10..48287aa 100644 --- a/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java +++ b/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java @@ -13,18 +13,14 @@ import org.eclipse.jdt.core.dom.ASTParser; import org.eclipse.jdt.core.dom.ASTVisitor; import org.eclipse.jdt.core.dom.CompilationUnit; import org.eclipse.jdt.core.dom.ClassInstanceCreation; -import org.eclipse.jdt.core.dom.FieldAccess; import org.eclipse.jdt.core.dom.FieldDeclaration; import org.eclipse.jdt.core.dom.MethodDeclaration; import org.eclipse.jdt.core.dom.MethodInvocation; import 
org.eclipse.jdt.core.dom.Name; import org.eclipse.jdt.core.dom.PackageDeclaration; import org.eclipse.jdt.core.dom.QualifiedName; -import org.eclipse.jdt.core.dom.QualifiedType; import org.eclipse.jdt.core.dom.SimpleName; -import org.eclipse.jdt.core.dom.SimpleType; import org.eclipse.jdt.core.dom.Statement; -import org.eclipse.jdt.core.dom.Type; import org.eclipse.jdt.core.dom.TypeDeclaration; import org.eclipse.jdt.core.dom.VariableDeclarationFragment; @@ -32,9 +28,7 @@ import com.bitshift.parsing.parsers.Parser; import com.bitshift.parsing.symbols.Symbols; import com.bitshift.parsing.symbols.JavaSymbols; -/*TODO: Work on parsing partial java code. - * Change visits to endVisit and implement a cache for more concise code structure. - * Get rid of unecessary imports.*/ +/*TODO: Work on parsing partial java code.*/ public class JavaParser extends Parser { public JavaParser(Socket clientSocket) { diff --git a/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java b/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java index bec9c0f..9265feb 100644 --- a/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java +++ b/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java @@ -1,6 +1,5 @@ package com.bitshift.parsing.symbols; -import java.util.List; import java.util.HashMap; import java.util.ArrayList; import com.bitshift.parsing.symbols.Symbols; From 2d7c1f47686e05bcae8a58d5969b24922a977360 Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Sun, 20 Apr 2014 13:22:15 -0400 Subject: [PATCH 24/36] Fix array out of bounds exception coming from JavaParser.java --- .../main/java/com/bitshift/parsing/parsers/JavaParser.java | 13 +++++++++---- test/{java_parser_test.py => parser_test.py} | 0 2 files changed, 9 insertions(+), 4 deletions(-) rename test/{java_parser_test.py => parser_test.py} (100%) diff --git a/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java 
b/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java index 48287aa..0150af4 100644 --- a/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java +++ b/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java @@ -94,14 +94,19 @@ public class JavaParser extends Parser { ((QualifiedName) nameObj).getFullyQualifiedName() : ((SimpleName) nameObj).getIdentifier(); List statements = node.getBody().statements(); - Statement last = statements.get(statements.size() - 1); int sl = this.root.getLineNumber(node.getStartPosition()); int sc = this.root.getColumnNumber(node.getStartPosition()); - int el = this.root.getLineNumber(last.getStartPosition()); - int ec = this.root.getColumnNumber(last.getStartPosition()); + Integer el = null; + Integer ec = null; - data.put("coord", Symbols.createCoord(sl, sc, null, null)); + if (statements.size() > 0) { + Statement last = statements.get(statements.size() - 1); + el = this.root.getLineNumber(last.getStartPosition()); + ec = this.root.getColumnNumber(last.getStartPosition()); + } + + data.put("coord", Symbols.createCoord(sl, sc, el, ec)); data.put("name", name); this._cache.push(data); return true; diff --git a/test/java_parser_test.py b/test/parser_test.py similarity index 100% rename from test/java_parser_test.py rename to test/parser_test.py From c859416d2d35aab83fc9e8f400e00f8f07c0b8a9 Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Sun, 20 Apr 2014 13:22:47 -0400 Subject: [PATCH 25/36] Change test file to support different parsers --- test/parser_test.py | 48 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/test/parser_test.py b/test/parser_test.py index 7f87f2c..253da10 100644 --- a/test/parser_test.py +++ b/test/parser_test.py @@ -1,18 +1,40 @@ -import socket +import socket, sys -client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) -client_socket.connect(("localhost", 5002)) +file_name = 
'resources/.c' +server_socket_number = 5001 -with open("resources/Matrix.java", "r") as java_file: - source = java_file.read() + "\nEOS_BITSHIFT" - client_socket.send("%d\n%s" % (len(source), source)); +if __name__ == '__main__': + if len(sys.argv) == 1: + print "Please input a parser to test." -data = '' -while True: - data = client_socket.recv(10000) + elif len(sys.argv) > 2: + print "Too many arguments." - if data != '': - client_socket.close() - break; + else: + if sys.argv[1] == 'c': + pass -print data; + elif sys.argv[1] == 'java': + file_name = "resources/Matrix.java" + server_socket_number = 5002 + + elif sys.argv[1] == 'ruby': + file_name = "resources/.rb" + server_socket_number = 5003 + + client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + client_socket.connect(("localhost", server_socket_number)) + + with open(file_name, "r") as source_file: + source = source_file.read() + client_socket.send("%d\n%s" % (len(source), source)); + + data = '' + while True: + data = client_socket.recv(10000) + + if data != '': + client_socket.close() + break; + + print data; From 08f16074fb297ae8b16c043693b8f17f26cdc4c1 Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Sun, 20 Apr 2014 20:46:55 -0400 Subject: [PATCH 26/36] Add template for ruby parser --- .gitignore | 3 +++ parsers/ruby/Gemfile | 4 ++++ parsers/ruby/Rakefile | 0 3 files changed, 7 insertions(+) create mode 100644 parsers/ruby/Gemfile create mode 100644 parsers/ruby/Rakefile diff --git a/.gitignore b/.gitignore index 26d8b64..216b1f5 100644 --- a/.gitignore +++ b/.gitignore @@ -43,4 +43,7 @@ nosetests.xml # Maven parsers/java/target/* + +# Ctags */tags +logs/* diff --git a/parsers/ruby/Gemfile b/parsers/ruby/Gemfile new file mode 100644 index 0000000..cfb76e2 --- /dev/null +++ b/parsers/ruby/Gemfile @@ -0,0 +1,4 @@ +source 'https://rubygems.org' + +gem 'ruby_parser' +gem 'sexp_processor' diff --git a/parsers/ruby/Rakefile b/parsers/ruby/Rakefile new file mode 100644 index 0000000..e69de29 
From 7f1d9dd2d3cc6aba91df0a27aeeaa27fc4861878 Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Sun, 27 Apr 2014 22:46:07 -0400 Subject: [PATCH 27/36] Add a working preliminary version of the ruby parser. Still need to add a rule for running it in the Rakefile. Add: parser_server.rb: - listens for connections from the python client process parser.rb: - creates a syntax tree from the input and returns relevant data about it to the client --- .gitignore | 7 ++- parsers/ruby/lib/parse_server.rb | 22 +++++++ parsers/ruby/lib/parser.rb | 125 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 152 insertions(+), 2 deletions(-) create mode 100644 parsers/ruby/lib/parse_server.rb create mode 100644 parsers/ruby/lib/parser.rb diff --git a/.gitignore b/.gitignore index 216b1f5..319057d 100644 --- a/.gitignore +++ b/.gitignore @@ -42,8 +42,11 @@ nosetests.xml .pydevproject # Maven -parsers/java/target/* +target + +# Ruby +!parsers/ruby/lib # Ctags */tags -logs/* +log diff --git a/parsers/ruby/lib/parse_server.rb b/parsers/ruby/lib/parse_server.rb new file mode 100644 index 0000000..bcc605f --- /dev/null +++ b/parsers/ruby/lib/parse_server.rb @@ -0,0 +1,22 @@ +require 'socket' +require File.expand_path('../parser.rb', __FILE__) + +server = TCPServer.new 5003 + +loop do + # Start a new thread for each client accepted + Thread.start(server.accept) do |client| + begin + # Get the amount of data to be read + size = (client.readline).to_i + p = Bitshift::Parser.new client.read(size) + # Get the parsed result + symbols = p.parse.to_s + client.puts [symbols.length].pack('c') + client.puts symbols + ensure + # Close the socket + client.close + end + end +end diff --git a/parsers/ruby/lib/parser.rb b/parsers/ruby/lib/parser.rb new file mode 100644 index 0000000..5751ce0 --- /dev/null +++ b/parsers/ruby/lib/parser.rb @@ -0,0 +1,125 @@ +require 'socket' +require 'ruby_parser' +require 'sexp_processor' + +module Bitshift + class Parser + def initialize(source) + @source = source + 
end + + def parse + parser = RubyParser.new + tree = parser.parse(@source) + offset = tree.line - 1 + processor = NodeVisitor.new offset + processor.process tree + return processor.symbols + end + end + + class NodeVisitor < SexpProcessor + attr_accessor :symbols + attr_accessor :offset + + def initialize(offset) + super() + @require_empty = false + @offset = offset + + module_hash = Hash.new {|hash, key| hash[key] = Hash.new} + class_hash = module_hash.clone + function_hash = Hash.new {|hash, key| hash[key] = { calls: [] } } + var_hash = Hash.new {|hash, key| hash[key] = [] } + + @symbols = { + modules: module_hash, + classes: class_hash, + functions: function_hash, + vars: var_hash + } + end + + def block_position(exp) + pos = Hash.new + end_ln = (start_ln = exp.line - offset) + cur_exp = exp + + while cur_exp.is_a? Sexp + end_ln = cur_exp.line - offset + cur_exp = cur_exp.last + break if cur_exp == nil + end + + pos[:coord] = { + start_ln: start_ln, + end_ln: end_ln } + return pos + end + + def statement_position(exp) + pos = Hash.new + end_ln = start_ln = exp.line - offset + + pos[:coord] = { + start_ln: start_ln, + end_ln: end_ln } + return pos + end + + def process_module(exp) + pos = block_position exp + exp.shift + name = exp.shift + symbols[:modules][name] = pos + exp.each_sexp {|s| process(s)} + return exp.clear + end + + def process_class(exp) + pos = block_position exp + exp.shift + name = exp.shift + symbols[:classes][name] = pos + exp.each_sexp {|s| process(s)} + return exp.clear + end + + def process_defn(exp) + pos = block_position exp + exp.shift + name = exp.shift + symbols[:functions][name][:declaration] = pos + exp.each_sexp {|s| process(s)} + return exp.clear + end + + def process_call(exp) + pos = statement_position exp + exp.shift + exp.shift + name = exp.shift + symbols[:functions][name][:calls] << pos + exp.each_sexp {|s| process(s)} + return exp.clear + end + + def process_iasgn(exp) + pos = statement_position exp + exp.shift + name = 
exp.shift + symbols[:vars][name] << pos + exp.each_sexp {|s| process(s)} + return exp.clear + end + + def process_lasgn(exp) + pos = statement_position exp + exp.shift + name = exp.shift + symbols[:vars][name] << pos + exp.each_sexp {|s| process(s)} + return exp.clear + end + end +end From d8b234f4629529e07b22b8779e8954c865671a0a Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Sun, 27 Apr 2014 22:47:19 -0400 Subject: [PATCH 28/36] Update docstrings and parser dispatching in parser init file. --- bitshift/parser/__init__.py | 35 ++++++++++++----------------------- bitshift/parser/java.py | 0 bitshift/parser/ruby.py | 0 3 files changed, 12 insertions(+), 23 deletions(-) create mode 100644 bitshift/parser/java.py create mode 100644 bitshift/parser/ruby.py diff --git a/bitshift/parser/__init__.py b/bitshift/parser/__init__.py index 6f2f898..8d37d74 100644 --- a/bitshift/parser/__init__.py +++ b/bitshift/parser/__init__.py @@ -1,7 +1,9 @@ -import os, ast import pygments.lexers as pgl from ..languages import LANGS from .python import parse_py +from .c import parse_c +from .java import parse_java +from .ruby import parse_ruby _all__ = ["parse"] @@ -24,39 +26,26 @@ def _lang(codelet): def parse(codelet, pid): """ - Sends codelet code to the Java parsing process via a named pipe. Reads the - resulting symbols from the pipe and updates the codelet. + Dispatches the codelet to the correct parser based on its language. :param codelet: The codelet object to parsed. :param pid: The id of the current python process. :type code: Codelet :param pid: str. - - .. todo:: - Identify languages using pygments and change the write file based on - that. 
""" - codelet.language = _lang(codelet) + lang = _lang(codelet) - if codelet.language == LANGS.index("Python"): + if lang == LANGS.index("Python"): parse_py(codelet) - else: - write_f = "../../tmp/%d_parser.proc" % codelet.language - - with open(write_f, 'a') as wf: - wf.write('pid:' + str(pid) + '\n') - wf.write('body:\n' + codelet.code) - - read_f = '../../tmp/%s_py.data' % str(pid) - data = '' + elif lang == LANGS.index("C"): + parse_c(codelet) - while data == '': - with open(read_f) as rf: - data = rf.read() + elif lang == LANGS.index("Java"): + parse_java(codelet) - os.remove(read_f) - codelet.symbols = ast.literal_eval(data.split(',')[1]) + elif lang == LANGS.index("Ruby"): + parse_ruby(codelet) diff --git a/bitshift/parser/java.py b/bitshift/parser/java.py new file mode 100644 index 0000000..e69de29 diff --git a/bitshift/parser/ruby.py b/bitshift/parser/ruby.py new file mode 100644 index 0000000..e69de29 From 6e54eb51473b1053c9d17ccc51e78d01e2bd64c0 Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Sun, 27 Apr 2014 22:48:02 -0400 Subject: [PATCH 29/36] Java server tells python client how much data to read. 
--- .../java/com/bitshift/parsing/parsers/Parser.java | 7 +- .../com/bitshift/parsing/utils/PackableMemory.java | 89 +++++++++++++++ test/parser_test.py | 40 +++++-- test/resources/parser.rb | 126 +++++++++++++++++++++ 4 files changed, 248 insertions(+), 14 deletions(-) create mode 100644 parsers/java/src/main/java/com/bitshift/parsing/utils/PackableMemory.java create mode 100644 test/resources/parser.rb diff --git a/parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java b/parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java index 088c185..9d00954 100644 --- a/parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java +++ b/parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java @@ -8,6 +8,7 @@ import java.io.IOException; import java.net.Socket; import com.bitshift.parsing.symbols.Symbols; +import com.bitshift.parsing.utils.PackableMemory; public abstract class Parser implements Runnable { @@ -48,7 +49,9 @@ public abstract class Parser implements Runnable { PrintWriter clientWriter = new PrintWriter( this.clientSocket.getOutputStream(), true); - clientWriter.println(toClient); + PackableMemory mem = new PackableMemory(toClient.length()); + String dataSize = new String(mem.mem); + clientWriter.println(dataSize + toClient); } catch (IOException ex) { } } @@ -56,6 +59,6 @@ public abstract class Parser implements Runnable { protected abstract Symbols genSymbols(); public abstract void run(); - + } diff --git a/parsers/java/src/main/java/com/bitshift/parsing/utils/PackableMemory.java b/parsers/java/src/main/java/com/bitshift/parsing/utils/PackableMemory.java new file mode 100644 index 0000000..24d883c --- /dev/null +++ b/parsers/java/src/main/java/com/bitshift/parsing/utils/PackableMemory.java @@ -0,0 +1,89 @@ +package com.bitshift.parsing.utils; + +//This class contains implementations of methods to +// -- pack an integer into 4 consecutive bytes of a byte array +// -- unpack an integer from 4 consecutive bytes of a byte 
array +// -- exhaustively test the pack and unpack methods. +// +// This file should be saved as PackableMemory.java. Once it has been +// compiled, the tester can be invoked by typing "java PackableMemory" + +public class PackableMemory { + int size; + public byte mem[] = null; + + public PackableMemory(int size) + { + this.size = size; + this.mem = new byte[size]; + } + + // Pack the 4-byte integer val into the four bytes mem[loc]...mem[loc+3]. + // The most significant porion of the integer is stored in mem[loc]. + // Bytes are masked out of the integer and stored in the array, working + // from right(least significant) to left (most significant). + void pack(int val, int loc) + { + final int MASK = 0xff; + for (int i = 3; i >= 0; i--) + { + mem[loc+i] = (byte)(val & MASK); + val = val >> 8; + } + } + + // Unpack the four bytes mem[loc]...mem[loc+3] into a 4-byte integer, + // and return the resulting integer value. + // The most significant porion of the integer is stored in mem[loc]. + // Bytes are 'OR'ed into the integer, working from left (most significant) + // to right (least significant) + int unpack(int loc) + { + final int MASK = 0xff; + int v = (int)mem[loc] & MASK; + for (int i = 1; i < 4; i++) + { + v = v << 8; + v = v | ((int)mem[loc+i] & MASK); + } + return v; + } + + + + // Test the above pack and unpack methods by iterating the following + // over all possible 4-byte integers: pack the integer, + // then unpack it, and then verify that the unpacked integer equals the + // original integer. It tests all nonnegative numbers in ascending order + // and then all negative numbers in ascending order. The transition from + // positive to negative numbers happens implicitly due to integer overflow. 
+ public void packTest() + { + + int i = 0; + long k = 0; + do + { + this.pack(i,4); + int j = this.unpack(4); + if (j != i) + { + System.out.printf("pack/unpack test failed: i = %d, j = %d\n",i,j); + System.exit(0); + } + i++; k++; + } + while (i != 0); + System.out.printf("pack/unpack test successful, %d iterations\n",k); + } + + // main routine to test the PackableMemory class by running the + // packTest() method. + public static void main(String[] args) + { + PackableMemory pm = new PackableMemory(100); + pm.packTest(); + System.exit(0); + } +} + diff --git a/test/parser_test.py b/test/parser_test.py index 253da10..a1cfad3 100644 --- a/test/parser_test.py +++ b/test/parser_test.py @@ -1,7 +1,8 @@ -import socket, sys +import socket, sys, struct file_name = 'resources/.c' server_socket_number = 5001 +recv_size = 8192 if __name__ == '__main__': if len(sys.argv) == 1: @@ -19,22 +20,37 @@ if __name__ == '__main__': server_socket_number = 5002 elif sys.argv[1] == 'ruby': - file_name = "resources/.rb" + file_name = "resources/parser.rb" server_socket_number = 5003 - client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - client_socket.connect(("localhost", server_socket_number)) + server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server_socket.connect(("localhost", server_socket_number)) with open(file_name, "r") as source_file: source = source_file.read() - client_socket.send("%d\n%s" % (len(source), source)); + server_socket.send("%d\n%s" % (len(source), source)); - data = '' - while True: - data = client_socket.recv(10000) + total_data = []; size_data = cur_data = '' + total_size = 0; size = sys.maxint - if data != '': - client_socket.close() - break; + while total_size < size: + cur_data = server_socket.recv(recv_size) - print data; + if not total_data: + if len(size_data) > 4: + size_data += cur_data + size = struct.unpack('>i', size_data[:4])[0] + recv_size = size + if recv_size > sys.maxint: recv_size = sys.maxint + 
total_data.append(size_data[4:]) + else: + size_data += cur_data + + else: + total_data.append(cur_data) + + total_size = sum([len(s) for s in total_data]) + + + server_socket.close() + print ''.join(total_data); diff --git a/test/resources/parser.rb b/test/resources/parser.rb new file mode 100644 index 0000000..01d934b --- /dev/null +++ b/test/resources/parser.rb @@ -0,0 +1,126 @@ +require 'socket' +require 'ruby_parser' +require 'sexp_processor' + +module Bitshift + class Parser + def initialize(source) + @source = source + end + + def parse + parser = RubyParser.new + tree = parser.parse(@source) + puts tree.inspect + offset = tree.line - 1 + processor = NodeVisitor.new offset + processor.process tree + return processor.symbols + end + end + + class NodeVisitor < SexpProcessor + attr_accessor :symbols + attr_accessor :offset + + def initialize(offset) + super() + @require_empty = false + @offset = offset + + module_hash = Hash.new {|hash, key| hash[key] = Hash.new} + class_hash = module_hash.clone + function_hash = Hash.new {|hash, key| hash[key] = { calls: [] } } + var_hash = Hash.new {|hash, key| hash[key] = [] } + + @symbols = { + modules: module_hash, + classes: class_hash, + functions: function_hash, + vars: var_hash + } + end + + def block_position(exp) + pos = Hash.new + end_ln = (start_ln = exp.line - offset) + cur_exp = exp + + while cur_exp.is_a? 
Sexp + end_ln = cur_exp.line - offset + cur_exp = cur_exp.last + break if cur_exp == nil + end + + pos[:coord] = { + start_ln: start_ln, + end_ln: end_ln } + return pos + end + + def statement_position(exp) + pos = Hash.new + end_ln = start_ln = exp.line - offset + + pos[:coord] = { + start_ln: start_ln, + end_ln: end_ln } + return pos + end + + def process_module(exp) + pos = block_position exp + exp.shift + name = exp.shift + symbols[:modules][name] = pos + exp.each_sexp {|s| process(s)} + return exp.clear + end + + def process_class(exp) + pos = block_position exp + exp.shift + name = exp.shift + symbols[:classes][name] = pos + exp.each_sexp {|s| process(s)} + return exp.clear + end + + def process_defn(exp) + pos = block_position exp + exp.shift + name = exp.shift + symbols[:functions][name][:declaration] = pos + exp.each_sexp {|s| process(s)} + return exp.clear + end + + def process_call(exp) + pos = statement_position exp + exp.shift + exp.shift + name = exp.shift + symbols[:functions][name][:calls] << pos + exp.each_sexp {|s| process(s)} + return exp.clear + end + + def process_iasgn(exp) + pos = statement_position exp + exp.shift + name = exp.shift + symbols[:vars][name] << pos + exp.each_sexp {|s| process(s)} + return exp.clear + end + + def process_lasgn(exp) + pos = statement_position exp + exp.shift + name = exp.shift + symbols[:vars][name] << pos + exp.each_sexp {|s| process(s)} + return exp.clear + end + end +end From 71dec1d2690f2e0074345e0aea02d81d38d4dc40 Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Sun, 27 Apr 2014 23:21:44 -0400 Subject: [PATCH 30/36] Modify the python parser. Make data more descriptive by adding data about function calls. 
--- bitshift/parser/python.py | 72 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 56 insertions(+), 16 deletions(-) diff --git a/bitshift/parser/python.py b/bitshift/parser/python.py index 1e011fb..2f15cb5 100644 --- a/bitshift/parser/python.py +++ b/bitshift/parser/python.py @@ -58,16 +58,20 @@ class _TreeCutter(ast.NodeVisitor): if isinstance(t, ast.Tuple): for n in t.elts: line, col = n.lineno, n.col_offset - self.accum['functions'][n.id]['start_ln'] = line - self.accum['functions'][n.id]['start_col'] = col - self.accum['functions'][n.id]['end_ln'] = line - self.accum['functions'][n.id]['end_ln'] = col + + if not self.accum['vars'].has_key(node.name): + self.accum['vars'][node.name] = {'declaration': {}, 'uses': []} + + self.accum['vars'][n.id]['declaration']['start_ln'] = line + self.accum['vars'][n.id]['declaration']['start_col'] = col + self.accum['vars'][n.id]['declaration']['end_ln'] = line + self.accum['vars'][n.id]['declaration']['end_ln'] = col else: line, col = t.lineno, t.col_offset - self.accum['functions'][t.id]['start_ln'] = line - self.accum['functions'][t.id]['start_col'] = col - self.accum['functions'][t.id]['end_ln'] = line - self.accum['functions'][t.id]['end_ln'] = col + self.accum['vars'][t.id]['declaration']['start_ln'] = line + self.accum['vars'][t.id]['declaration']['start_col'] = col + self.accum['vars'][t.id]['declaration']['end_ln'] = line + self.accum['vars'][t.id]['declaration']['end_ln'] = col self.generic_visit(node) @@ -84,13 +88,45 @@ class _TreeCutter(ast.NodeVisitor): """ start_line, start_col, end_line, end_col = self.start_n_end(node) - self.accum['functions'][node.name]['start_ln'] = start_line - self.accum['functions'][node.name]['start_col'] = start_col - self.accum['functions'][node.name]['end_ln'] = end_line - self.accum['functions'][node.name]['end_ln'] = end_col + + if not self.accum['functions'].has_key(node.name): + self.accum['functions'][node.name] = {'declaration': {}, 'calls': []} + + 
self.accum['functions'][node.name]['declaration']['start_ln'] = start_line + self.accum['functions'][node.name]['declaration']['start_col'] = start_col + self.accum['functions'][node.name]['declaration']['end_ln'] = end_line + self.accum['functions'][node.name]['declaration']['end_ln'] = end_col self.generic_visit(node) + def visit_Call(self, node): + """ + Visits Function Call nodes in a tree. Adds relevant data about them + in the functions section for accum. + + :param node: The current node. + + :type node: ast.Call + + .. todo:: + Add arguments and decorators metadata to accum. + """ + + line, col = node.line_no, node.col_offset + + if not self.accum['functions'].has_key(node.name): + self.accum['functions'][node.name] = {'declaration': {}, 'calls': []} + + pos = {} + pos['start_line'] = line + pos['start_col'] = col + pos['end_line'] = line + pos['end_col'] = col + self.accum['functions'][node.name]['calls'].append(pos) + + self.generic_visit(node) + + def visit_ClassDef(self, node): """ Visits ClassDef nodes in a tree. Adds relevant data about them to accum. @@ -104,13 +140,17 @@ class _TreeCutter(ast.NodeVisitor): """ start_line, start_col, end_line, end_col = self.start_n_end(node) - self.accum['functions'][node.name]['start_ln'] = start_line - self.accum['functions'][node.name]['start_col'] = start_col - self.accum['functions'][node.name]['end_ln'] = end_line - self.accum['functions'][node.name]['end_ln'] = end_col + + self.accum['classes'][node.name]['start_ln'] = start_line + self.accum['classes'][node.name]['start_col'] = start_col + self.accum['classes'][node.name]['end_ln'] = end_line + self.accum['classes'][node.name]['end_ln'] = end_col self.generic_visit(node) + def visit_Name(self, node): + pass + def parse_py(codelet): """ Adds 'symbols' field to the codelet after parsing the python code. 
From b16bc40d3f76eb512a3ff1656cae3a68fc3dd677 Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Mon, 28 Apr 2014 00:14:07 -0400 Subject: [PATCH 31/36] Consolidate parsers into __init__.py. Update python.py parser. --- bitshift/parser/__init__.py | 64 ++++++++++++++++------ bitshift/parser/java.py | 0 bitshift/parser/python.py | 52 +++++++++++------- bitshift/parser/ruby.py | 0 .../java/com/bitshift/parsing/symbols/Symbols.java | 2 +- 5 files changed, 80 insertions(+), 38 deletions(-) delete mode 100644 bitshift/parser/java.py delete mode 100644 bitshift/parser/ruby.py diff --git a/bitshift/parser/__init__.py b/bitshift/parser/__init__.py index 8d37d74..571e25d 100644 --- a/bitshift/parser/__init__.py +++ b/bitshift/parser/__init__.py @@ -1,9 +1,6 @@ -import pygments.lexers as pgl +import ast, pygments.lexers as pgl, sys, socket, struct from ..languages import LANGS from .python import parse_py -from .c import parse_c -from .java import parse_java -from .ruby import parse_ruby _all__ = ["parse"] @@ -24,28 +21,63 @@ def _lang(codelet): return LANGS.index(pgl.guess_lexer(codelet.code)) -def parse(codelet, pid): +def _recv_data(server_socket): + """ + Private function to read string response from a server. It reads a certain + amount of data based on the size it is sent from the server. + + :param server_socket: The server that the client is connected to, and will, + read from. 
+ + :type code: socket.ServerSocket + """ + + recv_size = 8192 + total_data = []; size_data = cur_data = '' + total_size = 0; size = sys.maxint + + while total_size < size: + cur_data = server_socket.recv(recv_size) + + if not total_data: + if len(size_data) > 4: + size_data += cur_data + size = struct.unpack('>i', size_data[:4])[0] + recv_size = size + if recv_size > sys.maxint: recv_size = sys.maxint + total_data.append(size_data[4:]) + else: + size_data += cur_data + + else: + total_data.append(cur_data) + + total_size = sum([len(s) for s in total_data]) + + server_socket.close() + return ''.join(total_data); + + +def parse(codelet): """ Dispatches the codelet to the correct parser based on its language. :param codelet: The codelet object to parsed. - :param pid: The id of the current python process. :type code: Codelet - :param pid: str. """ - lang = _lang(codelet) + lang = _lang(codelet); source = codelet.code + server_socket_number = 5000 + lang - if lang == LANGS.index("Python"): + if lang == LANGS.index('Python'): parse_py(codelet) - elif lang == LANGS.index("C"): - parse_c(codelet) - - elif lang == LANGS.index("Java"): - parse_java(codelet) + else: + server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server_socket.connect(("localhost", server_socket_number)) + server_socket.send("%d\n%s" % (len(source), source)); - elif lang == LANGS.index("Ruby"): - parse_ruby(codelet) + symbols = ast.literal_eval(_recv_data(server_socket)) + codelet.symbols = symbols diff --git a/bitshift/parser/java.py b/bitshift/parser/java.py deleted file mode 100644 index e69de29..0000000 diff --git a/bitshift/parser/python.py b/bitshift/parser/python.py index 2f15cb5..ac71e29 100644 --- a/bitshift/parser/python.py +++ b/bitshift/parser/python.py @@ -62,16 +62,22 @@ class _TreeCutter(ast.NodeVisitor): if not self.accum['vars'].has_key(node.name): self.accum['vars'][node.name] = {'declaration': {}, 'uses': []} - self.accum['vars'][n.id]['declaration']['start_ln'] = 
line - self.accum['vars'][n.id]['declaration']['start_col'] = col - self.accum['vars'][n.id]['declaration']['end_ln'] = line - self.accum['vars'][n.id]['declaration']['end_ln'] = col + pos = {'coord': {}} + pos['coord']['start_line'] = line + pos['coord']['start_col'] = col + pos['coord']['end_line'] = line + pos['coord']['end_col'] = col + self.accum['vars'][n.id]['declaration'] = pos + else: line, col = t.lineno, t.col_offset - self.accum['vars'][t.id]['declaration']['start_ln'] = line - self.accum['vars'][t.id]['declaration']['start_col'] = col - self.accum['vars'][t.id]['declaration']['end_ln'] = line - self.accum['vars'][t.id]['declaration']['end_ln'] = col + + pos = {'coord': {}} + pos['coord']['start_line'] = line + pos['coord']['start_col'] = col + pos['coord']['end_line'] = line + pos['coord']['end_col'] = col + self.accum['vars'][t.id]['declaration'] = pos self.generic_visit(node) @@ -92,10 +98,12 @@ class _TreeCutter(ast.NodeVisitor): if not self.accum['functions'].has_key(node.name): self.accum['functions'][node.name] = {'declaration': {}, 'calls': []} - self.accum['functions'][node.name]['declaration']['start_ln'] = start_line - self.accum['functions'][node.name]['declaration']['start_col'] = start_col - self.accum['functions'][node.name]['declaration']['end_ln'] = end_line - self.accum['functions'][node.name]['declaration']['end_ln'] = end_col + pos = {'coord': {}} + pos['coord']['start_ln']= start_line + pos['coord']['start_col'] = start_col + pos['coord']['end_ln'] = end_line + pos['coord']['end_col'] = end_col + self.accum['functions'][node.name]['declaration'] = pos self.generic_visit(node) @@ -117,11 +125,11 @@ class _TreeCutter(ast.NodeVisitor): if not self.accum['functions'].has_key(node.name): self.accum['functions'][node.name] = {'declaration': {}, 'calls': []} - pos = {} - pos['start_line'] = line - pos['start_col'] = col - pos['end_line'] = line - pos['end_col'] = col + pos = {'coord': {}} + pos['coord']['start_line'] = line + 
pos['coord']['start_col'] = col + pos['coord']['end_line'] = line + pos['coord']['end_col'] = col self.accum['functions'][node.name]['calls'].append(pos) self.generic_visit(node) @@ -141,10 +149,12 @@ class _TreeCutter(ast.NodeVisitor): start_line, start_col, end_line, end_col = self.start_n_end(node) - self.accum['classes'][node.name]['start_ln'] = start_line - self.accum['classes'][node.name]['start_col'] = start_col - self.accum['classes'][node.name]['end_ln'] = end_line - self.accum['classes'][node.name]['end_ln'] = end_col + pos = {'coord': {}} + pos['coord']['start_ln']= start_line + pos['coord']['start_col'] = start_col + pos['coord']['end_ln'] = end_line + pos['coord']['end_col'] = end_col + self.accum['classes'][node.name] = pos self.generic_visit(node) diff --git a/bitshift/parser/ruby.py b/bitshift/parser/ruby.py deleted file mode 100644 index e69de29..0000000 diff --git a/parsers/java/src/main/java/com/bitshift/parsing/symbols/Symbols.java b/parsers/java/src/main/java/com/bitshift/parsing/symbols/Symbols.java index 8bbf44d..7d6c4ea 100644 --- a/parsers/java/src/main/java/com/bitshift/parsing/symbols/Symbols.java +++ b/parsers/java/src/main/java/com/bitshift/parsing/symbols/Symbols.java @@ -5,7 +5,7 @@ import java.util.ArrayList; public abstract class Symbols { public Symbols() { - + } public static ArrayList createCoord(Integer startLine, Integer startCol, Integer endLine, Integer endCol) { From d8048a74f0454f6705bd76514c4bbc60079480a6 Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Mon, 28 Apr 2014 02:44:52 -0400 Subject: [PATCH 32/36] Fix data length sent to client from ruby server. Pad with extra bytes. 
--- parsers/ruby/lib/parse_server.rb | 15 +++++++++++++-- parsers/ruby/lib/parser.rb | 30 ++++++++++++++++++------------ 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/parsers/ruby/lib/parse_server.rb b/parsers/ruby/lib/parse_server.rb index bcc605f..0dcaebb 100644 --- a/parsers/ruby/lib/parse_server.rb +++ b/parsers/ruby/lib/parse_server.rb @@ -1,6 +1,17 @@ require 'socket' require File.expand_path('../parser.rb', __FILE__) +def pack_int(i) + bytes = []; mask = 255 + + while bytes.length < 4 + bytes.unshift (i & mask) + i = i >> 8 + end + + return bytes.pack('cccc') +end + server = TCPServer.new 5003 loop do @@ -11,8 +22,8 @@ loop do size = (client.readline).to_i p = Bitshift::Parser.new client.read(size) # Get the parsed result - symbols = p.parse.to_s - client.puts [symbols.length].pack('c') + symbols = p.parse + client.puts pack_int(symbols.length) client.puts symbols ensure # Close the socket diff --git a/parsers/ruby/lib/parser.rb b/parsers/ruby/lib/parser.rb index 5751ce0..150e940 100644 --- a/parsers/ruby/lib/parser.rb +++ b/parsers/ruby/lib/parser.rb @@ -12,9 +12,9 @@ module Bitshift parser = RubyParser.new tree = parser.parse(@source) offset = tree.line - 1 - processor = NodeVisitor.new offset - processor.process tree - return processor.symbols + processor = NodeVisitor.new offset, tree + processor.process(tree) + return processor.to_s end end @@ -22,16 +22,16 @@ module Bitshift attr_accessor :symbols attr_accessor :offset - def initialize(offset) + def initialize(offset, tree) super() - @require_empty = false - @offset = offset module_hash = Hash.new {|hash, key| hash[key] = Hash.new} class_hash = module_hash.clone function_hash = Hash.new {|hash, key| hash[key] = { calls: [] } } var_hash = Hash.new {|hash, key| hash[key] = [] } + @require_empty = false + @offset = offset @symbols = { modules: module_hash, classes: class_hash, @@ -68,7 +68,7 @@ module Bitshift end def process_module(exp) - pos = block_position exp + pos = 
block_position(exp) exp.shift name = exp.shift symbols[:modules][name] = pos @@ -77,7 +77,7 @@ module Bitshift end def process_class(exp) - pos = block_position exp + pos = block_position(exp) exp.shift name = exp.shift symbols[:classes][name] = pos @@ -86,7 +86,7 @@ module Bitshift end def process_defn(exp) - pos = block_position exp + pos = block_position(exp) exp.shift name = exp.shift symbols[:functions][name][:declaration] = pos @@ -95,7 +95,7 @@ module Bitshift end def process_call(exp) - pos = statement_position exp + pos = statement_position(exp) exp.shift exp.shift name = exp.shift @@ -105,7 +105,7 @@ module Bitshift end def process_iasgn(exp) - pos = statement_position exp + pos = statement_position(exp) exp.shift name = exp.shift symbols[:vars][name] << pos @@ -114,12 +114,18 @@ module Bitshift end def process_lasgn(exp) - pos = statement_position exp + pos = statement_position(exp) exp.shift name = exp.shift symbols[:vars][name] << pos exp.each_sexp {|s| process(s)} return exp.clear end + + def to_s + str = symbols.to_s + str = str.gsub(/:(\w*)=>/, '"\1":') + return str + end end end From 4cc0626a710f54917875362d064a8e9d21de293f Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Mon, 28 Apr 2014 17:17:22 -0400 Subject: [PATCH 33/36] Catch ClassNotFound error in parser __init__.py --- bitshift/parser/__init__.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/bitshift/parser/__init__.py b/bitshift/parser/__init__.py index 571e25d..5522ae6 100644 --- a/bitshift/parser/__init__.py +++ b/bitshift/parser/__init__.py @@ -4,6 +4,9 @@ from .python import parse_py _all__ = ["parse"] +class UnsupportedFileError(Exception): + pass + def _lang(codelet): """ Private function to identify the language of a codelet. @@ -16,10 +19,13 @@ def _lang(codelet): Modify function to incorporate tags from stackoverflow. 
""" - if codelet.filename is not None: - return pgl.guess_lexer_for_filename(codelet.filename).name + try: + if codelet.filename is not None: + return pgl.guess_lexer_for_filename(codelet.filename, '').name - return LANGS.index(pgl.guess_lexer(codelet.code)) + return LANGS.index(pgl.guess_lexer(codelet.code)) + except: + raise UnsupportedFileError('Could not find a lexer for the codelet\'s filename') def _recv_data(server_socket): """ @@ -61,6 +67,9 @@ def _recv_data(server_socket): def parse(codelet): """ Dispatches the codelet to the correct parser based on its language. + It is the job of the respective parsers to accumulate data about the + code and to convert it into a string representing a python dict. + The codelet is then given dict as its 'symbols' field. :param codelet: The codelet object to parsed. @@ -68,6 +77,7 @@ def parse(codelet): """ lang = _lang(codelet); source = codelet.code + codelet.language = lang server_socket_number = 5000 + lang if lang == LANGS.index('Python'): From 044a448602522559a499c91d95de5c81af308962 Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Mon, 5 May 2014 12:40:06 -0400 Subject: [PATCH 34/36] Change the format of the symbols to fit with earwig's issue. --- bitshift/parser/__init__.py | 14 ++-- bitshift/parser/python.py | 88 +++++++++------------- parsers/java/pom.xml | 3 +- .../com/bitshift/parsing/symbols/JavaSymbols.java | 50 ++++++++---- parsers/ruby/lib/parser.rb | 31 ++++---- 5 files changed, 93 insertions(+), 93 deletions(-) diff --git a/bitshift/parser/__init__.py b/bitshift/parser/__init__.py index 5522ae6..55c76e1 100644 --- a/bitshift/parser/__init__.py +++ b/bitshift/parser/__init__.py @@ -1,4 +1,4 @@ -import ast, pygments.lexers as pgl, sys, socket, struct +import json, pygments.lexers as pgl, sys, socket, struct from ..languages import LANGS from .python import parse_py @@ -19,13 +19,13 @@ def _lang(codelet): Modify function to incorporate tags from stackoverflow. 
""" - try: - if codelet.filename is not None: + if codelet.filename is not None: + try: return pgl.guess_lexer_for_filename(codelet.filename, '').name + except: + raise UnsupportedFileError('Could not find a lexer for the codelet\'s filename') - return LANGS.index(pgl.guess_lexer(codelet.code)) - except: - raise UnsupportedFileError('Could not find a lexer for the codelet\'s filename') + return LANGS.index(pgl.guess_lexer(codelet.code)) def _recv_data(server_socket): """ @@ -88,6 +88,6 @@ def parse(codelet): server_socket.connect(("localhost", server_socket_number)) server_socket.send("%d\n%s" % (len(source), source)); - symbols = ast.literal_eval(_recv_data(server_socket)) + symbols = json.loads(_recv_data(server_socket)) codelet.symbols = symbols diff --git a/bitshift/parser/python.py b/bitshift/parser/python.py index ac71e29..7e9b109 100644 --- a/bitshift/parser/python.py +++ b/bitshift/parser/python.py @@ -1,6 +1,6 @@ import ast -class _TreeCutter(ast.NodeVisitor): +class _CachedWalker(ast.NodeVisitor): """ Local node visitor for python abstract syntax trees. @@ -22,9 +22,9 @@ class _TreeCutter(ast.NodeVisitor): """ self.accum = {'vars': {}, 'functions': {}, 'classes': {}} - self.cache = None + self.cache = [] - def start_n_end(self, node): + def block_position(self, node): """ Helper function to get the start and end lines of an AST node. @@ -54,32 +54,19 @@ class _TreeCutter(ast.NodeVisitor): Add value and type metadata to accum. 
""" - for t in node.targets: - if isinstance(t, ast.Tuple): - for n in t.elts: - line, col = n.lineno, n.col_offset + line, col = node.lineno, node.col_offset + pos = (line, col, line, col) - if not self.accum['vars'].has_key(node.name): - self.accum['vars'][node.name] = {'declaration': {}, 'uses': []} - - pos = {'coord': {}} - pos['coord']['start_line'] = line - pos['coord']['start_col'] = col - pos['coord']['end_line'] = line - pos['coord']['end_col'] = col - self.accum['vars'][n.id]['declaration'] = pos + self.cache.append({'nodes': []}) + self.generic_visit(node) + last = self.cache.pop() - else: - line, col = t.lineno, t.col_offset + for name in last['nodes']: + if not self.accum['vars'].has_key(name): + self.accum['vars'][name] = {'assignments': [], 'uses': []} - pos = {'coord': {}} - pos['coord']['start_line'] = line - pos['coord']['start_col'] = col - pos['coord']['end_line'] = line - pos['coord']['end_col'] = col - self.accum['vars'][t.id]['declaration'] = pos + self.accum['vars'][name]['assignments'].append(pos) - self.generic_visit(node) def visit_FunctionDef(self, node): """ @@ -93,17 +80,13 @@ class _TreeCutter(ast.NodeVisitor): Add arguments and decorators metadata to accum. 
""" - start_line, start_col, end_line, end_col = self.start_n_end(node) + start_line, start_col, end_line, end_col = self.block_position(node) if not self.accum['functions'].has_key(node.name): - self.accum['functions'][node.name] = {'declaration': {}, 'calls': []} + self.accum['functions'][node.name] = {'assignments': [], 'uses': []} - pos = {'coord': {}} - pos['coord']['start_ln']= start_line - pos['coord']['start_col'] = start_col - pos['coord']['end_ln'] = end_line - pos['coord']['end_col'] = end_col - self.accum['functions'][node.name]['declaration'] = pos + pos = (start_line, start_col, end_line, end_col) + self.accum['functions'][node.name]['assignments'].append(pos) self.generic_visit(node) @@ -120,20 +103,18 @@ class _TreeCutter(ast.NodeVisitor): Add arguments and decorators metadata to accum. """ - line, col = node.line_no, node.col_offset + line, col = node.lineno, node.col_offset + pos = (line, col, line, col) - if not self.accum['functions'].has_key(node.name): - self.accum['functions'][node.name] = {'declaration': {}, 'calls': []} + if isinstance(node.func, ast.Name): + name = node.func.id + else: + name = node.func.attr - pos = {'coord': {}} - pos['coord']['start_line'] = line - pos['coord']['start_col'] = col - pos['coord']['end_line'] = line - pos['coord']['end_col'] = col - self.accum['functions'][node.name]['calls'].append(pos) - - self.generic_visit(node) + if not self.accum['functions'].has_key(name): + self.accum['functions'][name] = {'assignments': [], 'uses': []} + self.accum['functions'][name]['uses'].append(pos) def visit_ClassDef(self, node): """ @@ -147,19 +128,22 @@ class _TreeCutter(ast.NodeVisitor): Add arguments, inherits, and decorators metadata to accum. 
""" - start_line, start_col, end_line, end_col = self.start_n_end(node) + start_line, start_col, end_line, end_col = self.block_position(node) - pos = {'coord': {}} - pos['coord']['start_ln']= start_line - pos['coord']['start_col'] = start_col - pos['coord']['end_ln'] = end_line - pos['coord']['end_col'] = end_col + pos = (start_line, start_col, end_line, end_col) self.accum['classes'][node.name] = pos self.generic_visit(node) def visit_Name(self, node): - pass + if self.cache: + last = self.cache[-1] + last['nodes'].append(node.id) + + def visit_Attribute(self, node): + if self.cache: + last = self.cache[-1] + last['nodes'].append(node.attr) def parse_py(codelet): """ @@ -171,6 +155,6 @@ def parse_py(codelet): """ tree = ast.parse(codelet.code) - cutter = _TreeCutter() + cutter = _CachedWalker() cutter.visit(tree) codelet.symbols = cutter.accum diff --git a/parsers/java/pom.xml b/parsers/java/pom.xml index 340feb0..cfecc30 100644 --- a/parsers/java/pom.xml +++ b/parsers/java/pom.xml @@ -16,8 +16,7 @@ 3.8.1 test - - + org.eclipse.jdt core 3.3.0-v_771 diff --git a/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java b/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java index 9265feb..dd15468 100644 --- a/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java +++ b/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java @@ -42,9 +42,17 @@ public class JavaSymbols extends Symbols { HashMap method = this._methods.get(name); if (method == null) { method = new HashMap(); - method.put("declaration", data); + ArrayList assignments = new ArrayList(10); + ArrayList uses = new ArrayList(10); + + assignments.add(data.get("coord")); + method.put("assignments", assignments); + method.put("uses", uses); } else { - method.put("declaration", data); + ArrayList assignments = (ArrayList)method.get("assignments"); + + assignments.add(data.get("coord")); + method.put("assignments", assignments); } 
this._methods.put(name, method); @@ -54,14 +62,17 @@ public class JavaSymbols extends Symbols { HashMap method = this._methods.get(name); if (method == null) { method = new HashMap(); - ArrayList calls = new ArrayList(10); - calls.add(data); - method.put("calls", calls); + ArrayList assignments = new ArrayList(10); + ArrayList uses = new ArrayList(10); + + uses.add(data.get("coord")); + method.put("assignments", assignments); + method.put("uses", uses); } else { - ArrayList calls = (ArrayList)method.get("calls"); - calls = (calls == null) ? new ArrayList(10) : calls; - calls.add(data); - method.put("calls", calls); + ArrayList uses = (ArrayList)method.get("uses"); + + uses.add(data.get("coord")); + method.put("uses", uses); } this._methods.put(name, method); @@ -77,9 +88,17 @@ public class JavaSymbols extends Symbols { HashMap var = this._vars.get(name); if (var == null) { var = new HashMap(); - var.put("declaration", data); + ArrayList assignments = new ArrayList(10); + ArrayList uses = new ArrayList(10); + + assignments.add(data.get("coord")); + var.put("assignments", assignments); + var.put("uses", uses); } else { - var.put("declaration", data); + ArrayList assignments = (ArrayList)var.get("assignments"); + + assignments.add(data.get("coord")); + var.put("assignments", assignments); } this._vars.put(name, var); @@ -89,13 +108,16 @@ public class JavaSymbols extends Symbols { HashMap var = this._vars.get(name); if (var == null) { var = new HashMap(); + ArrayList assignments = new ArrayList(10); ArrayList uses = new ArrayList(10); - uses.add(data); + + uses.add(data.get("coord")); + var.put("assignments", assignments); var.put("uses", uses); } else { ArrayList uses = (ArrayList)var.get("uses"); - uses = (uses == null) ? 
new ArrayList(10) : uses; - uses.add(data); + + uses.add(data.get("coord")); var.put("uses", uses); } diff --git a/parsers/ruby/lib/parser.rb b/parsers/ruby/lib/parser.rb index 150e940..c757fa0 100644 --- a/parsers/ruby/lib/parser.rb +++ b/parsers/ruby/lib/parser.rb @@ -12,23 +12,23 @@ module Bitshift parser = RubyParser.new tree = parser.parse(@source) offset = tree.line - 1 - processor = NodeVisitor.new offset, tree + processor = CachedWalker.new offset, tree processor.process(tree) return processor.to_s end end - class NodeVisitor < SexpProcessor + class CachedWalker < SexpProcessor attr_accessor :symbols attr_accessor :offset def initialize(offset, tree) super() - module_hash = Hash.new {|hash, key| hash[key] = Hash.new} + module_hash = Hash.new {|hash, key| hash[key] = { assignments: [], uses: [] }} class_hash = module_hash.clone - function_hash = Hash.new {|hash, key| hash[key] = { calls: [] } } - var_hash = Hash.new {|hash, key| hash[key] = [] } + function_hash = module_hash.clone + var_hash = module_hash.clone @require_empty = false @offset = offset @@ -41,7 +41,6 @@ module Bitshift end def block_position(exp) - pos = Hash.new end_ln = (start_ln = exp.line - offset) cur_exp = exp @@ -51,9 +50,7 @@ module Bitshift break if cur_exp == nil end - pos[:coord] = { - start_ln: start_ln, - end_ln: end_ln } + pos = [start_ln, -1, end_ln, -1] return pos end @@ -61,9 +58,7 @@ module Bitshift pos = Hash.new end_ln = start_ln = exp.line - offset - pos[:coord] = { - start_ln: start_ln, - end_ln: end_ln } + pos = [start_ln, -1, end_ln, -1] return pos end @@ -71,7 +66,7 @@ module Bitshift pos = block_position(exp) exp.shift name = exp.shift - symbols[:modules][name] = pos + symbols[:modules][name][:assignments] << pos exp.each_sexp {|s| process(s)} return exp.clear end @@ -80,7 +75,7 @@ module Bitshift pos = block_position(exp) exp.shift name = exp.shift - symbols[:classes][name] = pos + symbols[:classes][name][:assignments] << pos exp.each_sexp {|s| process(s)} return 
exp.clear end @@ -89,7 +84,7 @@ module Bitshift pos = block_position(exp) exp.shift name = exp.shift - symbols[:functions][name][:declaration] = pos + symbols[:functions][name][:assignments] << pos exp.each_sexp {|s| process(s)} return exp.clear end @@ -99,7 +94,7 @@ module Bitshift exp.shift exp.shift name = exp.shift - symbols[:functions][name][:calls] << pos + symbols[:functions][name][:uses] << pos exp.each_sexp {|s| process(s)} return exp.clear end @@ -108,7 +103,7 @@ module Bitshift pos = statement_position(exp) exp.shift name = exp.shift - symbols[:vars][name] << pos + symbols[:vars][name][:assignments] << pos exp.each_sexp {|s| process(s)} return exp.clear end @@ -117,7 +112,7 @@ module Bitshift pos = statement_position(exp) exp.shift name = exp.shift - symbols[:vars][name] << pos + symbols[:vars][name][:assignments] << pos exp.each_sexp {|s| process(s)} return exp.clear end From d127ac94ad4ad04ce2b356da1f67e0eaf045085b Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Mon, 5 May 2014 17:01:25 -0400 Subject: [PATCH 35/36] 1) All unavailable line numbers and column numbers become -1. 
2) Add correct dependency in pom.xml --- bitshift/parser/python.py | 4 ++-- parsers/java/pom.xml | 9 ++++----- .../main/java/com/bitshift/parsing/parsers/JavaParser.java | 12 ++++++------ .../main/java/com/bitshift/parsing/symbols/JavaSymbols.java | 10 +++++++++- 4 files changed, 21 insertions(+), 14 deletions(-) diff --git a/bitshift/parser/python.py b/bitshift/parser/python.py index 7e9b109..d0cd7d3 100644 --- a/bitshift/parser/python.py +++ b/bitshift/parser/python.py @@ -55,7 +55,7 @@ class _CachedWalker(ast.NodeVisitor): """ line, col = node.lineno, node.col_offset - pos = (line, col, line, col) + pos = (line, col, -1, -1) self.cache.append({'nodes': []}) self.generic_visit(node) @@ -104,7 +104,7 @@ class _CachedWalker(ast.NodeVisitor): """ line, col = node.lineno, node.col_offset - pos = (line, col, line, col) + pos = (line, col, -1, -1) if isinstance(node.func, ast.Name): name = node.func.id diff --git a/parsers/java/pom.xml b/parsers/java/pom.xml index cfecc30..c2191b0 100644 --- a/parsers/java/pom.xml +++ b/parsers/java/pom.xml @@ -13,13 +13,12 @@ junit junit - 3.8.1 - test + 4.11 - + org.eclipse.jdt - core - 3.3.0-v_771 + org.eclipse.jdt.core + 3.7.1 diff --git a/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java b/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java index 0150af4..4ba3623 100644 --- a/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java +++ b/parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java @@ -76,7 +76,7 @@ public class JavaParser extends Parser { int sl = this.root.getLineNumber(node.getStartPosition()); int sc = this.root.getColumnNumber(node.getStartPosition()); - data.put("coord", Symbols.createCoord(sl, sc, null, null)); + data.put("coord", Symbols.createCoord(sl, sc, -1, -1)); this._cache.push(data); return true; } @@ -97,8 +97,8 @@ public class JavaParser extends Parser { int sl = this.root.getLineNumber(node.getStartPosition()); int sc = 
this.root.getColumnNumber(node.getStartPosition()); - Integer el = null; - Integer ec = null; + Integer el = -1; + Integer ec = -1; if (statements.size() > 0) { Statement last = statements.get(statements.size() - 1); @@ -127,7 +127,7 @@ public class JavaParser extends Parser { int sl = this.root.getLineNumber(node.getStartPosition()); int sc = this.root.getColumnNumber(node.getStartPosition()); - data.put("coord", Symbols.createCoord(sl, sc, null, null)); + data.put("coord", Symbols.createCoord(sl, sc, -1, -1)); data.put("name", name); this._cache.push(data); return true; @@ -157,7 +157,7 @@ public class JavaParser extends Parser { int sl = this.root.getLineNumber(node.getStartPosition()); int sc = this.root.getColumnNumber(node.getStartPosition()); - data.put("coord", Symbols.createCoord(sl, sc, null, null)); + data.put("coord", Symbols.createCoord(sl, sc, -1, -1)); this._cache.push(data); return true; } @@ -178,7 +178,7 @@ public class JavaParser extends Parser { int sl = this.root.getLineNumber(node.getStartPosition()); int sc = this.root.getColumnNumber(node.getStartPosition()); - data.put("coord", Symbols.createCoord(sl, sc, null, null)); + data.put("coord", Symbols.createCoord(sl, sc, -1, -1)); this._cache.push(data); return true; } diff --git a/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java b/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java index dd15468..5419d5a 100644 --- a/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java +++ b/parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java @@ -29,7 +29,15 @@ public class JavaSymbols extends Symbols { } public boolean insertClassDeclaration(String name, HashMap data) { - this._classes.put(name, data); + ArrayList assignments = new ArrayList(10); + ArrayList uses = new ArrayList(10); + HashMap klass = new HashMap(); + + assignments.add(data.get("coord")); + klass.put("assignments", assignments); + klass.put("uses", uses); + + 
this._classes.put(name, klass); return true; } From be7c871cc96e9b328afe8a754c258e55391d5e4a Mon Sep 17 00:00:00 2001 From: Benjamin Attal Date: Mon, 5 May 2014 17:21:45 -0400 Subject: [PATCH 36/36] Add Rakefile task for running ruby parse server. --- parsers/ruby/Rakefile | 5 +++++ parsers/ruby/lib/parse_server.rb | 33 ++++++++++++++++++--------------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/parsers/ruby/Rakefile b/parsers/ruby/Rakefile index e69de29..e66f695 100644 --- a/parsers/ruby/Rakefile +++ b/parsers/ruby/Rakefile @@ -0,0 +1,5 @@ +require File.expand_path('../lib/parse_server.rb', __FILE__) + +task :start_server do |t| + start_server +end diff --git a/parsers/ruby/lib/parse_server.rb b/parsers/ruby/lib/parse_server.rb index 0dcaebb..916f434 100644 --- a/parsers/ruby/lib/parse_server.rb +++ b/parsers/ruby/lib/parse_server.rb @@ -12,22 +12,25 @@ def pack_int(i) return bytes.pack('cccc') end -server = TCPServer.new 5003 -loop do - # Start a new thread for each client accepted - Thread.start(server.accept) do |client| - begin - # Get the amount of data to be read - size = (client.readline).to_i - p = Bitshift::Parser.new client.read(size) - # Get the parsed result - symbols = p.parse - client.puts pack_int(symbols.length) - client.puts symbols - ensure - # Close the socket - client.close +def start_server + server = TCPServer.new 5003 + + loop do + # Start a new thread for each client accepted + Thread.start(server.accept) do |client| + begin + # Get the amount of data to be read + size = (client.readline).to_i + p = Bitshift::Parser.new client.read(size) + # Get the parsed result + symbols = p.parse + client.puts pack_int(symbols.length) + client.puts symbols + ensure + # Close the socket + client.close + end end end end